diff --git a/dune/istl/umfpack.hh b/dune/istl/umfpack.hh
index 20d72de5839b636da4b40dbf6885e34c6fdd1ec5..6f2e8771b71da24e587e328598d24470f49893a6 100644
--- a/dune/istl/umfpack.hh
+++ b/dune/istl/umfpack.hh
@@ -17,6 +17,7 @@
 #include<dune/common/fvector.hh>
 #include<dune/istl/bccsmatrixinitializer.hh>
 #include<dune/istl/bcrsmatrix.hh>
+#include<dune/istl/foreach.hh>
 #include<dune/istl/solvers.hh>
 #include<dune/istl/solvertype.hh>
 #include <dune/istl/solverfactory.hh>
@@ -193,6 +194,13 @@ namespace Dune {
       /** @brief The type of the range of the solver */
       using range_type = BlockVector<T, A>;
     };
+
+
+    // dummy class to represent no BitVector
+    struct NoBitVector
+    {};
+
+
   }
 
   /** @brief The %UMFPack direct sparse solver
@@ -223,7 +231,7 @@
     /** @brief The matrix type. */
     using Matrix = M;
     using matrix_type = M;
-    /** @brief The corresponding UMFPack matrix type.*/
+    /** @brief The corresponding (scalar) UMFPack matrix type.*/
     using UMFPackMatrix = ISTL::Impl::BCCSMatrix<typename Matrix::field_type, size_type>;
     /** @brief Type of an associated initializer class. */
     using MatrixInitializer = ISTL::Impl::BCCSMatrixInitializer<M, size_type>;
@@ -368,17 +376,37 @@
       if (b.size() == 0)
         return;
 
+      // we have to convert x and b into flat structures
+      // however, this is linear in time
+      std::vector<T> xFlat(x.dim()), bFlat(b.dim());
+
+      flatVectorForEach(x, [&](auto&& entry, auto&& offset)
+      {
+        xFlat[ offset ] = entry;
+      });
+
+      flatVectorForEach(b, [&](auto&& entry, auto&& offset)
+      {
+        bFlat[ offset ] = entry;
+      });
+
       double UMF_Apply_Info[UMFPACK_INFO];
       Caller::solve(UMFPACK_A,
                     umfpackMatrix_.getColStart(),
                     umfpackMatrix_.getRowIndex(),
                     umfpackMatrix_.getValues(),
-                    reinterpret_cast<double*>(&x[0]),
-                    reinterpret_cast<double*>(&b[0]),
+                    reinterpret_cast<double*>(&xFlat[0]),
+                    reinterpret_cast<double*>(&bFlat[0]),
                     UMF_Numeric,
                     UMF_Control,
                     UMF_Apply_Info);
 
+      // copy back to blocked vector
+      flatVectorForEach(x, [&](auto&& entry, auto offset)
+      {
+        entry = xFlat[offset];
+      });
+
       //this is a direct solver
       res.iterations = 1;
       res.converged = true;
@@ -399,6 +427,8 @@
      * @brief additional apply method with c-arrays in analogy to superlu
      * @param x solution array
      * @param b rhs array
+     *
+     * NOTE If the user hands over a raw pointer, we assume that they know what they are doing, hence no copy to flat structures is performed.
      */
     void apply(T* x, T* b)
     {
@@ -444,8 +474,17 @@
         DUNE_THROW(Dune::Exception,"IO ERROR while trying to save UMFPack decomposition");
     }
 
-    /** @brief Initialize data from given matrix. */
-    void setMatrix(const Matrix& matrix)
+    /** @brief Initialize data from given matrix.
+     *
+     *  \tparam BitVector a bitvector type compatible with the domain_type/range_type.
+     *          Defaults to NoBitVector for backwards compatibility.
+     *
+     *  A positive bit indicates that the corresponding matrix row/column is excluded from the
+     *  UMFPACK decomposition.
+     *  WARNING This is the opposite of the behavior of the previous implementation in `setSubMatrix`.
+     */
+    template<class BitVector = Impl::NoBitVector>
+    void setMatrix(const Matrix& matrix, const BitVector& bitVector = {})
     {
       if ((umfpackMatrix_.N() + umfpackMatrix_.M() > 0) || matrixIsLoaded_)
         free();
@@ -454,16 +493,128 @@
 
       if (umfpackMatrix_.N() + umfpackMatrix_.M() + umfpackMatrix_.nonzeroes() != 0)
         umfpackMatrix_.free();
 
-      umfpackMatrix_.setSize(MatrixDimension<Matrix>::rowdim(matrix),
-                             MatrixDimension<Matrix>::coldim(matrix));
-      ISTL::Impl::BCCSMatrixInitializer<Matrix, size_type> initializer(umfpackMatrix_);
-      copyToBCCSMatrix(initializer, matrix);
+      constexpr bool useBitVector = not std::is_same_v<BitVector,Impl::NoBitVector>;
+
+      // use a dynamic flat vector for the bitset
+      std::vector<bool> flatBitVector;
+      // and a mapping from the compressed indices
+      std::vector<size_type> subIndices;
+
+      int numberOfIgnoredDofs = 0;
+      int nonZeros = 0;
+
+      if constexpr ( useBitVector )
+      {
+        auto flatSize = flatVectorForEach(bitVector, [](auto&&, auto&&){});
+        flatBitVector.resize(flatSize);
+
+        flatVectorForEach(bitVector, [&](auto&& entry, auto&& offset)
+        {
+          flatBitVector[ offset ] = entry;
+          if ( entry )
+          {
+            numberOfIgnoredDofs++;
+          }
+        });
+      }
+
+      // compute the flat dimension and the number of nonzeros of the matrix
+      auto [flatRows,flatCols] = flatMatrixForEach( matrix, [&](auto&& /*entry*/, auto&& row, auto&& col){
+        // do not count ignored entries
+        if constexpr ( useBitVector )
+          if ( flatBitVector[row] or flatBitVector[col] )
+            return;
+
+        nonZeros++;
+      });
+
+      if constexpr ( useBitVector )
+      {
+        // use the original flatRows!
+        subIndices.resize(flatRows,std::numeric_limits<std::size_t>::max());
+
+        size_type subIndexCounter = 0;
+        for ( size_type i=0; i<size_type(flatRows); i++ )
+          if ( not flatBitVector[ i ] )
+            subIndices[ i ] = subIndexCounter++;
+
+        // update the original matrix size
+        flatRows -= numberOfIgnoredDofs;
+        flatCols -= numberOfIgnoredDofs;
+      }
+
+
+      umfpackMatrix_.setSize(flatRows,flatCols);
+      umfpackMatrix_.Nnz_ = nonZeros;
+
+      // prepare the arrays
+      umfpackMatrix_.colstart = new size_type[flatCols+1];
+      umfpackMatrix_.rowindex = new size_type[nonZeros];
+      umfpackMatrix_.values = new T[nonZeros];
+
+      for ( size_type i=0; i<size_type(flatCols+1); i++ )
+      {
+        umfpackMatrix_.colstart[i] = 0;
+      }
+
+      // at first, we need to compute the column start indices
+      // therefore, we count all entries in each column and in the end we accumulate everything
+      flatMatrixForEach(matrix, [&](auto&& /*entry*/, auto&& flatRowIndex, auto&& flatColIndex)
+      {
+        // do nothing if entry is excluded
+        if constexpr ( useBitVector )
+          if ( flatBitVector[flatRowIndex] or flatBitVector[flatColIndex] )
+            return;
+
+        // pick compressed or uncompressed index
+        // compiler will hopefully do some constexpr optimization here
+        auto colIdx = useBitVector ? subIndices[flatColIndex] : flatColIndex;
+
+        umfpackMatrix_.colstart[colIdx+1]++;
+      });
+
+      // now accumulate
+      for ( size_type i=0; i<(size_type)flatCols; i++ )
+      {
+        umfpackMatrix_.colstart[i+1] += umfpackMatrix_.colstart[i];
+      }
+
+      // we need a compressed position counter in each column
+      std::vector<size_type> colPosition(flatCols,0);
+
+      // now we can set the entries: the procedure below works with both row- and column-major index ordering
+      flatMatrixForEach(matrix, [&](auto&& entry, auto&& flatRowIndex, auto&& flatColIndex)
+      {
+        // do nothing if entry is excluded
+        if constexpr ( useBitVector )
+          if ( flatBitVector[flatRowIndex] or flatBitVector[flatColIndex] )
+            return;
+
+        // pick compressed or uncompressed index
+        // compiler will hopefully do some constexpr optimization here
+        auto rowIdx = useBitVector ? subIndices[flatRowIndex] : flatRowIndex;
+        auto colIdx = useBitVector ? subIndices[flatColIndex] : flatColIndex;
+
+        // the start index of each column is already fixed
+        auto colStart = umfpackMatrix_.colstart[colIdx];
+        // get the current number of picked elements in this column
+        auto colPos = colPosition[colIdx];
+        // assign the corresponding row index and the value of this element
+        umfpackMatrix_.rowindex[ colStart + colPos ] = rowIdx;
+        umfpackMatrix_.values[ colStart + colPos ] = entry;
+        // increase the number of picked elements in this column
+        colPosition[colIdx]++;
+      });
 
       decompose();
     }
 
-    template<class S>
+    // Keep legacy version using a set of scalar indices
+    // The new version using a bitVector type for marking the active matrix indices is
+    // directly given in `setMatrix` with an additional BitVector argument.
+    // The new version is more flexible and allows, e.g., marking single components of a matrix block.
+    template<typename S>
     void setSubMatrix(const Matrix& _mat, const S& rowIndexSet)
     {
       if ((umfpackMatrix_.N() + umfpackMatrix_.M() > 0) || matrixIsLoaded_)
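
Usage sketch (not part of the patch): a minimal example of how the touched interface is exercised, assuming a hand-assembled BCRSMatrix with 2x2 blocks and a dune-istl build with SuiteSparse/UMFPack enabled. The assembly code, the bit-field type and everything outside the patched header are illustrative assumptions, not part of this change.

#include <dune/common/fmatrix.hh>
#include <dune/common/fvector.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/bvector.hh>
#include <dune/istl/umfpack.hh>

int main()
{
  constexpr int k = 2;
  using Block  = Dune::FieldMatrix<double,k,k>;
  using Matrix = Dune::BCRSMatrix<Block>;
  using Vector = Dune::BlockVector<Dune::FieldVector<double,k>>;

  // assemble a small block-diagonal test matrix (illustrative only)
  const std::size_t n = 4;
  Matrix A(n, n, Matrix::random);
  for (std::size_t i = 0; i < n; ++i)
    A.setrowsize(i, 1);
  A.endrowsizes();
  for (std::size_t i = 0; i < n; ++i)
    A.addindex(i, i);
  A.endindices();
  A = 0.0;
  for (std::size_t i = 0; i < n; ++i)
    for (int j = 0; j < k; ++j)
      A[i][i][j][j] = 2.0 + i;

  Vector x(n), b(n);
  x = 0.0;
  b = 1.0;

  // the blocked matrix and vectors are flattened internally by the patched code
  Dune::UMFPack<Matrix> solver;
  solver.setMatrix(A);

  Dune::InverseOperatorResult res;
  solver.apply(x, b, res);

  // the new optional argument excludes flagged rows/columns from the decomposition;
  // a bit field laid out like the domain_type is assumed to be compatible here, e.g.
  //   Dune::BlockVector<Dune::FieldVector<char,k>> ignore(n);
  //   ignore = 0;  ignore[0][1] = 1;
  //   solver.setMatrix(A, ignore);

  return 0;
}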