diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fc0d4a49aa60f1ea8382ddcb963dcbf97736b42..d037dec9a0b8e395a8a3d8313cec7f72fa30dd04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ SPDX-License-Identifier: LicenseRef-GPL-2.0-only-with-DUNE-exception # Master (will become release 2.10) +- `UMFPACK` is extended to arbitrary blocked matrix structures. This includes `MultiTypeBlockMatrix`. The external interface is unchanged. + However, internally the `BCCSMatrixInitializer` is replaced by direct calls of `flatMatrixForEach` similar to `Cholmod`. This requires a + compatible vector field of "ignored" degrees of freedom. The method `setSubMatrix` with a top-level index set is preserved for compatibility. + - The internal storage in `MatrixIndexSet` was changed from `std::set` to a sorted `std::vector` (with a `std::set` fallback for very dense rows) to improve performance. The stored index type was changed from `std::size_t` to `uint32_t` to reduce memory consumption and improve diff --git a/dune/istl/test/umfpacktest.cc b/dune/istl/test/umfpacktest.cc index 334107cfac40ddeed9263f074cd52896979733fe..dcfadbc716970634ca9eef5eb15da2248ebabddc 100644 --- a/dune/istl/test/umfpacktest.cc +++ b/dune/istl/test/umfpacktest.cc @@ -8,23 +8,47 @@ #include <dune/common/fmatrix.hh> #include <dune/common/fvector.hh> +#include <dune/common/indices.hh> #include <dune/common/timer.hh> +#include <dune/common/test/testsuite.hh> #include <dune/istl/bvector.hh> #include <dune/istl/umfpack.hh> #include "laplacian.hh" +using namespace Dune; -template<typename Matrix, typename Vector> -void runUMFPack(std::size_t N) +// helper to add something to the diagonal of a BCRSMatrix with blocks +auto addToDiagonal = [](auto&& matrix, const auto& value) { + for( auto rowIt=matrix.begin(); rowIt!=matrix.end(); rowIt++ ) + { + for( auto entryIt=rowIt->begin(); entryIt!=rowIt->end(); entryIt++ ) + { + if ( entryIt.index() == rowIt.index() ) + { + auto&& block = Dune::Impl::asMatrix(*entryIt); + for (auto it=block.begin(); it!=block.end(); ++it) + { + (*it)[it.index()] += value; + } + } + } + } +}; + +template<typename Matrix, typename Vector, typename BitVector> +TestSuite runUMFPack(std::size_t N) { - typedef Dune::MatrixAdapter<Matrix,Vector,Vector> Operator; + TestSuite t; Matrix mat; - Operator fop(mat); Vector b(N*N), x(N*N), b1(N/2), x1(N/2); setupLaplacian(mat,N); + + // make this regular + addToDiagonal(mat,100.0); + b=1; b1=1; x=0; @@ -41,26 +65,170 @@ void runUMFPack(std::size_t N) solver.apply(x, b, res); solver.free(); + // test + auto norm = b.two_norm(); + mat.mmv(x,b); + t.check( b.two_norm()/norm < 1e-11 ) << " Error in UMFPACK, relative residual is too large: " << b.two_norm()/norm; + Dune::UMFPack<Matrix> solver1; std::set<std::size_t> mrs; + BitVector bitVector(N*N); + bitVector = true; for(std::size_t s=0; s < N/2; ++s) + { mrs.insert(s); + bitVector[s] = false; + } + + solver1.setMatrix(mat,bitVector); + solver1.apply(x1,b1, res); + // test + x=0; + b=0; + for( std::size_t i=0; i<N/2; i++ ) + { + // set subindices + x[i] = x1[i]; + b[i] = b1[i]; + } + + norm = b.two_norm(); + mat.mmv(x,b); + + // truncate deactivated indices + for( std::size_t i=N/2; i<N*N; i++ ) + b[i] = 0; + + t.check( b.two_norm()/norm < 1e-11 ) << " Error in UMFPACK, relative residual is too large: " << b.two_norm()/norm; + + // compare with setSubMatrix solver1.setSubMatrix(mat,mrs); solver1.setVerbosity(true); - solver1.apply(x1,b1, res); + auto x2 = x1; + x2 = 0; + solver1.apply(x2,b1, res); + + x2 -= x1; + t.check( x2.two_norm()/x1.two_norm() < 1e-11 ) << " Error in UMFPACK, setSubMatrix yields different result as setMatrix with BitVector, relative diff: " << x2.two_norm()/x1.two_norm(); + + solver1.apply(reinterpret_cast<typename Matrix::field_type*>(&x1[0]), reinterpret_cast<typename Matrix::field_type*>(&b1[0])); Dune::UMFPack<Matrix> save_solver(mat,"umfpack_decomp",0); Dune::UMFPack<Matrix> load_solver(mat,"umfpack_decomp",0); + + return t; +} + + + +template<typename Matrix, typename Vector, typename BitVector> +TestSuite runUMFPackMultitype2x2(std::size_t N) +{ + TestSuite t; + + using namespace Dune::Indices; + + Matrix mat; + + Vector b,x,b1,x1; + + b[_0].resize(N*N); + b[_1].resize(N*N); + + x[_0].resize(N*N); + x[_1].resize(N*N); + + b1[_0].resize(N/2); + b1[_1].resize(N/2); + x1[_0].resize(N/2); + x1[_1].resize(N/2); + + + // initialize all 4 matrix blocks + setupLaplacian(mat[_0][_0],N); + setupLaplacian(mat[_0][_1],N); + setupLaplacian(mat[_1][_0],N); + setupLaplacian(mat[_1][_1],N); + + // make this regular + addToDiagonal(mat[_0][_0],200.0); + addToDiagonal(mat[_1][_1],100.0); + + b=1.0; + b1=1.0; + x=0.0; + x1=0.0; + + Dune::Timer watch; + + watch.reset(); + + Dune::UMFPack<Matrix> solver(mat,1); + + Dune::InverseOperatorResult res; + + solver.apply(x, b, res); + solver.free(); + + // test + auto norm = b.two_norm(); + mat.mmv(x,b); + t.check( b.two_norm()/norm < 1e-11 ) << " Error in UMFPACK, relative residual is too large: " << b.two_norm()/norm; + + Dune::UMFPack<Matrix> solver1; + + BitVector bitVector; + bitVector[_0].resize(N*N); + bitVector[_1].resize(N*N); + + bitVector = true; + for(std::size_t s=0; s < N/2; ++s) + { + bitVector[_0][s] = false; + bitVector[_1][s] = false; + } + + solver1.setMatrix(mat,bitVector); + solver1.apply(x1,b1, res); + + // test + x=0; + b=0; + for( std::size_t i=0; i<N/2; i++ ) + { + // set subindices + x[_0][i] = x1[_0][i]; + x[_1][i] = x1[_1][i]; + b[_0][i] = b1[_0][i]; + b[_1][i] = b1[_1][i]; + } + + norm = b.two_norm(); + mat.mmv(x,b); + + // truncate deactivated indices + for( std::size_t i=N/2; i<N*N; i++ ) + { + b[_0][i] = 0; + b[_1][i] = 0; + } + + t.check( b.two_norm()/norm < 1e-11 ) << " Error in UMFPACK, relative residual is too large: " << b.two_norm()/norm; + + return t; } + int main(int argc, char** argv) try { #if HAVE_SUITESPARSE_UMFPACK + + TestSuite t; std::size_t N=100; if(argc>1) @@ -69,28 +237,49 @@ int main(int argc, char** argv) try // ------------------------------------------------------------------------------ std::cout<<"testing for N="<<N<<" BCRSMatrix<double>"<<std::endl; { - using Matrix = Dune::BCRSMatrix<double>; - using Vector = Dune::BlockVector<double>; - runUMFPack<Matrix,Vector>(N); + using Matrix = Dune::BCRSMatrix<double>; + using Vector = Dune::BlockVector<double>; + using BitVector = Dune::BlockVector<int>; + t.subTest(runUMFPack<Matrix,Vector,BitVector>(N)); } // ------------------------------------------------------------------------------ std::cout<<"testing for N="<<N<<" BCRSMatrix<FieldMatrix<double,1,1> >"<<std::endl; { - using Matrix = Dune::BCRSMatrix<Dune::FieldMatrix<double,1,1> >; - using Vector = Dune::BlockVector<Dune::FieldVector<double,1> >; - runUMFPack<Matrix,Vector>(N); + using Matrix = Dune::BCRSMatrix<Dune::FieldMatrix<double,1,1> >; + using Vector = Dune::BlockVector<Dune::FieldVector<double,1> >; + using BitVector = Dune::BlockVector<Dune::FieldVector<int,1> >; + t.subTest(runUMFPack<Matrix,Vector,BitVector>(N)); } // ------------------------------------------------------------------------------ std::cout<<"testing for N="<<N<<" BCRSMatrix<FieldMatrix<double,2,2> >"<<std::endl; { - using Matrix = Dune::BCRSMatrix<Dune::FieldMatrix<double,2,2> >; - using Vector = Dune::BlockVector<Dune::FieldVector<double,2> >; - runUMFPack<Matrix,Vector>(N); + using Matrix = Dune::BCRSMatrix<Dune::FieldMatrix<double,2,2> >; + using Vector = Dune::BlockVector<Dune::FieldVector<double,2> >; + using BitVector = Dune::BlockVector<Dune::FieldVector<int,2> >; + t.subTest(runUMFPack<Matrix,Vector,BitVector>(N)); + } + + // ------------------------------------------------------------------------------ + std::cout<<"testing for N="<<N<<" MultiTypeBlockMatrix<BCRSMatrix<FieldMatrix<double,2,2>> ... >"<<std::endl; + { + using MatrixBlock = Dune::BCRSMatrix<Dune::FieldMatrix<double,2,2> >; + using BlockRow = Dune::MultiTypeBlockVector<MatrixBlock,MatrixBlock>; + using Matrix = Dune::MultiTypeBlockMatrix<BlockRow,BlockRow>; + + using VectorBlock = Dune::BlockVector<Dune::FieldVector<double,2> >; + using Vector = Dune::MultiTypeBlockVector<VectorBlock,VectorBlock>; + + using BitVectorBlock = Dune::BlockVector<Dune::FieldVector<int,2> >; + using BitVector = Dune::MultiTypeBlockVector<BitVectorBlock,BitVectorBlock>; + + t.subTest(runUMFPackMultitype2x2<Matrix,Vector,BitVector>(N)); } - return 0; + + + return t.exit(); #else // HAVE_SUITESPARSE_UMFPACK std::cerr << "You need SuiteSparse's UMFPack to run this test." << std::endl; return 77; diff --git a/dune/istl/umfpack.hh b/dune/istl/umfpack.hh index 20d72de5839b636da4b40dbf6885e34c6fdd1ec5..63fa05dcd353da99f39b8a46c61a479840e2b647 100644 --- a/dune/istl/umfpack.hh +++ b/dune/istl/umfpack.hh @@ -17,6 +17,9 @@ #include<dune/common/fvector.hh> #include<dune/istl/bccsmatrixinitializer.hh> #include<dune/istl/bcrsmatrix.hh> +#include<dune/istl/foreach.hh> +#include<dune/istl/multitypeblockmatrix.hh> +#include<dune/istl/multitypeblockvector.hh> #include<dune/istl/solvers.hh> #include<dune/istl/solvertype.hh> #include <dune/istl/solverfactory.hh> @@ -193,6 +196,32 @@ namespace Dune { /** @brief The type of the range of the solver */ using range_type = BlockVector<T, A>; }; + + // to make the `UMFPackVectorChooser` work with `MultiTypeBlockMatrix`, we need to add an intermediate step for the rows, which are typically `MultiTypeBlockVector` + template<typename FirstBlock, typename... Blocks> + struct UMFPackVectorChooser<MultiTypeBlockVector<FirstBlock, Blocks...> > + { + /** @brief The type of the domain of the solver */ + using domain_type = MultiTypeBlockVector<typename UMFPackVectorChooser<FirstBlock>::domain_type,typename UMFPackVectorChooser<Blocks>::domain_type... >; + /** @brief The type of the range of the solver */ + using range_type = typename UMFPackVectorChooser<FirstBlock>::range_type; + }; + + // specialization for `MultiTypeBlockMatrix` with `MultiTypeBlockVector` rows + template<typename FirstRow, typename... Rows> + struct UMFPackVectorChooser<MultiTypeBlockMatrix<FirstRow, Rows...> > + { + /** @brief The type of the domain of the solver */ + using domain_type = typename UMFPackVectorChooser<FirstRow>::domain_type; + /** @brief The type of the range of the solver */ + using range_type = MultiTypeBlockVector< typename UMFPackVectorChooser<FirstRow>::range_type, typename UMFPackVectorChooser<Rows>::range_type... >; + }; + + // dummy class to represent no BitVector + struct NoBitVector + {}; + + } /** @brief The %UMFPack direct sparse solver @@ -223,7 +252,7 @@ namespace Dune { /** @brief The matrix type. */ using Matrix = M; using matrix_type = M; - /** @brief The corresponding UMFPack matrix type.*/ + /** @brief The corresponding (scalar) UMFPack matrix type.*/ using UMFPackMatrix = ISTL::Impl::BCCSMatrix<typename Matrix::field_type, size_type>; /** @brief Type of an associated initializer class. */ using MatrixInitializer = ISTL::Impl::BCCSMatrixInitializer<M, size_type>; @@ -368,17 +397,37 @@ namespace Dune { if (b.size() == 0) return; + // we have to convert x and b into flat structures + // however, this is linear in time + std::vector<T> xFlat(x.dim()), bFlat(b.dim()); + + flatVectorForEach(x, [&](auto&& entry, auto&& offset) + { + xFlat[ offset ] = entry; + }); + + flatVectorForEach(b, [&](auto&& entry, auto&& offset) + { + bFlat[ offset ] = entry; + }); + double UMF_Apply_Info[UMFPACK_INFO]; Caller::solve(UMFPACK_A, umfpackMatrix_.getColStart(), umfpackMatrix_.getRowIndex(), umfpackMatrix_.getValues(), - reinterpret_cast<double*>(&x[0]), - reinterpret_cast<double*>(&b[0]), + reinterpret_cast<double*>(&xFlat[0]), + reinterpret_cast<double*>(&bFlat[0]), UMF_Numeric, UMF_Control, UMF_Apply_Info); + // copy back to blocked vector + flatVectorForEach(x, [&](auto&& entry, auto offset) + { + entry = xFlat[offset]; + }); + //this is a direct solver res.iterations = 1; res.converged = true; @@ -399,6 +448,8 @@ namespace Dune { * @brief additional apply method with c-arrays in analogy to superlu * @param x solution array * @param b rhs array + * + * NOTE If the user hands over a pure pointer, we assume that they know what they are doing, hence no copy to flat structures */ void apply(T* x, T* b) { @@ -444,8 +495,17 @@ namespace Dune { DUNE_THROW(Dune::Exception,"IO ERROR while trying to save UMFPack decomposition"); } - /** @brief Initialize data from given matrix. */ - void setMatrix(const Matrix& matrix) + /** @brief Initialize data from given matrix. + * + * \tparam BitVector a compatible bitvector to the domain_type/range_type. + * Defaults to NoBitVector for backwards compatibility + * + * A positive bit indices that the corresponding matrix row/column is excluded from the + * UMFPACK decomposition. + * WARNING This is an opposite behavior of the previous implementation in `setSubMatrix`. + */ + template<class BitVector = Impl::NoBitVector> + void setMatrix(const Matrix& matrix, const BitVector& bitVector = {}) { if ((umfpackMatrix_.N() + umfpackMatrix_.M() > 0) || matrixIsLoaded_) free(); @@ -454,16 +514,128 @@ namespace Dune { if (umfpackMatrix_.N() + umfpackMatrix_.M() + umfpackMatrix_.nonzeroes() != 0) umfpackMatrix_.free(); - umfpackMatrix_.setSize(MatrixDimension<Matrix>::rowdim(matrix), - MatrixDimension<Matrix>::coldim(matrix)); - ISTL::Impl::BCCSMatrixInitializer<Matrix, size_type> initializer(umfpackMatrix_); - copyToBCCSMatrix(initializer, matrix); + constexpr bool useBitVector = not std::is_same_v<BitVector,Impl::NoBitVector>; + + // use a dynamic flat vector for the bitset + std::vector<bool> flatBitVector; + // and a mapping from the compressed indices + std::vector<size_type> subIndices; + + int numberOfIgnoredDofs = 0; + int nonZeros = 0; + + if constexpr ( useBitVector ) + { + auto flatSize = flatVectorForEach(bitVector, [](auto&&, auto&&){}); + flatBitVector.resize(flatSize); + + flatVectorForEach(bitVector, [&](auto&& entry, auto&& offset) + { + flatBitVector[ offset ] = entry; + if ( entry ) + { + numberOfIgnoredDofs++; + } + }); + } + + // compute the flat dimension and the number of nonzeros of the matrix + auto [flatRows,flatCols] = flatMatrixForEach( matrix, [&](auto&& /*entry*/, auto&& row, auto&& col){ + // do not count ignored entries + if constexpr ( useBitVector ) + if ( flatBitVector[row] or flatBitVector[col] ) + return; + + nonZeros++; + }); + + if constexpr ( useBitVector ) + { + // use the original flatRows! + subIndices.resize(flatRows,std::numeric_limits<std::size_t>::max()); + + size_type subIndexCounter = 0; + for ( size_type i=0; i<size_type(flatRows); i++ ) + if ( not flatBitVector[ i ] ) + subIndices[ i ] = subIndexCounter++; + + // update the original matrix size + flatRows -= numberOfIgnoredDofs; + flatCols -= numberOfIgnoredDofs; + } + + + umfpackMatrix_.setSize(flatRows,flatCols); + umfpackMatrix_.Nnz_ = nonZeros; + + // prepare the arrays + umfpackMatrix_.colstart = new size_type[flatCols+1]; + umfpackMatrix_.rowindex = new size_type[nonZeros]; + umfpackMatrix_.values = new T[nonZeros]; + + for ( size_type i=0; i<size_type(flatCols+1); i++ ) + { + umfpackMatrix_.colstart[i] = 0; + } + + // at first, we need to compute the column start indices + // therefore, we count all entries in each column and in the end we accumulate everything + flatMatrixForEach(matrix, [&](auto&& /*entry*/, auto&& flatRowIndex, auto&& flatColIndex) + { + // do nothing if entry is excluded + if constexpr ( useBitVector ) + if ( flatBitVector[flatRowIndex] or flatBitVector[flatColIndex] ) + return; + + // pick compressed or uncompressed index + // compiler will hopefully do some constexpr optimization here + auto colIdx = useBitVector ? subIndices[flatColIndex] : flatColIndex; + + umfpackMatrix_.colstart[colIdx+1]++; + }); + + // now accumulate + for ( size_type i=0; i<(size_type)flatCols; i++ ) + { + umfpackMatrix_.colstart[i+1] += umfpackMatrix_.colstart[i]; + } + + // we need a compressed position counter in each column + std::vector<size_type> colPosition(flatCols,0); + + // now we can set the entries: the procedure below works with both row- or column major index ordering + flatMatrixForEach(matrix, [&](auto&& entry, auto&& flatRowIndex, auto&& flatColIndex) + { + // do nothing if entry is excluded + if constexpr ( useBitVector ) + if ( flatBitVector[flatRowIndex] or flatBitVector[flatColIndex] ) + return; + + // pick compressed or uncompressed index + // compiler will hopefully do some constexpr optimization here + auto rowIdx = useBitVector ? subIndices[flatRowIndex] : flatRowIndex; + auto colIdx = useBitVector ? subIndices[flatColIndex] : flatColIndex; + + // the start index of each column is already fixed + auto colStart = umfpackMatrix_.colstart[colIdx]; + // get the current number of picked elements in this column + auto colPos = colPosition[colIdx]; + // assign the corresponding row index and the value of this element + umfpackMatrix_.rowindex[ colStart + colPos ] = rowIdx; + umfpackMatrix_.values[ colStart + colPos ] = entry; + // increase the number of picked elements in this column + colPosition[colIdx]++; + }); decompose(); } - template<class S> + // Keep legacy version using a set of scalar indices + // The new version using a bitVector type for marking the active matrix indices is + // directly given in `setMatrix` with an additional BitVector argument. + // The new version is more flexible and allows, e.g., marking single components of a matrix block. + template<typename S> void setSubMatrix(const Matrix& _mat, const S& rowIndexSet) { if ((umfpackMatrix_.N() + umfpackMatrix_.M() > 0) || matrixIsLoaded_)