diff --git a/CHANGELOG.md b/CHANGELOG.md
index d037dec9a0b8e395a8a3d8313cec7f72fa30dd04..3871a3fc5b8af536ba8182544adc968a40fa9566 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,14 @@ SPDX-License-Identifier: LicenseRef-GPL-2.0-only-with-DUNE-exception
   performance. Hence, `MatrixIndexSet` can no longer be used for very large
   matrices with more than 2^32 columns.
 
+- Added the flag `useFixedOrder` to the `coarsen` method of AMG's `ParallelIndicesCoarsener`.
+  If set to true, the coarse matrix rows are ordered in a fixed manner during the creation of
+  the coarser matrix (by accumulation and restriction to fewer processes), which makes parallel
+  runs reproducible at the cost of a possibly longer runtime.
+  If set to false (the default), the row order depends on the order in which messages arrive from
+  the processes responsible for the respective parts of the finer grid, so the indices on the
+  coarser grid may differ from run to run.
+
 ## Deprecations and removals
 
 - The deprecated CMake variables `SUPERLU_INCLUDE_DIRS`, `SUPERLU_LIBRARIES`,
diff --git a/dune/istl/paamg/indicescoarsener.hh b/dune/istl/paamg/indicescoarsener.hh
index 4f9a6f9ef5c075395a050824c7c739de1d1e63da..b24592b3bd49cab779af73fa3c5c3b6af7290c62 100644
--- a/dune/istl/paamg/indicescoarsener.hh
+++ b/dune/istl/paamg/indicescoarsener.hh
@@ -83,6 +83,10 @@ namespace Dune
        * @param aggregates The mapping of unknowns onto aggregates.
        * @param coarseInfo The information about the parallel data decomposition
        * on the coarse level.
+       * @param noAggregates The number of aggregates computed on the fine level.
+       * @param useFixedOrder Flag indicating whether the indices for the coarser level should be
+       *        created in a fixed order, i.e., the order in which the rows were sent.
+       *        If set to true, runs are reproducible, but performance may suffer.
       * @return The number of unknowns on the coarse level.
       */
      template<typename Graph, typename VM>
@@ -92,7 +96,8 @@ namespace Dune
              VM& visitedMap,
              AggregatesMap<typename Graph::VertexDescriptor>& aggregates,
              ParallelInformation& coarseInfo,
-              typename Graph::VertexDescriptor noAggregates);
+              typename Graph::VertexDescriptor noAggregates,
+              bool useFixedOrder = false);
 
    private:
      template<typename G, typename I>
@@ -187,7 +192,8 @@ namespace Dune
                                           const AggregatesMap<typename Graph::VertexDescriptor>& aggregates,
                                           ParallelIndexSet& coarseIndices,
                                           RemoteIndices& coarseRemote,
-                                           ParallelAggregateRenumberer<Graph,I>& renumberer);
+                                           ParallelAggregateRenumberer<Graph,I>& renumberer,
+                                           bool useFixedOrder);
    };
 
 
@@ -219,7 +225,8 @@ namespace Dune
                     VM& visitedMap,
                     AggregatesMap<typename Graph::VertexDescriptor>& aggregates,
                     SequentialInformation& coarseInfo,
-                     typename Graph::VertexDescriptor noAggregates);
+                     typename Graph::VertexDescriptor noAggregates,
+                     bool useFixedOrder = false);
    };
 
 #if HAVE_MPI
@@ -231,13 +238,14 @@ namespace Dune
                                              VM& visitedMap,
                                              AggregatesMap<typename Graph::VertexDescriptor>& aggregates,
                                              ParallelInformation& coarseInfo,
-                                              [[maybe_unused]] typename Graph::VertexDescriptor noAggregates)
+                                              [[maybe_unused]] typename Graph::VertexDescriptor noAggregates,
+                                              bool useFixedOrder)
    {
      ParallelAggregateRenumberer<Graph,typename ParallelInformation::GlobalLookupIndexSet> renumberer(aggregates, fineInfo.globalLookup());
      buildCoarseIndexSet(fineInfo, fineGraph, visitedMap, aggregates,
                          coarseInfo.indexSet(), renumberer);
      buildCoarseRemoteIndices(fineInfo.remoteIndices(), aggregates, coarseInfo.indexSet(),
-                               coarseInfo.remoteIndices(), renumberer);
+                               coarseInfo.remoteIndices(), renumberer, useFixedOrder);
      return renumberer;
    }
 
@@ -318,7 +326,8 @@ namespace Dune
                                                           const AggregatesMap<typename Graph::VertexDescriptor>& aggregates,
                                                           ParallelIndexSet& coarseIndices,
                                                           RemoteIndices& coarseRemote,
-                                                           ParallelAggregateRenumberer<Graph,I>& renumberer)
+                                                           ParallelAggregateRenumberer<Graph,I>& renumberer,
+                                                           bool useFixedOrder)
    {
      std::vector<char> attributes(static_cast<std::size_t>(renumberer));
 
@@ -375,7 +384,7 @@ namespace Dune
      // sync the index set and the remote indices to recompute missing
      // indices
      IndicesSyncer<ParallelIndexSet> syncer(coarseIndices, coarseRemote);
-      syncer.sync(renumberer);
+      syncer.sync(renumberer, useFixedOrder);
 
    }
 
@@ -390,7 +399,8 @@ namespace Dune
                                                     [[maybe_unused]] VM& visitedMap,
                                                     [[maybe_unused]] AggregatesMap<typename Graph::VertexDescriptor>& aggregates,
                                                     [[maybe_unused]] SequentialInformation& coarseInfo,
-                                                     [[maybe_unused]] typename Graph::VertexDescriptor noAggregates)
+                                                     [[maybe_unused]] typename Graph::VertexDescriptor noAggregates,
+                                                     [[maybe_unused]] bool useFixedOrder)
    {
      return noAggregates;
    }
diff --git a/dune/istl/paamg/matrixhierarchy.hh b/dune/istl/paamg/matrixhierarchy.hh
index acba3021baa160c8ad0b6968ae67a1579ad126b3..ec21cf86ca0e941339936fc47aabd1e30609fbab 100644
--- a/dune/istl/paamg/matrixhierarchy.hh
+++ b/dune/istl/paamg/matrixhierarchy.hh
@@ -297,10 +297,13 @@ namespace Dune
     * coarsening will stop (default: 1.2)
     * @param prolongDamp The damping factor to apply to the prolongated update (default: 1.6)
     * @param accumulate Whether to accumulate the data onto fewer processors on coarser levels.
+     * @param useFixedOrder Flag indicating whether the indices for the coarser level should be
+     *        created in a fixed order, i.e., the order in which the rows were sent.
+     *        If set to true, runs are reproducible, but performance may suffer.
     */
    CoarsenCriterion(int maxLevel=100, int coarsenTarget=1000, double minCoarsenRate=1.2,
-                     double prolongDamp=1.6, AccumulationMode accumulate=successiveAccu)
-      : AggregationCriterion(Dune::Amg::Parameters(maxLevel, coarsenTarget, minCoarsenRate, prolongDamp, accumulate))
+                     double prolongDamp=1.6, AccumulationMode accumulate=successiveAccu, bool useFixedOrder = false)
+      : AggregationCriterion(Dune::Amg::Parameters(maxLevel, coarsenTarget, minCoarsenRate, prolongDamp, accumulate, useFixedOrder))
    {}
 
    CoarsenCriterion(const Dune::Amg::Parameters& parms)
@@ -610,7 +613,8 @@ namespace Dune
                      visitedMap,
                      *aggregatesMap,
                      *infoLevel,
-                      noAggregates);
+                      noAggregates,
+                      criterion.useFixedOrder());
      GraphCreator::free(graphs);
 
      if(criterion.debugLevel()>2) {
diff --git a/dune/istl/paamg/parameters.hh b/dune/istl/paamg/parameters.hh
index 84262fc53e5b0f22b525a815d4fdb2f147c0cb95..8552904c10c1e4ae2ba2a85e074d79107d89e3c5 100644
--- a/dune/istl/paamg/parameters.hh
+++ b/dune/istl/paamg/parameters.hh
@@ -314,7 +314,7 @@ namespace Dune
      return accumulate_;
    }
    /**
-     * @brief Set whether he data should be accumulated on fewer processes on coarser levels.
+     * @brief Set whether the data should be accumulated on fewer processes on coarser levels.
     */
    void setAccumulate(AccumulationMode accu)
    {
@@ -324,6 +324,20 @@ namespace Dune
    void setAccumulate(bool accu){
      accumulate_=accu ? successiveAccu : noAccu;
    }
+
+    /**
+     * @brief Check whether the indices for the coarser levels should be created in a fixed order.
+     */
+    bool useFixedOrder() const
+    {
+      return useFixedOrder_;
+    }
+
+    void setUseFixedOrder(bool useFixedOrder)
+    {
+      useFixedOrder_ = useFixedOrder;
+    }
+
    /**
     * @brief Set the damping factor for the prolongation.
     *
@@ -352,11 +366,15 @@ namespace Dune
     * coarsening will stop (default: 1.2)
     * @param prolongDamp The damping factor to apply to the prolongated update (default: 1.6)
     * @param accumulate Whether to accumulate the data onto fewer processors on coarser levels.
+     * @param useFixedOrder Flag indicating whether the indices for the coarser level should be
+     *        created in a fixed order, i.e., the order in which the rows were sent.
+     *        If set to true, runs are reproducible, but performance may suffer.
     */
    CoarseningParameters(int maxLevel=100, int coarsenTarget=1000, double minCoarsenRate=1.2,
-                         double prolongDamp=1.6, AccumulationMode accumulate=successiveAccu)
+                         double prolongDamp=1.6, AccumulationMode accumulate=successiveAccu,
+                         bool useFixedOrder = false)
      : maxLevel_(maxLevel), coarsenTarget_(coarsenTarget), minCoarsenRate_(minCoarsenRate),
-        dampingFactor_(prolongDamp), accumulate_( accumulate)
+        dampingFactor_(prolongDamp), accumulate_( accumulate), useFixedOrder_(useFixedOrder)
    {}
 
  private:
@@ -381,6 +399,12 @@ namespace Dune
     * coarser levels.
     */
    AccumulationMode accumulate_;
+    /**
+     * @brief Flag indicating whether the indices for the coarser level should be created
+     * in a fixed order, i.e., the order in which the rows were sent. If set to true,
+     * runs are reproducible, but performance may suffer.
+     */
+    bool useFixedOrder_;
  };
 
  /**
@@ -491,8 +515,8 @@ namespace Dune
     * @param accumulate Whether to accumulate the data onto fewer processors on coarser levels.
     */
    Parameters(int maxLevel=100, int coarsenTarget=1000, double minCoarsenRate=1.2,
-               double prolongDamp=1.6, AccumulationMode accumulate=successiveAccu)
-      : CoarseningParameters(maxLevel, coarsenTarget, minCoarsenRate, prolongDamp, accumulate)
+               double prolongDamp=1.6, AccumulationMode accumulate=successiveAccu, bool useFixedOrder = false)
+      : CoarseningParameters(maxLevel, coarsenTarget, minCoarsenRate, prolongDamp, accumulate, useFixedOrder)
      , debugLevel_(2), preSmoothSteps_(2), postSmoothSteps_(2), gamma_(1), additive_(false)
    {}
 
diff --git a/dune/istl/paamg/test/CMakeLists.txt b/dune/istl/paamg/test/CMakeLists.txt
index 43bc9bc3c0442c0362f76c1aa504d7dd91aad97b..1b71b0fc793e2069f913ca42e6e511da555f639a 100644
--- a/dune/istl/paamg/test/CMakeLists.txt
+++ b/dune/istl/paamg/test/CMakeLists.txt
@@ -92,6 +92,11 @@ dune_add_test(SOURCES hierarchytest.cc
               TIMEOUT 600
               CMAKE_GUARD MPI_FOUND)
 
+dune_add_test(SOURCES coarsentest.cc
+              MPI_RANKS 1 2 4
+              TIMEOUT 600
+              CMAKE_GUARD MPI_FOUND)
+
 dune_add_test(NAME pamgtest
               SOURCES parallelamgtest.cc
               MPI_RANKS 1 2 4
diff --git a/dune/istl/paamg/test/coarsentest.cc b/dune/istl/paamg/test/coarsentest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9f33fd81fa625c89af60eddbdb4dcb0878b58565
--- /dev/null
+++ b/dune/istl/paamg/test/coarsentest.cc
@@ -0,0 +1,115 @@
+// SPDX-FileCopyrightText: Copyright © DUNE Project contributors, see file LICENSE.md in module root
+// SPDX-License-Identifier: LicenseRef-GPL-2.0-only-with-DUNE-exception
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#include <config.h>
+
+#include <cstdlib>
+#include <iostream>
+
+#include <dune/common/parallel/mpihelper.hh>
+#include <dune/istl/paamg/matrixhierarchy.hh>
+#include <dune/istl/owneroverlapcopy.hh>
+#include <dune/istl/schwarz.hh>
+#include "anisotropic.hh"
+
+constexpr int blockSize = 10;
+
+using Communication = Dune::OwnerOverlapCopyCommunication<int,int>;
+using ParallelIndexSet = Communication::ParallelIndexSet;
+using MatrixBlock = Dune::FieldMatrix<double,blockSize,blockSize>;
+using BCRSMat = Dune::BCRSMatrix<MatrixBlock>;
+using VectorBlock = Dune::FieldVector<double,blockSize>;
+using Vector = Dune::BlockVector<VectorBlock>;
+
+/**
+ * Check if the matrices A and B are (approximately) the same.
+ *
+ * @param A The first matrix.
+ * @param B The second matrix.
+ * @return True if A and B are the same, false otherwise.
+ */
+bool areSame(const BCRSMat& A, const BCRSMat& B)
+{
+  for (auto rowIt = A.begin(); rowIt != A.end(); ++rowIt)
+    for (auto colIt = rowIt->begin(); colIt != rowIt->end(); ++colIt)
+      for (size_t k = 0; k < blockSize; k++)
+        for (size_t l = 0; l < blockSize; l++) {
+          auto entryA = (*colIt)[k][l];
+          try {
+            [[maybe_unused]] auto entryB = B[rowIt.index()][colIt.index()][k][l];
+          } catch (const Dune::ISTLError&) {
+            return false; // If the entry B[rowIt.index()][colIt.index()][k][l] does not exist, the matrices are not the same.
+          }
+          if (std::abs(entryA) > 1e-7 and std::abs(B[rowIt.index()][colIt.index()][k][l]) > 1e-7
+              and std::abs(entryA - B[rowIt.index()][colIt.index()][k][l]) / std::abs(entryA) > 1e-7) {
+            return false; // The entries are not approximately the same.
+          }
+        }
+  return true;
+}
+
+/**
+ * Builds two AMG matrix hierarchies and checks if they are equal.
+ * When such hierarchies are set up on multiple processes without a fixed row order, they are created in a non-deterministic way and the two hierarchies might differ.
+ *
+ * @param N Total number of unknowns
+ * @param rank Rank of the current process
+ * @param noProcesses Number of all processes
+ * @return 0 if the hierarchies are the same, 1 if they are not.
+ */
+int testHierarchy(int N, int rank, int noProcesses)
+{
+  int n;
+  Communication pinfo(Dune::MPIHelper::getCommunicator());
+  ParallelIndexSet& indices = pinfo.indexSet();
+
+  using RemoteIndices = Dune::RemoteIndices<ParallelIndexSet>;
+  RemoteIndices& remoteIndices = pinfo.remoteIndices();
+
+  BCRSMat mat = setupAnisotropic2d<MatrixBlock>(N, indices, Dune::MPIHelper::getCommunication(), &n);
+
+  remoteIndices.rebuild<false>();
+
+  using OverlapFlags = Dune::NegateSet<Communication::OwnerSet>;
+  using Operator = Dune::OverlappingSchwarzOperator<BCRSMat,Vector,Vector,Communication>;
+  using Hierarchy = Dune::Amg::MatrixHierarchy<Operator,Communication>;
+
+  Operator op(mat, pinfo);
+  Hierarchy hierarchyA(
+    Dune::stackobject_to_shared_ptr(op),
+    Dune::stackobject_to_shared_ptr(pinfo));
+
+  Hierarchy hierarchyB(
+    Dune::stackobject_to_shared_ptr(op),
+    Dune::stackobject_to_shared_ptr(pinfo));
+
+  using Criterion = Dune::Amg::CoarsenCriterion<Dune::Amg::SymmetricCriterion<BCRSMat,Dune::Amg::FirstDiagonal> >;
+
+  Criterion criterionA(1000,10,1.2,1.6,Dune::Amg::AccumulationMode::successiveAccu,true);
+  Criterion criterionB(1000,10,1.2,1.6,Dune::Amg::AccumulationMode::successiveAccu,true);
+
+  hierarchyA.template build<OverlapFlags>(criterionA);
+  hierarchyB.template build<OverlapFlags>(criterionB);
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  // Check if the second-finest matrices of the two hierarchies are the same
+  auto finestMatrixA = hierarchyA.matrices().finest();
+  finestMatrixA++;
+  auto finestMatrixB = hierarchyB.matrices().finest();
+  finestMatrixB++;
+  if (!areSame(finestMatrixA->getmat(), finestMatrixB->getmat())) {
+    std::cerr << "During run with " << noProcesses << " processes: The matrix hierarchy on rank " << rank << " was created in a non-deterministic way." << std::endl;
+    return 1;
+  }
+  return 0;
+}
+
+int main(int argc, char** argv)
+{
+  Dune::MPIHelper& mpiHelper = Dune::MPIHelper::instance(argc, argv);
+  int N=10;
+  if(argc>1)
+    N = std::atoi(argv[1]);
+  return testHierarchy(N, mpiHelper.rank(), mpiHelper.size());
+}
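
Usage note (not part of the patch): a minimal sketch of how the new flag would be enabled from user code. It reuses the names from coarsentest.cc above (BCRSMat, OverlapFlags) and assumes a MatrixHierarchy object called `hierarchy`; those names are illustrative, not additional API.

  using Criterion = Dune::Amg::CoarsenCriterion<
      Dune::Amg::SymmetricCriterion<BCRSMat, Dune::Amg::FirstDiagonal> >;

  // Request the fixed (reproducible) coarse row order via the new last
  // constructor argument, as done in coarsentest.cc ...
  Criterion criterion(100, 1000, 1.2, 1.6,
                      Dune::Amg::AccumulationMode::successiveAccu, true);

  // ... or toggle it on an existing criterion with the setter added in parameters.hh:
  // criterion.setUseFixedOrder(true);

  hierarchy.template build<OverlapFlags>(criterion);

With useFixedOrder left at its default (false), the coarse row order, and hence the coarse indices, may differ between otherwise identical parallel runs.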