From d44c57a8964cd9793bf9fb81ea883e217cf13339 Mon Sep 17 00:00:00 2001 From: Christian Engwer <christi@dune-project.org> Date: Wed, 24 Mar 2010 18:52:36 +0000 Subject: [PATCH] * implementation and test for MPIGuard This class detect a thrown exception and communicate to all other processes. [[Imported from SVN: r5943]] --- dune/common/mpiguard.hh | 202 +++++++++++++++++++++++++++++++++++ dune/common/test/Makefile.am | 49 ++++++--- dune/common/test/mpiguard.cc | 89 +++++++++++++++ 3 files changed, 328 insertions(+), 12 deletions(-) create mode 100644 dune/common/mpiguard.hh create mode 100644 dune/common/test/mpiguard.cc diff --git a/dune/common/mpiguard.hh b/dune/common/mpiguard.hh new file mode 100644 index 000000000..ebc8eac07 --- /dev/null +++ b/dune/common/mpiguard.hh @@ -0,0 +1,202 @@ +// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- +// vi: set et ts=4 sw=2 sts=2: +#ifndef DUNE_COMMON_MPIGUARD_HH +#define DUNE_COMMON_MPIGUARD_HH + +#include <dune/common/mpihelper.hh> +#include <dune/common/collectivecommunication.hh> +#include <dune/common/mpicollectivecommunication.hh> +#include <dune/common/exceptions.hh> + +namespace Dune +{ + +#ifndef DOXYGEN + namespace { + /* + Interface class for the communication needed by MPIGuard + */ + struct GuardCommunicator + { + // cleanup + virtual ~GuardCommunicator() {}; + // all the communication methods we need + virtual int rank() = 0; + virtual int size() = 0; + virtual int sum(int i) = 0; + // create a new GuardCommunicator pointer + template <class C> + static GuardCommunicator * create(const C & c); + }; + + /* + templated implementation of different communication classes + */ + // the default class cannot be instantiated: it implements none of the pure virtual methods (rank, size, sum), so create() fails to compile for unsupported communicator types + template <class Imp> + struct GenericGuardCommunicator + : public GuardCommunicator + {}; + // specialization for CollectiveCommunication + template <class T> + struct GenericGuardCommunicator< CollectiveCommunication<T> > + : public 
GuardCommunicator + { + const CollectiveCommunication<T> comm; + GenericGuardCommunicator(const CollectiveCommunication<T> & c) : + comm(c) {} + virtual int rank() { return comm.rank(); }; + virtual int size() { return comm.size(); }; + virtual int sum(int i) { return comm.sum(i); } + }; + // specialization for MPI_Comm + template <> + struct GenericGuardCommunicator<MPI_Comm> + : public GenericGuardCommunicator< CollectiveCommunication<MPI_Comm> > + { + GenericGuardCommunicator(const MPI_Comm & c) : + GenericGuardCommunicator< CollectiveCommunication<MPI_Comm> >( + CollectiveCommunication<MPI_Comm>(c)) {} + }; + + template<class C> + GuardCommunicator * GuardCommunicator::create(const C & comm) + { + return new GenericGuardCommunicator<C>(comm); + } + } +#endif + + /*! @brief This exception is thrown if the MPIGuard detects an error on a remote process + @ingroup ParallelCommunication + */ + class MPIGuardError : public ParallelError {}; + + /*! @brief detects a thrown exception and communicates to all other processes + @ingroup ParallelCommunication + + @code + { + MPIGuard guard(...); + + do_something(); + + // tell the guard that you successfully passed a critical operation + guard.finalize(); + // reactivate the guard for the next critical operation + guard.reactivate(); + + int result = do_something_else(); + + // tell the guard the result of your operation + guard.finalize(result == success); + } + @endcode + + You create an MPIGuard object. If an exception is raised on a + process the MPIGuard detects the exception, because the finalize + method was not called. When reaching the finalize call all + other processes are informed that an error occurred and the + MPIGuard throws an exception of type MPIGuardError. 
+ + @note You can initialize the MPIGuard from different types of communication objects: + - MPIHelper + - CollectiveCommunication + - MPI_Comm + */ + class MPIGuard + { + GuardCommunicator * comm_; + bool active_; + + // we don't want to copy this class + MPIGuard (const MPIGuard &); + + public: + /*! @brief create an MPIGuard operating on the Communicator of the global Dune::MPIHelper + + @param active should the MPIGuard be active upon creation? + */ + MPIGuard (bool active=true) : + comm_(GuardCommunicator::create( + MPIHelper::getCollectiveCommunication())), + active_(active) + {} + + /*! @brief create an MPIGuard operating on the Communicator of a specific Dune::MPIHelper m + + @param m a reference to an MPIHelper + @param active should the MPIGuard be active upon creation? + */ + MPIGuard (MPIHelper & m, bool active=true) : + comm_(GuardCommunicator::create( + m.getCollectiveCommunication())), + active_(active) + {} + + /*! @brief create an MPIGuard operating on an arbitrary communicator. + + Supported types for the communication object are: + - MPIHelper + - CollectiveCommunication + - MPI_Comm + + @param comm reference to a communication object + @param active should the MPIGuard be active upon creation? + */ + template <class C> + MPIGuard (const C & comm, bool active=true) : + comm_(GuardCommunicator::create(comm)), + active_(active) + {} + + /*! @brief destroy the guard; a guard that is still active reports a failure through finalize(false). active_ is cleared before that call, so finalize() does not raise MPIGuardError from the destructor + */ + ~MPIGuard() + { + if (active_) + { + active_ = false; + finalize(false); + } + delete comm_; + } + + /*! @brief reactivate the guard. + + If the guard is still active finalize(true) is called first. + */ + void reactivate() { + if (active_ == true) + finalize(); + active_ = true; + } + + /*! @brief stop the guard. + + If no success parameter is passed, the guard assumes that + everything worked as planned. 
All errors are communicated + and an exception of type MPIGuardError is thrown if an error + (or exception) occurred on any of the processors in the + communicator. + + @param success inform the guard about possible errors + */ + void finalize(bool success = true) + { + int result = success ? 0 : 1; + bool was_active = active_; + active_ = false; + result = comm_->sum(result); + if (result>0 && was_active) + { + DUNE_THROW(MPIGuardError, "Terminating process " + << comm_->rank() << " due to " + << result << " remote error(s)"); + } + } + }; + +} + +#endif // DUNE_COMMON_MPIGUARD_HH diff --git a/dune/common/test/Makefile.am b/dune/common/test/Makefile.am index 3b0ef4733..1438081e4 100644 --- a/dune/common/test/Makefile.am +++ b/dune/common/test/Makefile.am @@ -1,18 +1,38 @@ # -*- tab-width: 4; indent-tabs-mode: nil -*- # $Id$ -TESTPROGS = test-stack arraylisttest shared_ptrtest \ - iteratorfacadetest iteratorfacadetest2 tuplestest fvectortest fmatrixtest \ - poolallocatortest settest gcdlcmtest streamtest \ - bigunsignedinttest mpihelpertest singletontest mpicollcomm \ - utilitytest lrutest \ - smallobject nullptr-test \ - testfassign1 testfassign2 testfassign3 \ - testfassign4 \ - testfassign_fail1 testfassign_fail2 testfassign_fail3 \ - testfassign_fail4 testfassign_fail5 testfassign_fail6 \ - conversiontest bitsetvectortest deprtuplestest \ - float_cmp fassigntest static_assert_test +TESTPROGS = \ + arraylisttest \ + bigunsignedinttest \ + bitsetvectortest \ + conversiontest \ + deprtuplestest \ + gcdlcmtest \ + fassigntest \ + testfassign1 testfassign2 testfassign3 testfassign4 \ + testfassign_fail1 testfassign_fail2 testfassign_fail3 \ + testfassign_fail4 testfassign_fail5 testfassign_fail6 \ + fmatrixtest \ + fvectortest \ + float_cmp \ + iteratorfacadetest \ + iteratorfacadetest2 \ + lrutest \ + mpihelpertest \ + mpicollcomm \ + mpiguard \ + nullptr-test \ + poolallocatortest \ + settest \ + shared_ptrtest \ + singletontest \ + smallobject \ + 
static_assert_test \ + streamtest \ + tuplestest \ + test-stack \ + utilitytest + # which tests to run COMPILE_XFAIL=$(DUNE_COMMON_BIN)/xfail-compile-tests @@ -97,6 +117,11 @@ mpicollcomm_CPPFLAGS = $(AM_CPPFLAGS) $(DUNEMPICPPFLAGS) mpicollcomm_LDADD = $(DUNEMPILIBS) $(LDADD) mpicollcomm_LDFLAGS = $(AM_LDFLAGS) $(DUNEMPILDFLAGS) +mpiguard_SOURCES = mpiguard.cc +mpiguard_CPPFLAGS = $(AM_CPPFLAGS) $(DUNEMPICPPFLAGS) +mpiguard_LDADD = $(DUNEMPILIBS) $(LDADD) +mpiguard_LDFLAGS = $(AM_LDFLAGS) $(DUNEMPILDFLAGS) + singletontest_SOURCES = singletontest.cc utilitytest_SOURCES = utilitytest.cc diff --git a/dune/common/test/mpiguard.cc b/dune/common/test/mpiguard.cc new file mode 100644 index 000000000..9736e84b4 --- /dev/null +++ b/dune/common/test/mpiguard.cc @@ -0,0 +1,89 @@ +// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- +// vi: set et ts=4 sw=2 sts=2: +#include <config.h> + +#include <dune/common/mpihelper.hh> +#include <dune/common/mpiguard.hh> + +int main(int argc, char** argv) +{ + Dune::MPIHelper & mpihelper = Dune::MPIHelper::instance(argc, argv); + + if (mpihelper.rank() == 0) + std::cout << "---- default constructor" << std::endl; + try + { + // at the end of this block the guard is destroyed and possible exceptions are communicated + { + Dune::MPIGuard guard; + if (mpihelper.rank() > 0) + DUNE_THROW(Dune::Exception, "Fakeproblem on process " << mpihelper.rank()); + guard.finalize(); + } + } + catch (Dune::Exception & e) + { + std::cout << "Error (rank " << mpihelper.rank() << "): " + << e.what() << std::endl; + } + + mpihelper.getCollectiveCommunication().barrier(); + if (mpihelper.rank() == 0) + std::cout << "---- guard(MPI_COMM_WORLD)" << std::endl; + try + { + // at the end of this block the guard is destroyed and possible exceptions are communicated + { + Dune::MPIGuard guard(MPI_COMM_WORLD); + if (mpihelper.rank() > 0) + DUNE_THROW(Dune::Exception, "Fakeproblem on process " << mpihelper.rank()); + guard.finalize(); + } + } + catch 
(Dune::Exception & e) + { + std::cout << "Error (rank " << mpihelper.rank() << "): " + << e.what() << std::endl; + } + + mpihelper.getCollectiveCommunication().barrier(); + if (mpihelper.rank() == 0) + std::cout << "---- guard(MPIHelper)" << std::endl; + try + { + // at the end of this block the guard is destroyed and possible exceptions are communicated + { + Dune::MPIGuard guard(mpihelper); + if (mpihelper.rank() > 0) + DUNE_THROW(Dune::Exception, "Fakeproblem on process " << mpihelper.rank()); + guard.finalize(); + } + } + catch (Dune::Exception & e) + { + std::cout << "Error (rank " << mpihelper.rank() << "): " + << e.what() << std::endl; + } + + + mpihelper.getCollectiveCommunication().barrier(); + if (mpihelper.rank() == 0) + std::cout << "---- manual error" << std::endl; + try + { + // no exception is thrown in this block: the outcome is handed to finalize() explicitly (success == rank() > 0, so rank 0 is the process that reports a failure) + { + Dune::MPIGuard guard; + guard.finalize(mpihelper.rank() > 0); + } + } + catch (Dune::Exception & e) + { + std::cout << "Error (rank " << mpihelper.rank() << "): " + << e.what() << std::endl; + } + + mpihelper.getCollectiveCommunication().barrier(); + if (mpihelper.rank() == 0) + std::cout << "---- done" << std::endl; +} -- GitLab