Skip to content
Snippets Groups Projects
Commit d44c57a8 authored by Christian Engwer's avatar Christian Engwer
Browse files

* implementation and test for MPIGuard

This class detects a thrown exception and communicates it to all other
processes.

[[Imported from SVN: r5943]]
parent ddc258a5
No related branches found
No related tags found
No related merge requests found
// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
// vi: set et ts=4 sw=2 sts=2:
#ifndef DUNE_COMMON_MPIGUARD_HH
#define DUNE_COMMON_MPIGUARD_HH
#include <dune/common/mpihelper.hh>
#include <dune/common/collectivecommunication.hh>
#include <dune/common/mpicollectivecommunication.hh>
#include <dune/common/exceptions.hh>
namespace Dune
{
#ifndef DOXYGEN
namespace {
/*
Interface class for the communication needed by MPIGuard
*/
struct GuardCommunicator
{
  // virtual destructor, so implementations can be deleted through a
  // GuardCommunicator pointer
  virtual ~GuardCommunicator() {}

  // factory: allocate the GuardCommunicator implementation matching the
  // communication object c
  template <class C>
  static GuardCommunicator * create(const C & c);

  // the communication primitives every implementation has to provide
  virtual int rank() = 0;
  virtual int size() = 0;
  virtual int sum(int i) = 0;
};
/*
templated implementation of different communication classes
*/
// Fallback for unsupported communication types: nothing is overridden here,
// so the pure virtual methods of GuardCommunicator keep this class abstract
// and instantiating it (e.g. via "new") fails at compile time.
template <class Imp>
struct GenericGuardCommunicator : GuardCommunicator
{};
// specialization for CollectiveCommunication: every request is forwarded
// to a stored copy of the communication object
template <class T>
struct GenericGuardCommunicator< CollectiveCommunication<T> >
  : GuardCommunicator
{
  // copy of the object handed to the constructor
  const CollectiveCommunication<T> comm;

  GenericGuardCommunicator(const CollectiveCommunication<T> & other)
    : comm(other)
  {}

  virtual int rank() { return comm.rank(); }
  virtual int size() { return comm.size(); }
  virtual int sum(int i) { return comm.sum(i); }
};
// specialization for MPI_Comm: wraps the raw communicator in a
// CollectiveCommunication<MPI_Comm> and reuses the specialization above.
// The type MPI_Comm only exists in builds with MPI, so this specialization
// is compiled only then; otherwise the header would not compile at all.
// NOTE(review): assumes HAVE_MPI is the configure macro (from config.h)
// that the Dune MPI headers use -- confirm against mpihelper.hh.
#if HAVE_MPI
template <>
struct GenericGuardCommunicator<MPI_Comm>
  : public GenericGuardCommunicator< CollectiveCommunication<MPI_Comm> >
{
  GenericGuardCommunicator(const MPI_Comm & c) :
    GenericGuardCommunicator< CollectiveCommunication<MPI_Comm> >(
      CollectiveCommunication<MPI_Comm>(c)) {}
};
#endif // HAVE_MPI
// Allocate (with new) the GenericGuardCommunicator matching the communicator
// type C; the caller is responsible for deleting the returned object.
template<class C>
GuardCommunicator * GuardCommunicator::create(const C & comm)
{
  typedef GenericGuardCommunicator<C> Implementation;
  return new Implementation(comm);
}
}
#endif
/*! @brief This exception is thrown by MPIGuard::finalize if the guard was
    active and at least one process of the communicator reported an error
   @ingroup ParallelCommunication
 */
class MPIGuardError : public ParallelError {};
/*! @brief detects a thrown exception and communicates to all other processes
@ingroup ParallelCommunication
@code
{
MPIGuard guard(...);
do_something();
// tell the guard that you successfully passed a critical operation
guard.finalize();
// reactivate the guard for the next critical operation
guard.reactivate();
int result = do_something_else();
// tell the guard the result of your operation
guard.finalize(result == success);
}
@endcode
You create a MPIGuard object. If an exception is raised on a
process, the MPIGuard detects it because the finalize method
was not called. When the other processes reach their finalize
call, they are informed that an error occurred and the
MPIGuard throws an exception of type MPIGuardError.
@note You can initialize the MPIGuard from different types of communication objects:
- MPIHelper
- CollectiveCommunication
- MPI_Comm
*/
class MPIGuard
{
  // communication backend; allocated by GuardCommunicator::create in the
  // constructors and deleted in the destructor
  GuardCommunicator * comm_;
  // true while the guard waits for a finalize call
  bool active_;

  // We don't want to copy this class: it owns comm_, so a copied or
  // assigned guard would delete the same communicator twice. Both
  // operations are declared private and intentionally left unimplemented.
  MPIGuard (const MPIGuard &);
  MPIGuard & operator= (const MPIGuard &);

public:

  /*! @brief create an MPIGuard operating on the Communicator of the global Dune::MPIHelper
     @param active should the MPIGuard be active upon creation?
   */
  MPIGuard (bool active=true) :
    comm_(GuardCommunicator::create(
            MPIHelper::getCollectiveCommunication())),
    active_(active)
  {}

  /*! @brief create an MPIGuard operating on the Communicator of a special Dune::MPIHelper m
     @param m a reference to an MPIHelper
     @param active should the MPIGuard be active upon creation?
   */
  MPIGuard (MPIHelper & m, bool active=true) :
    comm_(GuardCommunicator::create(
            m.getCollectiveCommunication())),
    active_(active)
  {}

  /*! @brief create an MPIGuard operating on an arbitrary communicator.

     Supported types for the communication object are:
     - MPIHelper
     - CollectiveCommunication
     - MPI_Comm

     @param comm reference to a communication object
     @param active should the MPIGuard be active upon creation?
   */
  template <class C>
  MPIGuard (const C & comm, bool active=true) :
    comm_(GuardCommunicator::create(comm)),
    active_(active)
  {}

  /*! @brief destroy the guard and check for undetected exceptions

     If the guard is still active (finalize was never reached), a failure
     is reported to the other processes. The guard is deactivated first,
     so the guard itself raises no MPIGuardError from the destructor.
   */
  ~MPIGuard()
  {
    if (active_)
    {
      active_ = false;
      finalize(false);
    }
    delete comm_;
  }

  /*! @brief reactivate the guard.

     If the guard is still active finalize(true) is called first.
   */
  void reactivate() {
    if (active_)
      finalize();
    active_ = true;
  }

  /*! @brief stop the guard.

     If no success parameter is passed, the guard assumes that
     everything worked as planned. All errors are communicated
     and an exception of type MPIGuardError is thrown if an error
     (or exception) occurred on any of the processors in the
     communicator.

     @param success inform the guard about possible errors
     @throws MPIGuardError if the guard was active and at least one
             process reported an error
   */
  void finalize(bool success = true)
  {
    // every process contributes 1 for "failed" and 0 for "ok"
    int result = success ? 0 : 1;
    // remember the state before deactivating: an inactive guard still
    // takes part in the communication but never throws
    bool was_active = active_;
    active_ = false;
    result = comm_->sum(result);
    if (result>0 && was_active)
    {
      DUNE_THROW(MPIGuardError, "Terminating process "
                 << comm_->rank() << " due to "
                 << result << " remote error(s)");
    }
  }
};
}
#endif // DUNE_COMMON_MPIGUARD_HH
# -*- tab-width: 4; indent-tabs-mode: nil -*-
# $Id$
TESTPROGS = test-stack arraylisttest shared_ptrtest \
iteratorfacadetest iteratorfacadetest2 tuplestest fvectortest fmatrixtest \
poolallocatortest settest gcdlcmtest streamtest \
bigunsignedinttest mpihelpertest singletontest mpicollcomm \
utilitytest lrutest \
smallobject nullptr-test \
testfassign1 testfassign2 testfassign3 \
testfassign4 \
testfassign_fail1 testfassign_fail2 testfassign_fail3 \
testfassign_fail4 testfassign_fail5 testfassign_fail6 \
conversiontest bitsetvectortest deprtuplestest \
float_cmp fassigntest static_assert_test
TESTPROGS = \
arraylisttest \
bigunsignedinttest \
bitsetvectortest \
conversiontest \
deprtuplestest \
gcdlcmtest \
fassigntest \
testfassign1 testfassign2 testfassign3 testfassign4 \
testfassign_fail1 testfassign_fail2 testfassign_fail3 \
testfassign_fail4 testfassign_fail5 testfassign_fail6 \
fmatrixtest \
fvectortest \
float_cmp \
iteratorfacadetest \
iteratorfacadetest2 \
lrutest \
mpihelpertest \
mpicollcomm \
mpiguard \
nullptr-test \
poolallocatortest \
settest \
shared_ptrtest \
singletontest \
smallobject \
static_assert_test \
streamtest \
tuplestest \
test-stack \
utilitytest
# which tests to run
COMPILE_XFAIL=$(DUNE_COMMON_BIN)/xfail-compile-tests
......@@ -97,6 +117,11 @@ mpicollcomm_CPPFLAGS = $(AM_CPPFLAGS) $(DUNEMPICPPFLAGS)
mpicollcomm_LDADD = $(DUNEMPILIBS) $(LDADD)
mpicollcomm_LDFLAGS = $(AM_LDFLAGS) $(DUNEMPILDFLAGS)
mpiguard_SOURCES = mpiguard.cc
mpiguard_CPPFLAGS = $(AM_CPPFLAGS) $(DUNEMPICPPFLAGS)
mpiguard_LDADD = $(DUNEMPILIBS) $(LDADD)
mpiguard_LDFLAGS = $(AM_LDFLAGS) $(DUNEMPILDFLAGS)
singletontest_SOURCES = singletontest.cc
utilitytest_SOURCES = utilitytest.cc
......
// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
// vi: set et ts=4 sw=2 sts=2:
#include <config.h>
#include <dune/common/mpihelper.hh>
#include <dune/common/mpiguard.hh>
int main(int argc, char** argv)
{
Dune::MPIHelper & mpihelper = Dune::MPIHelper::instance(argc, argv);
if (mpihelper.rank() == 0)
std::cout << "---- default constructor" << std::endl;
try
{
// at the end of this block the guard is destroyed and possible exceptions are communicated
{
Dune::MPIGuard guard;
if (mpihelper.rank() > 0)
DUNE_THROW(Dune::Exception, "Fakeproblem on process " << mpihelper.rank());
guard.finalize();
}
}
catch (Dune::Exception & e)
{
std::cout << "Error (rank " << mpihelper.rank() << "): "
<< e.what() << std::endl;
}
mpihelper.getCollectiveCommunication().barrier();
if (mpihelper.rank() == 0)
std::cout << "---- guard(MPI_COMM_WORLD)" << std::endl;
try
{
// at the end of this block the guard is destroyed and possible exceptions are communicated
{
Dune::MPIGuard guard(MPI_COMM_WORLD);
if (mpihelper.rank() > 0)
DUNE_THROW(Dune::Exception, "Fakeproblem on process " << mpihelper.rank());
guard.finalize();
}
}
catch (Dune::Exception & e)
{
std::cout << "Error (rank " << mpihelper.rank() << "): "
<< e.what() << std::endl;
}
mpihelper.getCollectiveCommunication().barrier();
if (mpihelper.rank() == 0)
std::cout << "---- guard(MPIHelper)" << std::endl;
try
{
// at the end of this block the guard is destroyed and possible exceptions are communicated
{
Dune::MPIGuard guard(mpihelper);
if (mpihelper.rank() > 0)
DUNE_THROW(Dune::Exception, "Fakeproblem on process " << mpihelper.rank());
guard.finalize();
}
}
catch (Dune::Exception & e)
{
std::cout << "Error (rank " << mpihelper.rank() << "): "
<< e.what() << std::endl;
}
mpihelper.getCollectiveCommunication().barrier();
if (mpihelper.rank() == 0)
std::cout << "---- manual error" << std::endl;
try
{
// at the end of this block the guard is destroyed and possible exceptions are communicated
{
Dune::MPIGuard guard;
guard.finalize(mpihelper.rank() > 0);
}
}
catch (Dune::Exception & e)
{
std::cout << "Error (rank " << mpihelper.rank() << "): "
<< e.what() << std::endl;
}
mpihelper.getCollectiveCommunication().barrier();
if (mpihelper.rank() == 0)
std::cout << "---- done" << std::endl;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment