From d44c57a8964cd9793bf9fb81ea883e217cf13339 Mon Sep 17 00:00:00 2001
From: Christian Engwer <christi@dune-project.org>
Date: Wed, 24 Mar 2010 18:52:36 +0000
Subject: [PATCH] * implementation and test for MPIGuard

This class detects a thrown exception and communicates it to all other
processes.

[[Imported from SVN: r5943]]
---
 dune/common/mpiguard.hh      | 202 +++++++++++++++++++++++++++++++++++
 dune/common/test/Makefile.am |  49 ++++++---
 dune/common/test/mpiguard.cc |  89 +++++++++++++++
 3 files changed, 328 insertions(+), 12 deletions(-)
 create mode 100644 dune/common/mpiguard.hh
 create mode 100644 dune/common/test/mpiguard.cc

diff --git a/dune/common/mpiguard.hh b/dune/common/mpiguard.hh
new file mode 100644
index 000000000..ebc8eac07
--- /dev/null
+++ b/dune/common/mpiguard.hh
@@ -0,0 +1,202 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_COMMON_MPIGUARD_HH
+#define DUNE_COMMON_MPIGUARD_HH
+
+#include <dune/common/mpihelper.hh>
+#include <dune/common/collectivecommunication.hh>
+#include <dune/common/mpicollectivecommunication.hh>
+#include <dune/common/exceptions.hh>
+
+namespace Dune
+{
+
+#ifndef DOXYGEN
+  namespace {
+    /*
+       Interface class for the communication needed by MPIGuard
+     */
+    struct GuardCommunicator
+    {
+      // virtual destructor: instances are deleted through a GuardCommunicator pointer
+      virtual ~GuardCommunicator() {};
+      // the communication methods MPIGuard needs; pure virtual, implemented per communicator type
+      virtual int rank() = 0;
+      virtual int size() = 0;
+      virtual int sum(int i) = 0;
+      // create a new GuardCommunicator (allocated with new) for the communication object c
+      template <class C>
+      static GuardCommunicator * create(const C & c);
+    };
+
+    /*
+       templated implementation of different communication classes
+     */
+    // the default class is abstract (rank, size and sum stay unimplemented), so creating it always fails to compile
+    template <class Imp>
+    struct GenericGuardCommunicator
+      : public GuardCommunicator
+    {};
+    // specialization for CollectiveCommunication: forwards rank, size and sum to a stored copy
+    template <class T>
+    struct GenericGuardCommunicator< CollectiveCommunication<T> >
+      : public GuardCommunicator
+    {
+      const CollectiveCommunication<T> comm; // held by value, copied from the constructor argument
+      GenericGuardCommunicator(const CollectiveCommunication<T> & c) :
+        comm(c) {}
+      virtual int rank() { return comm.rank(); };
+      virtual int size() { return comm.size(); };
+      virtual int sum(int i) { return comm.sum(i); }
+    };
+    // specialization for MPI_Comm
+    template <>
+    struct GenericGuardCommunicator<MPI_Comm>
+      : public GenericGuardCommunicator< CollectiveCommunication<MPI_Comm> >
+    {
+      GenericGuardCommunicator(const MPI_Comm & c) :
+        GenericGuardCommunicator< CollectiveCommunication<MPI_Comm> >(
+          CollectiveCommunication<MPI_Comm>(c)) {}
+    };
+
+    template<class C>
+    GuardCommunicator * GuardCommunicator::create(const C & comm)
+    {
+      return new GenericGuardCommunicator<C>(comm); // heap-allocated; the caller is responsible for deleting it
+    }
+  }
+#endif
+
+  /*! @brief This exception is thrown by MPIGuard::finalize() if an error was reported on any process of the communicator
+      @ingroup ParallelCommunication
+   */
+  class MPIGuardError : public ParallelError {};
+
+  /*! @brief detects a thrown exception and communicates to all other processes
+      @ingroup ParallelCommunication
+
+     @code
+     {
+       MPIGuard guard(...);
+
+       do_something();
+
+       // tell the guard that you successfully passed a critical operation
+       guard.finalize();
+       // reactivate the guard for the next critical operation
+       guard.reactivate();
+
+       int result = do_something_else();
+
+       // tell the guard the result of your operation
+       guard.finalize(result == success);
+     }
+     @endcode
+
+     You create an MPIGuard object. If an exception is raised on a
+     process, the MPIGuard detects the exception because the finalize
+     method was not called.  When reaching the finalize call, all
+     other processes are informed that an error occurred and the
+     MPIGuard throws an exception of type MPIGuardError.
+
+     @note You can initialize the MPIGuard from different types of communication objects:
+     - MPIHelper
+     - CollectiveCommunication
+     - MPI_Comm
+   */
+  class MPIGuard
+  {
+    GuardCommunicator * comm_;
+    bool active_;
+
+    // the guard owns comm_ (deleted in the destructor): neither copy nor assign it
+    MPIGuard (const MPIGuard &);
+    MPIGuard & operator= (const MPIGuard &);
+  public:
+    /*! @brief create an MPIGuard operating on the Communicator of the global Dune::MPIHelper
+
+       @param active should the MPIGuard be active upon creation?
+     */
+    MPIGuard (bool active=true) :
+      comm_(GuardCommunicator::create(
+              MPIHelper::getCollectiveCommunication())),
+      active_(active)
+    {}
+
+    /*! @brief create an MPIGuard operating on the Communicator of a special Dune::MPIHelper m
+
+       @param m a reference to an MPIHelper
+       @param active should the MPIGuard be active upon creation?
+     */
+    MPIGuard (MPIHelper & m, bool active=true) :
+      comm_(GuardCommunicator::create(
+              m.getCollectiveCommunication())),
+      active_(active)
+    {}
+
+    /*! @brief create an MPIGuard operating on an arbitrary communicator.
+
+       Supported types for the communication object are:
+       - MPIHelper
+       - CollectiveCommunication
+       - MPI_Comm
+
+       @param comm reference to a communication object
+       @param active should the MPIGuard be active upon creation?
+     */
+    template <class C>
+    MPIGuard (const C & comm, bool active=true) :
+      comm_(GuardCommunicator::create(comm)),
+      active_(active)
+    {}
+
+    /*! @brief destroy the guard and check for undetected exceptions
+     */
+    ~MPIGuard()
+    {
+      if (active_)
+      {
+        active_ = false; // deactivate first: finalize(false) then reports the failure without throwing MPIGuardError
+        finalize(false);
+      }
+      delete comm_;
+    }
+
+    /*! @brief reactivate the guard.
+
+       If the guard is still active finalize(true) is called first.
+     */
+    void reactivate() {
+      if (active_ == true)
+        finalize();
+      active_ = true;
+    }
+
+    /*! @brief stop the guard.
+
+       If no success parameter is passed, the guard assumes that
+       everything worked as planned.  All errors are communicated
+       and an exception of type MPIGuardError is thrown if an error
+       (or exception) occurred on any of the processors in the
+       communicator.
+
+       @param success inform the guard about possible errors
+     */
+    void finalize(bool success = true)
+    {
+      int result = success ? 0 : 1;
+      bool was_active = active_; // remember the state: only a guard that was active may throw
+      active_ = false;
+      result = comm_->sum(result); // every process contributes 0 (success) or 1 (failure)
+      if (result>0 && was_active)
+      {
+        DUNE_THROW(MPIGuardError, "Terminating process "
+                   << comm_->rank() << " due to "
+                   << result << " remote error(s)");
+      }
+    }
+  };
+
+}
+
+#endif // DUNE_COMMON_MPIGUARD_HH
diff --git a/dune/common/test/Makefile.am b/dune/common/test/Makefile.am
index 3b0ef4733..1438081e4 100644
--- a/dune/common/test/Makefile.am
+++ b/dune/common/test/Makefile.am
@@ -1,18 +1,38 @@
 # -*- tab-width: 4; indent-tabs-mode: nil -*-
 # $Id$
 
-TESTPROGS = test-stack arraylisttest shared_ptrtest \
-	iteratorfacadetest iteratorfacadetest2 tuplestest fvectortest fmatrixtest \
-	poolallocatortest settest gcdlcmtest streamtest \
-	bigunsignedinttest mpihelpertest singletontest mpicollcomm \
-	utilitytest lrutest \
-	smallobject nullptr-test \
-	testfassign1 testfassign2 testfassign3 \
-	testfassign4 \
-    testfassign_fail1 testfassign_fail2 testfassign_fail3 \
-    testfassign_fail4 testfassign_fail5 testfassign_fail6 \
-    conversiontest bitsetvectortest deprtuplestest \
-    float_cmp fassigntest static_assert_test
+TESTPROGS = \
+    arraylisttest \
+    bigunsignedinttest \
+    bitsetvectortest \
+    conversiontest \
+    deprtuplestest \
+    gcdlcmtest \
+    fassigntest \
+        testfassign1 testfassign2 testfassign3 testfassign4 \
+        testfassign_fail1 testfassign_fail2 testfassign_fail3 \
+        testfassign_fail4 testfassign_fail5 testfassign_fail6 \
+    fmatrixtest \
+    fvectortest \
+    float_cmp \
+    iteratorfacadetest \
+    iteratorfacadetest2 \
+    lrutest \
+    mpihelpertest \
+    mpicollcomm \
+    mpiguard \
+    nullptr-test \
+    poolallocatortest \
+    settest \
+    shared_ptrtest \
+    singletontest \
+    smallobject \
+    static_assert_test \
+    streamtest \
+    tuplestest \
+    test-stack \
+    utilitytest
+
 # which tests to run
 COMPILE_XFAIL=$(DUNE_COMMON_BIN)/xfail-compile-tests
 
@@ -97,6 +117,11 @@ mpicollcomm_CPPFLAGS = $(AM_CPPFLAGS) $(DUNEMPICPPFLAGS)
 mpicollcomm_LDADD = $(DUNEMPILIBS) $(LDADD)
 mpicollcomm_LDFLAGS = $(AM_LDFLAGS) $(DUNEMPILDFLAGS)
 
+mpiguard_SOURCES = mpiguard.cc
+mpiguard_CPPFLAGS = $(AM_CPPFLAGS) $(DUNEMPICPPFLAGS)
+mpiguard_LDADD = $(DUNEMPILIBS) $(LDADD)
+mpiguard_LDFLAGS = $(AM_LDFLAGS) $(DUNEMPILDFLAGS)
+
 singletontest_SOURCES = singletontest.cc
 
 utilitytest_SOURCES = utilitytest.cc
diff --git a/dune/common/test/mpiguard.cc b/dune/common/test/mpiguard.cc
new file mode 100644
index 000000000..9736e84b4
--- /dev/null
+++ b/dune/common/test/mpiguard.cc
@@ -0,0 +1,89 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#include <config.h>
+
+#include <dune/common/mpihelper.hh>
+#include <dune/common/mpiguard.hh>
+
+int main(int argc, char** argv)
+{
+  Dune::MPIHelper & mpihelper = Dune::MPIHelper::instance(argc, argv);
+
+  if (mpihelper.rank() == 0)
+    std::cout << "---- default constructor" << std::endl;
+  try
+  {
+    // at the end of this block the guard is destroyed and possible exceptions are communicated
+    {
+      Dune::MPIGuard guard;
+      if (mpihelper.rank() > 0)
+        DUNE_THROW(Dune::Exception, "Fakeproblem on process " << mpihelper.rank());
+      guard.finalize();
+    }
+  }
+  catch (Dune::Exception & e)
+  {
+    std::cout << "Error (rank " << mpihelper.rank() << "): "
+              << e.what() << std::endl;
+  }
+
+  mpihelper.getCollectiveCommunication().barrier();
+  if (mpihelper.rank() == 0)
+    std::cout << "---- guard(MPI_COMM_WORLD)" << std::endl;
+  try
+  {
+    // at the end of this block the guard is destroyed and possible exceptions are communicated
+    {
+      Dune::MPIGuard guard(MPI_COMM_WORLD);
+      if (mpihelper.rank() > 0)
+        DUNE_THROW(Dune::Exception, "Fakeproblem on process " << mpihelper.rank());
+      guard.finalize();
+    }
+  }
+  catch (Dune::Exception & e)
+  {
+    std::cout << "Error (rank " << mpihelper.rank() << "): "
+              << e.what() << std::endl;
+  }
+
+  mpihelper.getCollectiveCommunication().barrier();
+  if (mpihelper.rank() == 0)
+    std::cout << "---- guard(MPIHelper)" << std::endl;
+  try
+  {
+    // at the end of this block the guard is destroyed and possible exceptions are communicated
+    {
+      Dune::MPIGuard guard(mpihelper);
+      if (mpihelper.rank() > 0)
+        DUNE_THROW(Dune::Exception, "Fakeproblem on process " << mpihelper.rank());
+      guard.finalize();
+    }
+  }
+  catch (Dune::Exception & e)
+  {
+    std::cout << "Error (rank " << mpihelper.rank() << "): "
+              << e.what() << std::endl;
+  }
+
+
+  mpihelper.getCollectiveCommunication().barrier();
+  if (mpihelper.rank() == 0)
+    std::cout << "---- manual error" << std::endl;
+  try
+  {
+    // at the end of this block the guard is destroyed and possible exceptions are communicated
+    {
+      Dune::MPIGuard guard;
+      guard.finalize(mpihelper.rank() > 0);
+    }
+  }
+  catch (Dune::Exception & e)
+  {
+    std::cout << "Error (rank " << mpihelper.rank() << "): "
+              << e.what() << std::endl;
+  }
+
+  mpihelper.getCollectiveCommunication().barrier();
+  if (mpihelper.rank() == 0)
+    std::cout << "---- done" << std::endl;
+}
-- 
GitLab