diff --git a/dune/istl/novlpschwarz.hh b/dune/istl/novlpschwarz.hh
index 3f1167bb3357edf78594b12111a3105e76eaa0f1..1e4044442583f8a79ce9b01576535dc8be92cd83 100644
--- a/dune/istl/novlpschwarz.hh
+++ b/dune/istl/novlpschwarz.hh
@@ -263,8 +263,10 @@ namespace Dune {
 
   template<class C, class P>
   class NonoverlappingBlockPreconditioner
-    : public Dune::Preconditioner<typename P::domain_type,typename P::range_type> {
+    : public Preconditioner<typename P::domain_type,typename P::range_type> {
     friend struct Amg::ConstructionTraits<NonoverlappingBlockPreconditioner<C,P> >;
+    using X = typename P::domain_type;
+    using Y = typename P::range_type;
   public:
     //! \brief The domain type of the preconditioner.
     typedef typename P::domain_type domain_type;
@@ -280,9 +282,27 @@ namespace Dune {
        \param c The communication object for syncing owner and copy
        data points. (E.~g. OwnerOverlapCommunication )
      */
-    NonoverlappingBlockPreconditioner (P& prec, const communication_type& c)
-      : preconditioner(prec), communication(c)
-    {}
+    /*! \brief Constructor.
+
+       constructor gets all parameters to operate the prec.
+       \param p The sequential preconditioner.
+       \param c The communication object for syncing overlap and copy
+       data points. (E.~g. OwnerOverlapCopyCommunication )
+     */
+    NonoverlappingBlockPreconditioner (Preconditioner<X,Y>& p, const communication_type& c)
+      : _preconditioner(stackobject_to_shared_ptr(p)), _communication(c)
+    {   }
+
+    /*! \brief Constructor.
+
+       constructor gets all parameters to operate the prec.
+       \param p The sequential preconditioner.
+       \param c The communication object for syncing overlap and copy
+       data points. (E.~g. OwnerOverlapCopyCommunication )
+     */
+    NonoverlappingBlockPreconditioner (const std::shared_ptr<Preconditioner<X,Y>>& p, const communication_type& c)
+      : _preconditioner(p), _communication(c)
+    {   }
 
     /*!
        \brief Prepare the preconditioner.
@@ -291,7 +311,7 @@ namespace Dune {
      */
     virtual void pre (domain_type& x, range_type& b)
     {
-      preconditioner.pre(x,b);
+      _preconditioner->pre(x,b);
     }
 
     /*!
@@ -304,8 +324,8 @@ namespace Dune {
       // block preconditioner equivalent to WrappedPreconditioner from
       // pdelab/backend/ovlpistsolverbackend.hh,
       // but not to BlockPreconditioner from schwarz.hh
-      preconditioner.apply(v,d);
-      communication.addOwnerCopyToOwnerCopy(v,v);
+      _preconditioner->apply(v,d);
+      _communication.addOwnerCopyToOwnerCopy(v,v);
     }
 
     /*!
@@ -315,7 +335,7 @@ namespace Dune {
      */
     virtual void post (domain_type& x)
     {
-      preconditioner.post(x);
+      _preconditioner->post(x);
     }
 
     //! Category of the preconditioner (see SolverCategory::Category)
@@ -326,10 +346,10 @@ namespace Dune {
 
   private:
     //! \brief a sequential preconditioner
-    P& preconditioner;
+    std::shared_ptr<Preconditioner<X,Y>>& _preconditioner;
 
     //! \brief the communication object
-    const communication_type& communication;
+    const communication_type& _communication;
   };
 
   /** @} end documentation */
diff --git a/dune/istl/paamg/CMakeLists.txt b/dune/istl/paamg/CMakeLists.txt
index 457db831a569a076f7c6686f058e92d01b9fa824..4bf5dc147676e62e028dd54a794876c24375b6d5 100644
--- a/dune/istl/paamg/CMakeLists.txt
+++ b/dune/istl/paamg/CMakeLists.txt
@@ -14,6 +14,7 @@ install(FILES
   graph.hh
   graphcreator.hh
   hierarchy.hh
+  matrixhierarchy.hh
   indicescoarsener.hh
   kamg.hh
   parameters.hh
diff --git a/dune/istl/paamg/amg.hh b/dune/istl/paamg/amg.hh
index 249c0e09382d8b153d9bfde3495d110e3bf59c5b..62d647eb48a82955c267225281a3dbbc7854fe1b 100644
--- a/dune/istl/paamg/amg.hh
+++ b/dune/istl/paamg/amg.hh
@@ -7,7 +7,7 @@
 #include <dune/common/exceptions.hh>
 #include <dune/istl/paamg/smoother.hh>
 #include <dune/istl/paamg/transfer.hh>
-#include <dune/istl/paamg/hierarchy.hh>
+#include <dune/istl/paamg/matrixhierarchy.hh>
 #include <dune/istl/solvers.hh>
 #include <dune/istl/scalarproducts.hh>
 #include <dune/istl/superlu.hh>
@@ -126,8 +126,6 @@ namespace Dune
        */
       AMG(const AMG& amg);
 
-      ~AMG();
-
       /** \copydoc Preconditioner::pre */
       void pre(Domain& x, Range& b);
 
@@ -181,7 +179,8 @@ namespace Dune
        * @param pinfo The fine level parallel information.
        */
       template<class C>
-      void createHierarchies(C& criterion, Operator& matrix,
+      void createHierarchies(C& criterion,
+                             const std::shared_ptr<const Operator>& matrixptr,
                              const PI& pinfo);
       /**
        * @brief A struct that holds the context of the current level.
@@ -268,11 +267,11 @@ namespace Dune
       /** @brief The solver of the coarsest level. */
       std::shared_ptr<CoarseSolver> solver_;
       /** @brief The right hand side of our problem. */
-      Hierarchy<Range,A>* rhs_;
+      std::shared_ptr<Hierarchy<Range,A>> rhs_;
       /** @brief The left approximate solution of our problem. */
-      Hierarchy<Domain,A>* lhs_;
+      std::shared_ptr<Hierarchy<Domain,A>> lhs_;
       /** @brief The total update for the outer solver. */
-      Hierarchy<Domain,A>* update_;
+      std::shared_ptr<Hierarchy<Domain,A>> update_;
       /** @brief The type of the scalar product for the coarse solver. */
       using ScalarProduct = Dune::ScalarProduct<X>;
       /** @brief Scalar product on the coarse level. */
@@ -305,14 +304,7 @@ namespace Dune
       coarseSmoother_(amg.coarseSmoother_),
       category_(amg.category_),
       verbosity_(amg.verbosity_)
-    {
-      if(amg.rhs_)
-        rhs_=new Hierarchy<Range,A>(*amg.rhs_);
-      if(amg.lhs_)
-        lhs_=new Hierarchy<Domain,A>(*amg.lhs_);
-      if(amg.update_)
-        update_=new Hierarchy<Domain,A>(*amg.update_);
-    }
+    {}
 
     template<class M, class X, class S, class PI, class A>
     AMG<M,X,S,PI,A>::AMG(const OperatorHierarchy& matrices, CoarseSolver& coarseSolver,
@@ -356,28 +348,8 @@ namespace Dune
       // TODO: reestablish compile time checks.
       //static_assert(static_cast<int>(PI::category)==static_cast<int>(S::category),
       //             "Matrix and Solver must match in terms of category!");
-      createHierarchies(criterion, const_cast<Operator&>(matrix), pinfo);
-    }
-
-
-    template<class M, class X, class S, class PI, class A>
-    AMG<M,X,S,PI,A>::~AMG()
-    {
-      if(buildHierarchy_) {
-        if(solver_)
-          solver_.reset();
-        if(coarseSmoother_)
-          coarseSmoother_.reset();
-      }
-      if(lhs_)
-        delete lhs_;
-      lhs_=nullptr;
-      if(update_)
-        delete update_;
-      update_=nullptr;
-      if(rhs_)
-        delete rhs_;
-      rhs_=nullptr;
+      auto matrixptr = stackobject_to_shared_ptr(matrix);
+      createHierarchies(criterion, matrixptr, pinfo);
     }
 
     template <class Matrix,
@@ -447,11 +419,14 @@ namespace Dune
 
     template<class M, class X, class S, class PI, class A>
     template<class C>
-    void AMG<M,X,S,PI,A>::createHierarchies(C& criterion, Operator& matrix,
-                                            const PI& pinfo)
+    void AMG<M,X,S,PI,A>::createHierarchies(C& criterion,
+      const std::shared_ptr<const Operator>& matrixptr,
+      const PI& pinfo)
     {
       Timer watch;
-      matrices_.reset(new OperatorHierarchy(matrix, pinfo));
+      matrices_ = std::make_shared<OperatorHierarchy>(
+        std::const_pointer_cast<Operator>(matrixptr),
+        stackobject_to_shared_ptr(const_cast<PI&>(pinfo)));
 
       matrices_->template build<NegateSet<typename PI::OwnerSet> >(criterion);
 
@@ -481,7 +456,7 @@ namespace Dune
           cargs.setComm(*matrices_->parallelInformation().coarsest());
         }
 
-        coarseSmoother_.reset(ConstructionTraits<Smoother>::construct(cargs));
+        coarseSmoother_ = ConstructionTraits<Smoother>::construct(cargs);
         scalarProduct_ = createScalarProduct<X>(cargs.getComm(),category());
 
         typedef DirectSolverSelector< typename M::matrix_type, X > SolverSelector;
@@ -600,18 +575,9 @@ namespace Dune
       else
         // No smoother to make x consistent! Do it by hand
         matrices_->parallelInformation().coarsest()->copyOwnerToAll(x,x);
-      Range* copy = new Range(b);
-      if(rhs_)
-        delete rhs_;
-      rhs_ = new Hierarchy<Range,A>(copy);
-      Domain* dcopy = new Domain(x);
-      if(lhs_)
-        delete lhs_;
-      lhs_ = new Hierarchy<Domain,A>(dcopy);
-      dcopy = new Domain(x);
-      if(update_)
-        delete update_;
-      update_ = new Hierarchy<Domain,A>(dcopy);
+      rhs_ = std::make_shared<Hierarchy<Range,A>>(std::make_shared<Range>(b));
+      lhs_ = std::make_shared<Hierarchy<Domain,A>>(std::make_shared<Domain>(x));
+      update_ = std::make_shared<Hierarchy<Domain,A>>(std::make_shared<Domain>(x));
       matrices_->coarsenVector(*rhs_);
       matrices_->coarsenVector(*lhs_);
       matrices_->coarsenVector(*update_);
@@ -918,15 +884,9 @@ namespace Dune
             smoother->post(*lhs);
         smoother->post(*lhs);
       }
-      //delete &(*lhs_->finest());
-      delete lhs_;
-      lhs_=nullptr;
-      //delete &(*update_->finest());
-      delete update_;
-      update_=nullptr;
-      //delete &(*rhs_->finest());
-      delete rhs_;
-      rhs_=nullptr;
+      lhs_ = nullptr;
+      update_ = nullptr;
+      rhs_ = nullptr;
     }
 
     template<class M, class X, class S, class PI, class A>
diff --git a/dune/istl/paamg/construction.hh b/dune/istl/paamg/construction.hh
index 70a16727d6845d050627e2001600aeadb6252952..c0273adc560a1e4230ff9e9b0dcf23d7cfe069de 100644
--- a/dune/istl/paamg/construction.hh
+++ b/dune/istl/paamg/construction.hh
@@ -49,20 +49,10 @@ namespace Dune
        * In the default implementation the copy constructor is called.
        * @param args The arguments for the construction.
        */
-      static inline T* construct(Arguments&  args)
+      static inline std::shared_ptr<T> construct(Arguments&  args)
       {
-        return new T();
+        return std::make_shared<T>();
       }
-
-      /**
-       * @brief Destroys an object.
-       * @param t Pointer to the object to destroy.
-       */
-      static inline void deconstruct(T* t)
-      {
-        delete t;
-      }
-
     };
 
     template<class T, class A>
@@ -70,14 +60,9 @@ namespace Dune
     {
     public:
       typedef const int Arguments;
-      static inline BlockVector<T,A>* construct(Arguments& n)
+      static inline std::shared_ptr<BlockVector<T,A>> construct(Arguments& n)
       {
-        return new BlockVector<T,A>(n);
-      }
-
-      static inline void deconstruct(BlockVector<T,A>* t)
-      {
-        delete t;
+        return std::make_shared<BlockVector<T,A>>(n);
       }
     };
 
@@ -143,14 +128,10 @@ namespace Dune
     public:
       typedef OverlappingSchwarzOperatorArgs<M,C> Arguments;
 
-      static inline OverlappingSchwarzOperator<M,X,Y,C>* construct(const Arguments& args)
-      {
-        return new OverlappingSchwarzOperator<M,X,Y,C>(*args.matrix_, *args.comm_);
-      }
-
-      static inline void deconstruct(OverlappingSchwarzOperator<M,X,Y,C>* t)
+      static inline std::shared_ptr<OverlappingSchwarzOperator<M,X,Y,C>> construct(const Arguments& args)
       {
-        delete t;
+        return std::make_shared<OverlappingSchwarzOperator<M,X,Y,C>>
+          (*args.matrix_, *args.comm_);
       }
     };
 
@@ -160,14 +141,10 @@ namespace Dune
     public:
       typedef NonoverlappingOperatorArgs<M,C> Arguments;
 
-      static inline NonoverlappingSchwarzOperator<M,X,Y,C>* construct(const Arguments& args)
+      static inline std::shared_ptr<NonoverlappingSchwarzOperator<M,X,Y,C>> construct(const Arguments& args)
       {
-        return new NonoverlappingSchwarzOperator<M,X,Y,C>(*args.matrix_, *args.comm_);
-      }
-
-      static inline void deconstruct(NonoverlappingSchwarzOperator<M,X,Y,C>* t)
-      {
-        delete t;
+        return std::make_shared<NonoverlappingSchwarzOperator<M,X,Y,C>>
+          (*args.matrix_, *args.comm_);
       }
     };
 
@@ -187,14 +164,9 @@ namespace Dune
     public:
       typedef const MatrixAdapterArgs<M,X,Y> Arguments;
 
-      static inline MatrixAdapter<M,X,Y>* construct(Arguments& args)
-      {
-        return new MatrixAdapter<M,X,Y>(*args.matrix_);
-      }
-
-      static inline void deconstruct(MatrixAdapter<M,X,Y>* m)
+      static inline std::shared_ptr<MatrixAdapter<M,X,Y>> construct(Arguments& args)
       {
-        delete m;
+        return std::make_shared<MatrixAdapter<M,X,Y>>(*args.matrix_);
       }
     };
 
@@ -203,14 +175,9 @@ namespace Dune
     {
     public:
       typedef const SequentialCommunicationArgs Arguments;
-      static inline SequentialInformation* construct(Arguments& args)
+      static inline std::shared_ptr<SequentialInformation> construct(Arguments& args)
       {
-        return new SequentialInformation(args.comm_);
-      }
-
-      static inline void deconstruct(SequentialInformation* si)
-      {
-        delete si;
+        return std::make_shared<SequentialInformation>(args.comm_);
       }
     };
 
@@ -223,14 +190,9 @@ namespace Dune
     public:
       typedef const OwnerOverlapCopyCommunicationArgs Arguments;
 
-      static inline OwnerOverlapCopyCommunication<T1,T2>* construct(Arguments& args)
-      {
-        return new OwnerOverlapCopyCommunication<T1,T2>(args.comm_, args.cat_);
-      }
-
-      static inline void deconstruct(OwnerOverlapCopyCommunication<T1,T2>* com)
+      static inline std::shared_ptr<OwnerOverlapCopyCommunication<T1,T2>> construct(Arguments& args)
       {
-        delete com;
+        return std::make_shared<OwnerOverlapCopyCommunication<T1,T2>>(args.comm_, args.cat_);
       }
     };
 
diff --git a/dune/istl/paamg/fastamg.hh b/dune/istl/paamg/fastamg.hh
index 5b712656d2e6d93fc0cfaa74dbc4b213b2f3d05c..2ecf5a4a0c4d2018ffe199a38cc189e327f152a1 100644
--- a/dune/istl/paamg/fastamg.hh
+++ b/dune/istl/paamg/fastamg.hh
@@ -9,7 +9,7 @@
 #include <dune/common/unused.hh>
 #include <dune/istl/paamg/smoother.hh>
 #include <dune/istl/paamg/transfer.hh>
-#include <dune/istl/paamg/hierarchy.hh>
+#include <dune/istl/paamg/matrixhierarchy.hh>
 #include <dune/istl/solvers.hh>
 #include <dune/istl/scalarproducts.hh>
 #include <dune/istl/superlu.hh>
@@ -167,7 +167,8 @@ namespace Dune
        * @param pinfo The fine level parallel information.
        */
       template<class C>
-      void createHierarchies(C& criterion, Operator& matrix,
+      void createHierarchies(C& criterion,
+                             const std::shared_ptr<const Operator>& matrixptr,
                              const PI& pinfo);
 
       /**
@@ -342,7 +343,8 @@ namespace Dune
       // TODO: reestablish compile time checks.
       //static_assert(static_cast<int>(PI::category)==static_cast<int>(S::category),
       //             "Matrix and Solver must match in terms of category!");
-      createHierarchies(criterion, const_cast<Operator&>(matrix), pinfo);
+      auto matrixptr = stackobject_to_shared_ptr(matrix);
+      createHierarchies(criterion, matrixptr, pinfo);
     }
 
     template<class M, class X, class PI, class A>
@@ -367,11 +369,14 @@ namespace Dune
 
     template<class M, class X, class PI, class A>
     template<class C>
-    void FastAMG<M,X,PI,A>::createHierarchies(C& criterion, Operator& matrix,
-                                            const PI& pinfo)
+    void FastAMG<M,X,PI,A>::createHierarchies(C& criterion,
+      const std::shared_ptr<const Operator>& matrixptr,
+      const PI& pinfo)
     {
       Timer watch;
-      matrices_.reset(new OperatorHierarchy(matrix, pinfo));
+      matrices_ = std::make_shared<OperatorHierarchy>(
+        std::const_pointer_cast<Operator>(matrixptr),
+        stackobject_to_shared_ptr(const_cast<PI&>(pinfo)));
 
       matrices_->template build<NegateSet<typename PI::OwnerSet> >(criterion);
 
@@ -395,7 +400,7 @@ namespace Dune
           cargs.setComm(*matrices_->parallelInformation().coarsest());
         }
 
-        coarseSmoother_.reset(ConstructionTraits<Smoother>::construct(cargs));
+        coarseSmoother_ = ConstructionTraits<Smoother>::construct(cargs);
         scalarProduct_ = createScalarProduct<X>(cargs.getComm(),category());
 
 #if HAVE_SUPERLU|| HAVE_SUITESPARSE_UMFPACK
@@ -483,16 +488,13 @@ namespace Dune
       watch1.reset();
       // No smoother to make x consistent! Do it by hand
       matrices_->parallelInformation().coarsest()->copyOwnerToAll(x,x);
-      Range* copy = new Range(b);
       if(rhs_)
         delete rhs_;
-      rhs_ = new Hierarchy<Range,A>(copy);
-      Domain* dcopy = new Domain(x);
+      rhs_ = new Hierarchy<Range,A>(std::make_shared<Range>(b));
       if(lhs_)
         delete lhs_;
-      lhs_ = new Hierarchy<Domain,A>(dcopy);
-      dcopy = new Domain(x);
-      residual_ = new Hierarchy<Domain,A>(dcopy);
+      lhs_ = new Hierarchy<Domain,A>(std::make_shared<Domain>(x));
+      residual_ = new Hierarchy<Domain,A>(std::make_shared<Domain>(x));
       matrices_->coarsenVector(*rhs_);
       matrices_->coarsenVector(*lhs_);
       matrices_->coarsenVector(*residual_);
diff --git a/dune/istl/paamg/hierarchy.hh b/dune/istl/paamg/hierarchy.hh
index ff59ff0e53b82d0dfbdc0cf5100d7f4166ca5bd4..18a40d771e9353ee2e8a625e3a011d05566369bb 100644
--- a/dune/istl/paamg/hierarchy.hh
+++ b/dune/istl/paamg/hierarchy.hh
@@ -6,28 +6,11 @@
 #include <list>
 #include <memory>
 #include <limits>
-#include <algorithm>
-#include <tuple>
-#include "aggregates.hh"
-#include "graph.hh"
-#include "galerkin.hh"
-#include "renumberer.hh"
-#include "graphcreator.hh"
 #include <dune/common/stdstreams.hh>
 #include <dune/common/unused.hh>
 #include <dune/common/timer.hh>
 #include <dune/common/bigunsignedint.hh>
-#include <dune/istl/bvector.hh>
-#include <dune/common/parallel/indexset.hh>
-#include <dune/istl/matrixutils.hh>
-#include <dune/istl/matrixredistribute.hh>
-#include <dune/istl/paamg/dependency.hh>
-#include <dune/istl/paamg/graph.hh>
-#include <dune/istl/paamg/indicescoarsener.hh>
-#include <dune/istl/paamg/globalaggregates.hh>
 #include <dune/istl/paamg/construction.hh>
-#include <dune/istl/paamg/smoother.hh>
-#include <dune/istl/paamg/transfer.hh>
 
 namespace Dune
 {
@@ -44,17 +27,6 @@ namespace Dune
      * @brief Provides a classes representing the hierarchies in AMG.
      */
 
-    enum {
-      /**
-       * @brief Hard limit for the number of processes allowed.
-       *
-       * This is needed to prevent overflows when calculating
-       * the coarsening rate. Currently set 72,000 which is
-       * enough for JUGENE.
-       */
-      MAX_PROCESSES = 72000
-    };
-
     /**
      * @brief A hierarchy of containers (e.g. matrices or vectors)
      *
@@ -84,16 +56,16 @@ namespace Dune
         friend class LevelIterator<const Hierarchy<T,A>, const T>;
 
         /** @brief The next coarser element in the list. */
-        Element* coarser_;
+        std::weak_ptr<Element> coarser_;
 
         /** @brief The next finer element in the list. */
-        Element* finer_;
+        std::shared_ptr<Element> finer_;
 
         /** @brief Pointer to the element. */
-        MemberType* element_;
+        std::shared_ptr<MemberType> element_;
 
         /** @brief The redistributed version of the element. */
-        MemberType* redistributed_;
+        std::shared_ptr<MemberType> redistributed_;
       };
     public:
 
@@ -106,27 +78,21 @@ namespace Dune
 
       /**
        * @brief Construct a new hierarchy.
-       * @param first The first element in the hierarchy.
+       * @param first std::shared_ptr to the first element in the hierarchy.
        */
-      Hierarchy(MemberType& first);
+      Hierarchy(const std::shared_ptr<MemberType> & first);
 
       /**
-       * @brief Construct a new hierarchy.
-       * @param first Pointer to the first element in the hierarchy.
-       * @warning Hierarchy will be responsible for the memory
-       * management of the pointer.
+       * @brief Construct an empty hierarchy.
        */
-      Hierarchy(MemberType* first);
-
-      /**
-       * @brief Construct a new empty hierarchy.
-       */
-      Hierarchy();
+      Hierarchy() : levels_(0)
+      {}
 
       /**
-       * @brief Copy constructor.
+       * @brief Copy constructor (deep copy!).
        */
       Hierarchy(const Hierarchy& other);
+
       /**
        * @brief Add an element on a coarser level.
        * @param args The arguments needed for the construction.
@@ -159,10 +125,9 @@ namespace Dune
       public:
         /** @brief Constructor. */
         LevelIterator()
-          : element_(0)
         {}
 
-        LevelIterator(Element* element)
+        LevelIterator(std::shared_ptr<Element> element)
           : element_(element)
         {}
 
@@ -205,7 +170,7 @@ namespace Dune
         /** @brief Move to the next coarser level */
         void increment()
         {
-          element_ = element_->coarser_;
+          element_ = std::shared_ptr<Element>(element_->coarser_);
         }
 
         /** @brief Move to the next fine level */
@@ -220,7 +185,7 @@ namespace Dune
          */
         bool isRedistributed() const
         {
-          return element_->redistributed_;
+          return (bool)element_->redistributed_;
         }
 
         /**
@@ -232,7 +197,7 @@ namespace Dune
           assert(element_->redistributed_);
           return *element_->redistributed_;
         }
-        void addRedistributed(T1* t)
+        void addRedistributed(std::shared_ptr<T1> t)
         {
           element_->redistributed_ = t;
         }
@@ -243,7 +208,7 @@ namespace Dune
         }
 
       private:
-        Element* element_;
+        std::shared_ptr<Element> element_;
       };
 
       /** @brief Type of the mutable iterator. */
@@ -283,1026 +248,71 @@ namespace Dune
        */
       std::size_t levels() const;
 
-      /** @brief Destructor. */
-      ~Hierarchy();
-
     private:
+      /** @brief fix memory management of the finest element in the hierarchy
+
+          This object is passed in from outside, so that we have to
+          deal with shared ownership.
+      */
+      std::shared_ptr<MemberType> originalFinest_;
       /** @brief The finest element in the hierarchy. */
-      Element* finest_;
+      std::shared_ptr<Element> finest_;
       /** @brief The coarsest element in the hierarchy. */
-      Element* coarsest_;
-      /** @brief Whether the first element was not allocated by us. */
-      Element* nonAllocated_;
+      std::shared_ptr<Element> coarsest_;
       /** @brief The allocator for the list elements. */
       Allocator allocator_;
       /** @brief The number of levels in the hierarchy. */
       int levels_;
     };
 
-    /**
-     * @brief The hierarchies build by the coarsening process.
-     *
-     * Namely a hierarchy of matrices, index sets, remote indices,
-     * interfaces and communicators.
-     */
-    template<class M, class PI, class A=std::allocator<M> >
-    class MatrixHierarchy
-    {
-    public:
-      /** @brief The type of the matrix operator. */
-      typedef M MatrixOperator;
-
-      /** @brief The type of the matrix. */
-      typedef typename MatrixOperator::matrix_type Matrix;
-
-      /** @brief The type of the index set. */
-      typedef PI ParallelInformation;
-
-      /** @brief The allocator to use. */
-      typedef A Allocator;
-
-      /** @brief The type of the aggregates map we use. */
-      typedef Dune::Amg::AggregatesMap<typename MatrixGraph<Matrix>::VertexDescriptor> AggregatesMap;
-
-      /** @brief The type of the parallel matrix hierarchy. */
-      typedef Dune::Amg::Hierarchy<MatrixOperator,Allocator> ParallelMatrixHierarchy;
-
-      /** @brief The type of the parallel informarion hierarchy. */
-      typedef Dune::Amg::Hierarchy<ParallelInformation,Allocator> ParallelInformationHierarchy;
-
-      /** @brief Allocator for pointers. */
-      typedef typename Allocator::template rebind<AggregatesMap*>::other AAllocator;
-
-      /** @brief The type of the aggregates maps list. */
-      typedef std::list<AggregatesMap*,AAllocator> AggregatesMapList;
-
-      /** @brief The type of the redistribute information. */
-      typedef RedistributeInformation<ParallelInformation> RedistributeInfoType;
-
-      /** @brief Allocator for RedistributeInfoType. */
-      typedef typename Allocator::template rebind<RedistributeInfoType>::other RILAllocator;
-
-      /** @brief The type of the list of redistribute information. */
-      typedef std::list<RedistributeInfoType,RILAllocator> RedistributeInfoList;
-
-      /**
-       * @brief Constructor
-       * @param fineMatrix The matrix to coarsen.
-       * @param pinfo The information about the parallel data decomposition at the first level.
-       */
-      MatrixHierarchy(const MatrixOperator& fineMatrix,
-                      const ParallelInformation& pinfo=ParallelInformation());
-
-
-      ~MatrixHierarchy();
-
-      /**
-       * @brief Build the matrix hierarchy using aggregation.
-       *
-       * @brief criterion The criterion describing the aggregation process.
-       */
-      template<typename O, typename T>
-      void build(const T& criterion);
-
-      /**
-       * @brief Recalculate the galerkin products.
-       *
-       * If the data of the fine matrix changes but not its sparsity pattern
-       * this will recalculate all coarser levels without starting the expensive
-       * aggregation process all over again.
-       */
-      template<class F>
-      void recalculateGalerkin(const F& copyFlags);
-
-      /**
-       * @brief Coarsen the vector hierarchy according to the matrix hierarchy.
-       * @param hierarchy The vector hierarchy to coarsen.
-       */
-      template<class V, class BA, class TA>
-      void coarsenVector(Hierarchy<BlockVector<V,BA>, TA>& hierarchy) const;
-
-      /**
-       * @brief Coarsen the smoother hierarchy according to the matrix hierarchy.
-       * @param smoothers The smoother hierarchy to coarsen.
-       * @param args The arguments for the construction of the coarse level smoothers.
-       */
-      template<class S, class TA>
-      void coarsenSmoother(Hierarchy<S,TA>& smoothers,
-                           const typename SmootherTraits<S>::Arguments& args) const;
-
-      /**
-       * @brief Get the number of levels in the hierarchy.
-       * @return The number of levels.
-       */
-      std::size_t levels() const;
-
-      /**
-       * @brief Get the max number of levels in the hierarchy of processors.
-       * @return The maximum number of levels.
-       */
-      std::size_t maxlevels() const;
-
-      bool hasCoarsest() const;
-
-      /**
-       * @brief Whether the hierarchy was built.
-       * @return true if the MatrixHierarchy::build method was called.
-       */
-      bool isBuilt() const;
-
-      /**
-       * @brief Get the matrix hierarchy.
-       * @return The matrix hierarchy.
-       */
-      const ParallelMatrixHierarchy& matrices() const;
-
-      /**
-       * @brief Get the hierarchy of the parallel data distribution information.
-       * @return The hierarchy of the parallel data distribution information.
-       */
-      const ParallelInformationHierarchy& parallelInformation() const;
-
-      /**
-       * @brief Get the hierarchy of the mappings of the nodes onto aggregates.
-       * @return The hierarchy of the mappings of the nodes onto aggregates.
-       */
-      const AggregatesMapList& aggregatesMaps() const;
-
-      /**
-       * @brief Get the hierarchy of the information about redistributions,
-       * @return The hierarchy of the information about redistributions of the
-       * data to fewer processes.
-       */
-      const RedistributeInfoList& redistributeInformation() const;
-
-      double getProlongationDampingFactor() const
-      {
-        return prolongDamp_;
-      }
-
-      /**
-       * @brief Get the mapping of fine level unknowns to coarse level
-       * aggregates.
-       *
-       * For each fine level unknown i the correcponding data[i] is the
-       * aggregate it belongs to on the coarsest level.
-       *
-       * @param[out] data The mapping of fine level unknowns to coarse level
-       * aggregates.
-       */
-      void getCoarsestAggregatesOnFinest(std::vector<std::size_t>& data) const;
-
-    private:
-      typedef typename ConstructionTraits<MatrixOperator>::Arguments MatrixArgs;
-      typedef typename ConstructionTraits<ParallelInformation>::Arguments CommunicationArgs;
-      /** @brief The list of aggregates maps. */
-      AggregatesMapList aggregatesMaps_;
-      /** @brief The list of redistributes. */
-      RedistributeInfoList redistributes_;
-      /** @brief The hierarchy of parallel matrices. */
-      ParallelMatrixHierarchy matrices_;
-      /** @brief The hierarchy of the parallel information. */
-      ParallelInformationHierarchy parallelInformation_;
-
-      /** @brief Whether the hierarchy was built. */
-      bool built_;
-
-      /** @brief The maximum number of level across all processors.*/
-      int maxlevels_;
-
-      double prolongDamp_;
-
-      /**
-       * @brief functor to print matrix statistics.
-       */
-      template<class Matrix, bool print>
-      struct MatrixStats
-      {
-
-        /**
-         * @brief Print matrix statistics.
-         */
-        static void stats(const Matrix& matrix)
-        {
-          DUNE_UNUSED_PARAMETER(matrix);
-        }
-      };
-
-      template<class Matrix>
-      struct MatrixStats<Matrix,true>
-      {
-        struct calc
-        {
-          typedef typename Matrix::size_type size_type;
-          typedef typename Matrix::row_type matrix_row;
-
-          calc()
-          {
-            min=std::numeric_limits<size_type>::max();
-            max=0;
-            sum=0;
-          }
-
-          void operator()(const matrix_row& row)
-          {
-            min=std::min(min, row.size());
-            max=std::max(max, row.size());
-            sum += row.size();
-          }
-
-          size_type min;
-          size_type max;
-          size_type sum;
-        };
-        /**
-         * @brief Print matrix statistics.
-         */
-        static void stats(const Matrix& matrix)
-        {
-          calc c= for_each(matrix.begin(), matrix.end(), calc());
-          dinfo<<"Matrix row: min="<<c.min<<" max="<<c.max
-               <<" average="<<static_cast<double>(c.sum)/matrix.N()
-               <<std::endl;
-        }
-      };
-    };
-
-    /**
-     * @brief The criterion describing the stop criteria for the coarsening process.
-     */
-    template<class T>
-    class CoarsenCriterion : public T
-    {
-    public:
-      /**
-       * @brief The criterion for tagging connections as strong and nodes as isolated.
-       * This might be e.g. SymmetricCriterion or UnSymmetricCriterion.
-       */
-      typedef T AggregationCriterion;
-
-      /**
-       * @brief Constructor
-       * @param maxLevel The maximum number of levels allowed in the matrix hierarchy (default: 100).
-       * @param coarsenTarget If the number of nodes in the matrix is below this threshold the
-       * coarsening will stop (default: 1000).
-       * @param minCoarsenRate If the coarsening rate falls below this threshold the
-       * coarsening will stop (default: 1.2)
-       * @param prolongDamp The damping factor to apply to the prolongated update (default: 1.6)
-       * @param accumulate Whether to accumulate the data onto fewer processors on coarser levels.
-       */
-      CoarsenCriterion(int maxLevel=100, int coarsenTarget=1000, double minCoarsenRate=1.2,
-                       double prolongDamp=1.6, AccumulationMode accumulate=successiveAccu)
-        : AggregationCriterion(Dune::Amg::Parameters(maxLevel, coarsenTarget, minCoarsenRate, prolongDamp, accumulate))
-      {}
-
-      CoarsenCriterion(const Dune::Amg::Parameters& parms)
-        : AggregationCriterion(parms)
-      {}
-
-    };
-
-    template<typename M, typename C1>
-    bool repartitionAndDistributeMatrix(const M& origMatrix, M& newMatrix,
-                                        SequentialInformation& origSequentialInformationomm,
-                                        SequentialInformation*& newComm,
-                                        RedistributeInformation<SequentialInformation>& ri,
-                                        int nparts, C1& criterion)
-    {
-      DUNE_UNUSED_PARAMETER(origMatrix);
-      DUNE_UNUSED_PARAMETER(newMatrix);
-      DUNE_UNUSED_PARAMETER(origSequentialInformationomm);
-      DUNE_UNUSED_PARAMETER(newComm);
-      DUNE_UNUSED_PARAMETER(ri);
-      DUNE_UNUSED_PARAMETER(nparts);
-      DUNE_UNUSED_PARAMETER(criterion);
-      DUNE_THROW(NotImplemented, "Redistribution does not make sense in sequential code!");
-    }
-
-
-    template<typename M, typename C, typename C1>
-    bool repartitionAndDistributeMatrix(const M& origMatrix, M& newMatrix, C& origComm, C*& newComm,
-                                        RedistributeInformation<C>& ri,
-                                        int nparts, C1& criterion)
-    {
-      Timer time;
-#ifdef AMG_REPART_ON_COMM_GRAPH
-      // Done not repartition the matrix graph, but a graph of the communication scheme.
-      bool existentOnRedist=Dune::commGraphRepartition(origMatrix, origComm, nparts, newComm,
-                                                       ri.getInterface(),
-                                                       criterion.debugLevel()>1);
-
-#else
-      typedef Dune::Amg::MatrixGraph<const M> MatrixGraph;
-      typedef Dune::Amg::PropertiesGraph<MatrixGraph,
-          VertexProperties,
-          EdgeProperties,
-          IdentityMap,
-          IdentityMap> PropertiesGraph;
-      MatrixGraph graph(origMatrix);
-      PropertiesGraph pgraph(graph);
-      buildDependency(pgraph, origMatrix, criterion, false);
-
-#ifdef DEBUG_REPART
-      if(origComm.communicator().rank()==0)
-        std::cout<<"Original matrix"<<std::endl;
-      origComm.communicator().barrier();
-      printGlobalSparseMatrix(origMatrix, origComm, std::cout);
-#endif
-      bool existentOnRedist=Dune::graphRepartition(pgraph, origComm, nparts,
-                                                   newComm, ri.getInterface(),
-                                                   criterion.debugLevel()>1);
-#endif // if else AMG_REPART
-
-      if(origComm.communicator().rank()==0  && criterion.debugLevel()>1)
-        std::cout<<"Repartitioning took "<<time.elapsed()<<" seconds."<<std::endl;
-
-      ri.setSetup();
-
-#ifdef DEBUG_REPART
-      ri.checkInterface(origComm.indexSet(), newComm->indexSet(), origComm.communicator());
-#endif
-
-      redistributeMatrix(const_cast<M&>(origMatrix), newMatrix, origComm, *newComm, ri);
-
-#ifdef DEBUG_REPART
-      if(origComm.communicator().rank()==0)
-        std::cout<<"Original matrix"<<std::endl;
-      origComm.communicator().barrier();
-      if(newComm->communicator().size()>0)
-        printGlobalSparseMatrix(newMatrix, *newComm, std::cout);
-      origComm.communicator().barrier();
-#endif
-
-      if(origComm.communicator().rank()==0  && criterion.debugLevel()>1)
-        std::cout<<"Redistributing matrix took "<<time.elapsed()<<" seconds."<<std::endl;
-      return existentOnRedist;
-
-    }
-
-    template<typename M>
-    bool repartitionAndDistributeMatrix(M& origMatrix, M& newMatrix,
-                                        SequentialInformation& origComm,
-                                        SequentialInformation& newComm,
-                                        RedistributeInformation<SequentialInformation>& ri)
-    {
-      return true;
-    }
-
-    template<class M, class IS, class A>
-    MatrixHierarchy<M,IS,A>::MatrixHierarchy(const MatrixOperator& fineOperator,
-                                             const ParallelInformation& pinfo)
-      : matrices_(const_cast<MatrixOperator&>(fineOperator)),
-        parallelInformation_(const_cast<ParallelInformation&>(pinfo))
-    {
-      if (SolverCategory::category(fineOperator) != SolverCategory::category(pinfo))
-        DUNE_THROW(ISTLError, "MatrixOperator and ParallelInformation must belong to the same category!");
-    }
-
-    template<class M, class IS, class A>
-    template<typename O, typename T>
-    void MatrixHierarchy<M,IS,A>::build(const T& criterion)
-    {
-      prolongDamp_ = criterion.getProlongationDampingFactor();
-      typedef O OverlapFlags;
-      typedef typename ParallelMatrixHierarchy::Iterator MatIterator;
-      typedef typename ParallelInformationHierarchy::Iterator PInfoIterator;
-
-      static const int noints=(Dune::Amg::MAX_PROCESSES/4096>0) ? (Dune::Amg::MAX_PROCESSES/4096) : 1;
-
-      typedef bigunsignedint<sizeof(int)*8*noints> BIGINT;
-      GalerkinProduct<ParallelInformation> productBuilder;
-      MatIterator mlevel = matrices_.finest();
-      MatrixStats<typename M::matrix_type,MINIMAL_DEBUG_LEVEL<=INFO_DEBUG_LEVEL>::stats(mlevel->getmat());
-
-      PInfoIterator infoLevel = parallelInformation_.finest();
-      BIGINT finenonzeros=countNonZeros(mlevel->getmat());
-      finenonzeros = infoLevel->communicator().sum(finenonzeros);
-      BIGINT allnonzeros = finenonzeros;
-
-
-      int level = 0;
-      int rank = 0;
-
-      BIGINT unknowns = mlevel->getmat().N();
-
-      unknowns = infoLevel->communicator().sum(unknowns);
-      double dunknowns=unknowns.todouble();
-      infoLevel->buildGlobalLookup(mlevel->getmat().N());
-      redistributes_.push_back(RedistributeInfoType());
-
-      for(; level < criterion.maxLevel(); ++level, ++mlevel) {
-        assert(matrices_.levels()==redistributes_.size());
-        rank = infoLevel->communicator().rank();
-        if(rank==0 && criterion.debugLevel()>1)
-          std::cout<<"Level "<<level<<" has "<<dunknowns<<" unknowns, "<<dunknowns/infoLevel->communicator().size()
-                   <<" unknowns per proc (procs="<<infoLevel->communicator().size()<<")"<<std::endl;
-
-        MatrixOperator* matrix=&(*mlevel);
-        ParallelInformation* info =&(*infoLevel);
-
-        if((
-#if HAVE_PARMETIS
-             criterion.accumulate()==successiveAccu
-#else
-             false
-#endif
-             || (criterion.accumulate()==atOnceAccu
-                 && dunknowns < 30*infoLevel->communicator().size()))
-           && infoLevel->communicator().size()>1 &&
-           dunknowns/infoLevel->communicator().size() <= criterion.coarsenTarget())
-        {
-          // accumulate to fewer processors
-          Matrix* redistMat= new Matrix();
-          ParallelInformation* redistComm=0;
-          std::size_t nodomains = (std::size_t)std::ceil(dunknowns/(criterion.minAggregateSize()
-                                                                    *criterion.coarsenTarget()));
-          if( nodomains<=criterion.minAggregateSize()/2 ||
-              dunknowns <= criterion.coarsenTarget() )
-            nodomains=1;
-
-          bool existentOnNextLevel =
-            repartitionAndDistributeMatrix(mlevel->getmat(), *redistMat, *infoLevel,
-                                           redistComm, redistributes_.back(), nodomains,
-                                           criterion);
-          BIGINT unknownsRedist = redistMat->N();
-          unknownsRedist = infoLevel->communicator().sum(unknownsRedist);
-          dunknowns= unknownsRedist.todouble();
-          if(redistComm->communicator().rank()==0 && criterion.debugLevel()>1)
-            std::cout<<"Level "<<level<<" (redistributed) has "<<dunknowns<<" unknowns, "<<dunknowns/redistComm->communicator().size()
-                     <<" unknowns per proc (procs="<<redistComm->communicator().size()<<")"<<std::endl;
-          MatrixArgs args(*redistMat, *redistComm);
-          mlevel.addRedistributed(ConstructionTraits<MatrixOperator>::construct(args));
-          assert(mlevel.isRedistributed());
-          infoLevel.addRedistributed(redistComm);
-          infoLevel->freeGlobalLookup();
-
-          if(!existentOnNextLevel)
-            // We do not hold any data on the redistributed partitioning
-            break;
-
-          // Work on the redistributed Matrix from now on
-          matrix = &(mlevel.getRedistributed());
-          info = &(infoLevel.getRedistributed());
-          info->buildGlobalLookup(matrix->getmat().N());
-        }
-
-        rank = info->communicator().rank();
-        if(dunknowns <= criterion.coarsenTarget())
-          // No further coarsening needed
-          break;
-
-        typedef PropertiesGraphCreator<MatrixOperator,ParallelInformation> GraphCreator;
-        typedef typename GraphCreator::PropertiesGraph PropertiesGraph;
-        typedef typename GraphCreator::GraphTuple GraphTuple;
-
-        typedef typename PropertiesGraph::VertexDescriptor Vertex;
-
-        std::vector<bool> excluded(matrix->getmat().N(), false);
-
-        GraphTuple graphs = GraphCreator::create(*matrix, excluded, *info, OverlapFlags());
-
-        AggregatesMap* aggregatesMap=new AggregatesMap(std::get<1>(graphs)->maxVertex()+1);
-
-        aggregatesMaps_.push_back(aggregatesMap);
-
-        Timer watch;
-        watch.reset();
-        int noAggregates, isoAggregates, oneAggregates, skippedAggregates;
-
-        std::tie(noAggregates, isoAggregates, oneAggregates, skippedAggregates) =
-          aggregatesMap->buildAggregates(matrix->getmat(), *(std::get<1>(graphs)), criterion, level==0);
-
-        if(rank==0 && criterion.debugLevel()>2)
-          std::cout<<" Have built "<<noAggregates<<" aggregates totally ("<<isoAggregates<<" isolated aggregates, "<<
-          oneAggregates<<" aggregates of one vertex,  and skipped "<<
-          skippedAggregates<<" aggregates)."<<std::endl;
-#ifdef TEST_AGGLO
-        {
-          // calculate size of local matrix in the distributed direction
-          int start, end, overlapStart, overlapEnd;
-          int procs=info->communicator().rank();
-          int n = UNKNOWNS/procs; // number of unknowns per process
-          int bigger = UNKNOWNS%procs; // number of process with n+1 unknows
-
-          // Compute owner region
-          if(rank<bigger) {
-            start = rank*(n+1);
-            end   = (rank+1)*(n+1);
-          }else{
-            start = bigger + rank * n;
-            end   = bigger + (rank + 1) * n;
-          }
-
-          // Compute overlap region
-          if(start>0)
-            overlapStart = start - 1;
-          else
-            overlapStart = start;
-
-          if(end<UNKNOWNS)
-            overlapEnd = end + 1;
-          else
-            overlapEnd = end;
-
-          assert((UNKNOWNS)*(overlapEnd-overlapStart)==aggregatesMap->noVertices());
-          for(int j=0; j< UNKNOWNS; ++j)
-            for(int i=0; i < UNKNOWNS; ++i)
-            {
-              if(i>=overlapStart && i<overlapEnd)
-              {
-                int no = (j/2)*((UNKNOWNS)/2)+i/2;
-                (*aggregatesMap)[j*(overlapEnd-overlapStart)+i-overlapStart]=no;
-              }
-            }
-        }
-#endif
-        if(criterion.debugLevel()>1 && info->communicator().rank()==0)
-          std::cout<<"aggregating finished."<<std::endl;
-
-        BIGINT gnoAggregates=noAggregates;
-        gnoAggregates = info->communicator().sum(gnoAggregates);
-        double dgnoAggregates = gnoAggregates.todouble();
-#ifdef TEST_AGGLO
-        BIGINT gnoAggregates=((UNKNOWNS)/2)*((UNKNOWNS)/2);
-#endif
-
-        if(criterion.debugLevel()>2 && rank==0)
-          std::cout << "Building "<<dgnoAggregates<<" aggregates took "<<watch.elapsed()<<" seconds."<<std::endl;
-
-        if(dgnoAggregates==0 || dunknowns/dgnoAggregates<criterion.minCoarsenRate())
-        {
-          if(rank==0)
-          {
-            if(dgnoAggregates>0)
-              std::cerr << "Stopped coarsening because of rate breakdown "<<dunknowns<<"/"<<dgnoAggregates
-                        <<"="<<dunknowns/dgnoAggregates<<"<"
-                        <<criterion.minCoarsenRate()<<std::endl;
-            else
-              std::cerr<< "Could not build any aggregates. Probably no connected nodes."<<std::endl;
-          }
-          aggregatesMap->free();
-          delete aggregatesMap;
-          aggregatesMaps_.pop_back();
-
-          if(criterion.accumulate() && mlevel.isRedistributed() && info->communicator().size()>1) {
-            // coarse level matrix was already redistributed, but to more than 1 process
-            // Therefore need to delete the redistribution. Further down it will
-            // then be redistributed to 1 process
-            delete &(mlevel.getRedistributed().getmat());
-            mlevel.deleteRedistributed();
-            delete &(infoLevel.getRedistributed());
-            infoLevel.deleteRedistributed();
-            redistributes_.back().resetSetup();
-          }
-
-          break;
-        }
-        unknowns =  noAggregates;
-        dunknowns = dgnoAggregates;
-
-        CommunicationArgs commargs(info->communicator(),info->category());
-        parallelInformation_.addCoarser(commargs);
-
-        ++infoLevel; // parallel information on coarse level
-
-        typename PropertyMapTypeSelector<VertexVisitedTag,PropertiesGraph>::Type visitedMap =
-          get(VertexVisitedTag(), *(std::get<1>(graphs)));
-
-        watch.reset();
-        int aggregates = IndicesCoarsener<ParallelInformation,OverlapFlags>
-                         ::coarsen(*info,
-                                   *(std::get<1>(graphs)),
-                                   visitedMap,
-                                   *aggregatesMap,
-                                   *infoLevel,
-                                   noAggregates);
-        GraphCreator::free(graphs);
-
-        if(criterion.debugLevel()>2) {
-          if(rank==0)
-            std::cout<<"Coarsening of index sets took "<<watch.elapsed()<<" seconds."<<std::endl;
-        }
-
-        watch.reset();
-
-        infoLevel->buildGlobalLookup(aggregates);
-        AggregatesPublisher<Vertex,OverlapFlags,ParallelInformation>::publish(*aggregatesMap,
-                                                                              *info,
-                                                                              infoLevel->globalLookup());
-
-
-        if(criterion.debugLevel()>2) {
-          if(rank==0)
-            std::cout<<"Communicating global aggregate numbers took "<<watch.elapsed()<<" seconds."<<std::endl;
-        }
-
-        watch.reset();
-        std::vector<bool>& visited=excluded;
-
-        typedef std::vector<bool>::iterator Iterator;
-        typedef IteratorPropertyMap<Iterator, IdentityMap> VisitedMap2;
-        Iterator end = visited.end();
-        for(Iterator iter= visited.begin(); iter != end; ++iter)
-          *iter=false;
-
-        VisitedMap2 visitedMap2(visited.begin(), Dune::IdentityMap());
-
-        typename MatrixOperator::matrix_type* coarseMatrix;
-
-        coarseMatrix = productBuilder.build(*(std::get<0>(graphs)), visitedMap2,
-                                            *info,
-                                            *aggregatesMap,
-                                            aggregates,
-                                            OverlapFlags());
-        dverb<<"Building of sparsity pattern took "<<watch.elapsed()<<std::endl;
-        watch.reset();
-        info->freeGlobalLookup();
-
-        delete std::get<0>(graphs);
-        productBuilder.calculate(matrix->getmat(), *aggregatesMap, *coarseMatrix, *infoLevel, OverlapFlags());
-
-        if(criterion.debugLevel()>2) {
-          if(rank==0)
-            std::cout<<"Calculation entries of Galerkin product took "<<watch.elapsed()<<" seconds."<<std::endl;
-        }
-
-        BIGINT nonzeros = countNonZeros(*coarseMatrix);
-        allnonzeros = allnonzeros + infoLevel->communicator().sum(nonzeros);
-        MatrixArgs args(*coarseMatrix, *infoLevel);
-
-        matrices_.addCoarser(args);
-        redistributes_.push_back(RedistributeInfoType());
-      } // end level loop
-
-
-      infoLevel->freeGlobalLookup();
-
-      built_=true;
-      AggregatesMap* aggregatesMap=new AggregatesMap(0);
-      aggregatesMaps_.push_back(aggregatesMap);
-
-      if(criterion.debugLevel()>0) {
-        if(level==criterion.maxLevel()) {
-          BIGINT unknownsLevel = mlevel->getmat().N();
-          unknownsLevel = infoLevel->communicator().sum(unknownsLevel);
-          double dunknownsLevel = unknownsLevel.todouble();
-          if(rank==0 && criterion.debugLevel()>1) {
-            std::cout<<"Level "<<level<<" has "<<dunknownsLevel<<" unknowns, "<<dunknownsLevel/infoLevel->communicator().size()
-                     <<" unknowns per proc (procs="<<infoLevel->communicator().size()<<")"<<std::endl;
-          }
-        }
-      }
-
-      if(criterion.accumulate() && !redistributes_.back().isSetup() &&
-         infoLevel->communicator().size()>1) {
-#if HAVE_MPI && !HAVE_PARMETIS
-        if(criterion.accumulate()==successiveAccu &&
-           infoLevel->communicator().rank()==0)
-          std::cerr<<"Successive accumulation of data on coarse levels only works with ParMETIS installed."
-                   <<"  Fell back to accumulation to one domain on coarsest level"<<std::endl;
-#endif
-
-        // accumulate to fewer processors
-        Matrix* redistMat= new Matrix();
-        ParallelInformation* redistComm=0;
-        int nodomains = 1;
-
-        repartitionAndDistributeMatrix(mlevel->getmat(), *redistMat, *infoLevel,
-                                       redistComm, redistributes_.back(), nodomains,criterion);
-        MatrixArgs args(*redistMat, *redistComm);
-        BIGINT unknownsRedist = redistMat->N();
-        unknownsRedist = infoLevel->communicator().sum(unknownsRedist);
-
-        if(redistComm->communicator().rank()==0 && criterion.debugLevel()>1) {
-          double dunknownsRedist = unknownsRedist.todouble();
-          std::cout<<"Level "<<level<<" redistributed has "<<dunknownsRedist<<" unknowns, "<<dunknownsRedist/redistComm->communicator().size()
-                   <<" unknowns per proc (procs="<<redistComm->communicator().size()<<")"<<std::endl;
-        }
-        mlevel.addRedistributed(ConstructionTraits<MatrixOperator>::construct(args));
-        infoLevel.addRedistributed(redistComm);
-        infoLevel->freeGlobalLookup();
-      }
-
-      int levels = matrices_.levels();
-      maxlevels_ = parallelInformation_.finest()->communicator().max(levels);
-      assert(matrices_.levels()==redistributes_.size());
-      if(hasCoarsest() && rank==0 && criterion.debugLevel()>1)
-        std::cout<<"operator complexity: "<<allnonzeros.todouble()/finenonzeros.todouble()<<std::endl;
-
-    }
-
-    template<class M, class IS, class A>
-    const typename MatrixHierarchy<M,IS,A>::ParallelMatrixHierarchy&
-    MatrixHierarchy<M,IS,A>::matrices() const
-    {
-      return matrices_;
-    }
-
-    template<class M, class IS, class A>
-    const typename MatrixHierarchy<M,IS,A>::ParallelInformationHierarchy&
-    MatrixHierarchy<M,IS,A>::parallelInformation() const
-    {
-      return parallelInformation_;
-    }
-
-    template<class M, class IS, class A>
-    void MatrixHierarchy<M,IS,A>::getCoarsestAggregatesOnFinest(std::vector<std::size_t>& data) const
-    {
-      int levels=aggregatesMaps().size();
-      int maxlevels=parallelInformation_.finest()->communicator().max(levels);
-      std::size_t size=(*(aggregatesMaps().begin()))->noVertices();
-      // We need an auxiliary vector for the consecutive prolongation.
-      std::vector<std::size_t> tmp;
-      std::vector<std::size_t> *coarse, *fine;
-
-      // make sure the allocated space suffices.
-      tmp.reserve(size);
-      data.reserve(size);
-
-      // Correctly assign coarse and fine for the first prolongation such that
-      // we end up in data in the end.
-      if(levels%2==0) {
-        coarse=&tmp;
-        fine=&data;
-      }else{
-        coarse=&data;
-        fine=&tmp;
-      }
-
-      // Number the unknowns on the coarsest level consecutively for each process.
-      if(levels==maxlevels) {
-        const AggregatesMap& map = *(*(++aggregatesMaps().rbegin()));
-        std::size_t m=0;
-
-        for(typename AggregatesMap::const_iterator iter = map.begin(); iter != map.end(); ++iter)
-          if(*iter< AggregatesMap::ISOLATED)
-            m=std::max(*iter,m);
-
-        coarse->resize(m+1);
-        std::size_t i=0;
-        srand((unsigned)std::clock());
-        std::set<size_t> used;
-        for(typename std::vector<std::size_t>::iterator iter=coarse->begin(); iter != coarse->end();
-            ++iter, ++i)
-        {
-          std::pair<std::set<std::size_t>::iterator,bool> ibpair
-            = used.insert(static_cast<std::size_t>((((double)rand())/(RAND_MAX+1.0)))*coarse->size());
-
-          while(!ibpair.second)
-            ibpair = used.insert(static_cast<std::size_t>((((double)rand())/(RAND_MAX+1.0))*coarse->size()));
-          *iter=*(ibpair.first);
-        }
-      }
-
-      typename ParallelInformationHierarchy::Iterator pinfo = parallelInformation().coarsest();
-      --pinfo;
-
-      // Now consecutively project the numbers to the finest level.
-      for(typename AggregatesMapList::const_reverse_iterator aggregates=++aggregatesMaps().rbegin();
-          aggregates != aggregatesMaps().rend(); ++aggregates,--levels) {
-
-        fine->resize((*aggregates)->noVertices());
-        fine->assign(fine->size(), 0);
-        Transfer<typename AggregatesMap::AggregateDescriptor, std::vector<std::size_t>, ParallelInformation>
-        ::prolongateVector(*(*aggregates), *coarse, *fine, static_cast<std::size_t>(1), *pinfo);
-        --pinfo;
-        std::swap(coarse, fine);
-      }
-
-      // Assertion to check that we really projected to data on the last step.
-      assert(coarse==&data);
-    }
-
-    template<class M, class IS, class A>
-    const typename MatrixHierarchy<M,IS,A>::AggregatesMapList&
-    MatrixHierarchy<M,IS,A>::aggregatesMaps() const
-    {
-      return aggregatesMaps_;
-    }
-    template<class M, class IS, class A>
-    const typename MatrixHierarchy<M,IS,A>::RedistributeInfoList&
-    MatrixHierarchy<M,IS,A>::redistributeInformation() const
-    {
-      return redistributes_;
-    }
-
-    template<class M, class IS, class A>
-    MatrixHierarchy<M,IS,A>::~MatrixHierarchy()
-    {
-      typedef typename AggregatesMapList::reverse_iterator AggregatesMapIterator;
-      typedef typename ParallelMatrixHierarchy::Iterator Iterator;
-      typedef typename ParallelInformationHierarchy::Iterator InfoIterator;
-
-      AggregatesMapIterator amap = aggregatesMaps_.rbegin();
-      InfoIterator info = parallelInformation_.coarsest();
-      for(Iterator level=matrices_.coarsest(), finest=matrices_.finest(); level != finest;  --level, --info, ++amap) {
-        (*amap)->free();
-        delete *amap;
-        delete &level->getmat();
-        if(level.isRedistributed())
-          delete &(level.getRedistributed().getmat());
-      }
-      delete *amap;
-    }
-
-    template<class M, class IS, class A>
-    template<class V, class BA, class TA>
-    void MatrixHierarchy<M,IS,A>::coarsenVector(Hierarchy<BlockVector<V,BA>, TA>& hierarchy) const
-    {
-      assert(hierarchy.levels()==1);
-      typedef typename ParallelMatrixHierarchy::ConstIterator Iterator;
-      typedef typename RedistributeInfoList::const_iterator RIter;
-      RIter redist = redistributes_.begin();
-
-      Iterator matrix = matrices_.finest(), coarsest = matrices_.coarsest();
-      int level=0;
-      if(redist->isSetup())
-        hierarchy.addRedistributedOnCoarsest(matrix.getRedistributed().getmat().N());
-      Dune::dvverb<<"Level "<<level<<" has "<<matrices_.finest()->getmat().N()<<" unknowns!"<<std::endl;
-
-      while(matrix != coarsest) {
-        ++matrix; ++level; ++redist;
-        Dune::dvverb<<"Level "<<level<<" has "<<matrix->getmat().N()<<" unknowns!"<<std::endl;
-
-        hierarchy.addCoarser(matrix->getmat().N());
-        if(redist->isSetup())
-          hierarchy.addRedistributedOnCoarsest(matrix.getRedistributed().getmat().N());
-
-      }
-
-    }
-
-    template<class M, class IS, class A>
-    template<class S, class TA>
-    void MatrixHierarchy<M,IS,A>::coarsenSmoother(Hierarchy<S,TA>& smoothers,
-                                                  const typename SmootherTraits<S>::Arguments& sargs) const
-    {
-      assert(smoothers.levels()==0);
-      typedef typename ParallelMatrixHierarchy::ConstIterator MatrixIterator;
-      typedef typename ParallelInformationHierarchy::ConstIterator PinfoIterator;
-      typedef typename AggregatesMapList::const_iterator AggregatesIterator;
-
-      typename ConstructionTraits<S>::Arguments cargs;
-      cargs.setArgs(sargs);
-      PinfoIterator pinfo = parallelInformation_.finest();
-      AggregatesIterator aggregates = aggregatesMaps_.begin();
-      int level=0;
-      for(MatrixIterator matrix = matrices_.finest(), coarsest = matrices_.coarsest();
-          matrix != coarsest; ++matrix, ++pinfo, ++aggregates, ++level) {
-        cargs.setMatrix(matrix->getmat(), **aggregates);
-        cargs.setComm(*pinfo);
-        smoothers.addCoarser(cargs);
-      }
-      if(maxlevels()>levels()) {
-        // This is not the globally coarsest level and therefore smoothing is needed
-        cargs.setMatrix(matrices_.coarsest()->getmat(), **aggregates);
-        cargs.setComm(*pinfo);
-        smoothers.addCoarser(cargs);
-        ++level;
-      }
-    }
-
-    template<class M, class IS, class A>
-    template<class F>
-    void MatrixHierarchy<M,IS,A>::recalculateGalerkin(const F& copyFlags)
-    {
-      typedef typename AggregatesMapList::iterator AggregatesMapIterator;
-      typedef typename ParallelMatrixHierarchy::Iterator Iterator;
-      typedef typename ParallelInformationHierarchy::Iterator InfoIterator;
-
-      AggregatesMapIterator amap = aggregatesMaps_.begin();
-      BaseGalerkinProduct productBuilder;
-      InfoIterator info = parallelInformation_.finest();
-      typename RedistributeInfoList::iterator riIter = redistributes_.begin();
-      Iterator level = matrices_.finest(), coarsest=matrices_.coarsest();
-      if(level.isRedistributed()) {
-        info->buildGlobalLookup(level->getmat().N());
-        redistributeMatrixEntries(const_cast<Matrix&>(level->getmat()),
-                                  const_cast<Matrix&>(level.getRedistributed().getmat()),
-                                  *info,info.getRedistributed(), *riIter);
-        info->freeGlobalLookup();
-      }
-
-      for(; level!=coarsest; ++amap) {
-        const Matrix& fine = (level.isRedistributed() ? level.getRedistributed() : *level).getmat();
-        ++level;
-        ++info;
-        ++riIter;
-        productBuilder.calculate(fine, *(*amap), const_cast<Matrix&>(level->getmat()), *info, copyFlags);
-        if(level.isRedistributed()) {
-          info->buildGlobalLookup(level->getmat().N());
-          redistributeMatrixEntries(const_cast<Matrix&>(level->getmat()),
-                                    const_cast<Matrix&>(level.getRedistributed().getmat()), *info,
-                                    info.getRedistributed(), *riIter);
-          info->freeGlobalLookup();
-        }
-      }
-    }
-
-    template<class M, class IS, class A>
-    std::size_t MatrixHierarchy<M,IS,A>::levels() const
-    {
-      return matrices_.levels();
-    }
-
-    template<class M, class IS, class A>
-    std::size_t MatrixHierarchy<M,IS,A>::maxlevels() const
-    {
-      return maxlevels_;
-    }
-
-    template<class M, class IS, class A>
-    bool MatrixHierarchy<M,IS,A>::hasCoarsest() const
-    {
-      return levels()==maxlevels() &&
-             (!matrices_.coarsest().isRedistributed() ||matrices_.coarsest()->getmat().N()>0);
-    }
-
-    template<class M, class IS, class A>
-    bool MatrixHierarchy<M,IS,A>::isBuilt() const
-    {
-      return built_;
-    }
-
-    template<class T, class A>
-    Hierarchy<T,A>::Hierarchy()
-      : finest_(0), coarsest_(0), nonAllocated_(0), allocator_(), levels_(0)
-    {}
-
     template<class T, class A>
-    Hierarchy<T,A>::Hierarchy(MemberType& first)
-      : allocator_()
+    Hierarchy<T,A>::Hierarchy(const std::shared_ptr<MemberType> & first)
+      : originalFinest_(first)
     {
-      finest_ = allocator_.allocate(1,0);
-      finest_->element_ = &first;
-      finest_->redistributed_ = nullptr;
-      nonAllocated_ = finest_;
+      finest_ = std::allocate_shared<Element>(allocator_);
+      finest_->element_ = originalFinest_;
       coarsest_ = finest_;
-      coarsest_->coarser_ = coarsest_->finer_ = nullptr;
       levels_ = 1;
     }
 
-    template<class T, class A>
-    Hierarchy<T,A>::Hierarchy(MemberType* first)
-      : allocator_()
-    {
-      finest_ = allocator_.allocate(1,0);
-      finest_->element_ = first;
-      finest_->redistributed_ = nullptr;
-      nonAllocated_ = nullptr;
-      coarsest_ = finest_;
-      coarsest_->coarser_ = coarsest_->finer_ = nullptr;
-      levels_ = 1;
-    }
-    template<class T, class A>
-    Hierarchy<T,A>::~Hierarchy()
-    {
-      while(coarsest_) {
-        Element* current = coarsest_;
-        coarsest_ = coarsest_->finer_;
-        if(current != nonAllocated_) {
-          if(current->redistributed_)
-            ConstructionTraits<T>::deconstruct(current->redistributed_);
-          ConstructionTraits<T>::deconstruct(current->element_);
-        }
-        allocator_.deallocate(current, 1);
-        current=nullptr;
-        //coarsest_->coarser_ = nullptr;
-      }
-    }
-
+    //! \brief deep copy of a given hierarchy
+    #warning do we catually want to support this? This might be very expensive?!
     template<class T, class A>
     Hierarchy<T,A>::Hierarchy(const Hierarchy& other)
-    : nonAllocated_(), allocator_(other.allocator_),
+    : allocator_(other.allocator_),
       levels_(other.levels_)
     {
       if(!other.finest_)
       {
-        finest_=coarsest_=nonAllocated_=nullptr;
+        finest_=coarsest_=nullptr;
         return;
       }
-      finest_=allocator_.allocate(1,0);
-      Element* finer_         = nullptr;
-      Element* current_      = finest_;
-      Element* otherCurrent_ = other.finest_;
+      finest_ = std::allocate_shared<Element>(allocator_);
+      std::shared_ptr<Element> finer_;
+      std::shared_ptr<Element> current_ = finest_;
+      std::weak_ptr<Element> otherWeak_ = other.finest_;
 
-      while(otherCurrent_)
+      while(! otherWeak_.expired())
       {
-        T* t=new T(*(otherCurrent_->element_));
-        current_->element_=t;
+        // create shared_ptr from weak_ptr, we just checked that this is safe
+        std::shared_ptr<Element> otherCurrent_ = std::shared_ptr<Element>(otherWeak_);
+        // clone current level
+        #warning should we use the allocator?
+        current_->element_ =
+          std::make_shared<MemberType>(*(otherCurrent_->element_));
         current_->finer_=finer_;
         if(otherCurrent_->redistributed_)
-          current_->redistributed_ = new T(*otherCurrent_->redistributed_);
-        else
-          current_->redistributed_= nullptr;
+          current_->redistributed_ =
+            std::make_shared<MemberType>(*(otherCurrent_->redistributed_));
         finer_=current_;
-        if(otherCurrent_->coarser_)
+        if(not otherCurrent_->coarser_.expired())
         {
-          current_->coarser_=allocator_.allocate(1,0);
-          current_=current_->coarser_;
-        }else
-          current_->coarser_=nullptr;
-        otherCurrent_=otherCurrent_->coarser_;
+          auto c = std::allocate_shared<Element>(allocator_);
+          current_->coarser_ = c;
+          current_ = c;
+        }
+        // go to coarser level
+        otherWeak_ = otherCurrent_->coarser_;
       }
       coarsest_=current_;
     }
@@ -1323,19 +333,20 @@ namespace Dune
     void Hierarchy<T,A>::addCoarser(Arguments& args)
     {
       if(!coarsest_) {
+        // we have no levels at all...
         assert(!finest_);
-        coarsest_ = allocator_.allocate(1,0);
-        coarsest_->element_ = ConstructionTraits<MemberType>::construct(args);
+        // allocate into the shared_ptr
+        originalFinest_ = ConstructionTraits<MemberType>::construct(args);
+        coarsest_ = std::allocate_shared<Element>(allocator_);
+        coarsest_->element_ = originalFinest_;
         finest_ = coarsest_;
-        coarsest_->finer_ = nullptr;
       }else{
-        coarsest_->coarser_ = allocator_.allocate(1,0);
-        coarsest_->coarser_->finer_ = coarsest_;
-        coarsest_ = coarsest_->coarser_;
+        auto old_coarsest = coarsest_;
+        coarsest_ = std::allocate_shared<Element>(allocator_);
+        coarsest_->finer_ = old_coarsest;
         coarsest_->element_ = ConstructionTraits<MemberType>::construct(args);
+        old_coarsest->coarser_ = coarsest_;
       }
-      coarsest_->redistributed_ = nullptr;
-      coarsest_->coarser_=nullptr;
       ++levels_;
     }
 
@@ -1343,17 +354,19 @@ namespace Dune
     template<class T, class A>
     void Hierarchy<T,A>::addFiner(Arguments& args)
     {
+#warning wouldn't it be better to do this in the constructor?'
       if(!finest_) {
+        // we have no levels at all...
         assert(!coarsest_);
-        finest_ = allocator_.allocate(1,0);
-        finest_->element = ConstructionTraits<T>::construct(args);
+        // allocate into the shared_ptr
+        originalFinest_ = ConstructionTraits<MemberType>::construct(args);
+        finest_ = std::allocate_shared<Element>(allocator_);
+        finest_->element = originalFinest_;
         coarsest_ = finest_;
-        coarsest_->coarser_ = coarsest_->finer_ = nullptr;
       }else{
-        finest_->finer_ = allocator_.allocate(1,0);
+        finest_->finer_ = std::allocate_shared<Element>(allocator_);
         finest_->finer_->coarser_ = finest_;
         finest_ = finest_->finer_;
-        finest_->finer = nullptr;
         finest_->element = ConstructionTraits<T>::construct(args);
       }
       ++levels_;
diff --git a/dune/istl/paamg/matrixhierarchy.hh b/dune/istl/paamg/matrixhierarchy.hh
new file mode 100644
index 0000000000000000000000000000000000000000..9f5f355e9373689f26b8aedc46c9093c30b5b7f1
--- /dev/null
+++ b/dune/istl/paamg/matrixhierarchy.hh
@@ -0,0 +1,974 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_AMG_MATRIXHIERARCHY_HH
+#define DUNE_AMG_MATRIXHIERARCHY_HH
+
+#include <algorithm>
+#include <tuple>
+#include "aggregates.hh"
+#include "graph.hh"
+#include "galerkin.hh"
+#include "renumberer.hh"
+#include "graphcreator.hh"
+#include "hierarchy.hh"
+#include <dune/istl/bvector.hh>
+#include <dune/common/parallel/indexset.hh>
+#include <dune/istl/matrixutils.hh>
+#include <dune/istl/matrixredistribute.hh>
+#include <dune/istl/paamg/dependency.hh>
+#include <dune/istl/paamg/graph.hh>
+#include <dune/istl/paamg/indicescoarsener.hh>
+#include <dune/istl/paamg/globalaggregates.hh>
+#include <dune/istl/paamg/construction.hh>
+#include <dune/istl/paamg/smoother.hh>
+#include <dune/istl/paamg/transfer.hh>
+
+namespace Dune
+{
+  namespace Amg
+  {
+    /**
+     * @addtogroup ISTL_PAAMG
+     *
+     * @{
+     */
+
+    /** @file
+     * @author Markus Blatt
+     * @brief Provides a classes representing the hierarchies in AMG.
+     */
+    enum {
+      /**
+       * @brief Hard limit for the number of processes allowed.
+       *
+       * This is needed to prevent overflows when calculating
+       * the coarsening rate. Currently set 72,000 which is
+       * enough for JUGENE.
+       */
+      MAX_PROCESSES = 72000
+    };
+
+    /**
+     * @brief The hierarchies build by the coarsening process.
+     *
+     * Namely a hierarchy of matrices, index sets, remote indices,
+     * interfaces and communicators.
+     */
+    template<class M, class PI, class A=std::allocator<M> >
+    class MatrixHierarchy
+    {
+    public:
+      /** @brief The type of the matrix operator. */
+      typedef M MatrixOperator;
+
+      /** @brief The type of the matrix. */
+      typedef typename MatrixOperator::matrix_type Matrix;
+
+      /** @brief The type of the index set. */
+      typedef PI ParallelInformation;
+
+      /** @brief The allocator to use. */
+      typedef A Allocator;
+
+      /** @brief The type of the aggregates map we use. */
+      typedef Dune::Amg::AggregatesMap<typename MatrixGraph<Matrix>::VertexDescriptor> AggregatesMap;
+
+      /** @brief The type of the parallel matrix hierarchy. */
+      typedef Dune::Amg::Hierarchy<MatrixOperator,Allocator> ParallelMatrixHierarchy;
+
+      /** @brief The type of the parallel informarion hierarchy. */
+      typedef Dune::Amg::Hierarchy<ParallelInformation,Allocator> ParallelInformationHierarchy;
+
+      /** @brief Allocator for pointers. */
+      typedef typename Allocator::template rebind<AggregatesMap*>::other AAllocator;
+
+      /** @brief The type of the aggregates maps list. */
+      typedef std::list<AggregatesMap*,AAllocator> AggregatesMapList;
+
+      /** @brief The type of the redistribute information. */
+      typedef RedistributeInformation<ParallelInformation> RedistributeInfoType;
+
+      /** @brief Allocator for RedistributeInfoType. */
+      typedef typename Allocator::template rebind<RedistributeInfoType>::other RILAllocator;
+
+      /** @brief The type of the list of redistribute information. */
+      typedef std::list<RedistributeInfoType,RILAllocator> RedistributeInfoList;
+
+      /**
+       * @brief Constructor
+       * @param fineMatrix The matrix to coarsen.
+       * @param pinfo The information about the parallel data decomposition at the first level.
+       */
+      MatrixHierarchy(std::shared_ptr<MatrixOperator> fineMatrix,
+        std::shared_ptr<ParallelInformation> pinfo = std::make_shared<ParallelInformation>());
+
+      ~MatrixHierarchy();
+
+      /**
+       * @brief Build the matrix hierarchy using aggregation.
+       *
+       * @brief criterion The criterion describing the aggregation process.
+       */
+      template<typename O, typename T>
+      void build(const T& criterion);
+
+      /**
+       * @brief Recalculate the galerkin products.
+       *
+       * If the data of the fine matrix changes but not its sparsity pattern
+       * this will recalculate all coarser levels without starting the expensive
+       * aggregation process all over again.
+       */
+      template<class F>
+      void recalculateGalerkin(const F& copyFlags);
+
+      /**
+       * @brief Coarsen the vector hierarchy according to the matrix hierarchy.
+       * @param hierarchy The vector hierarchy to coarsen.
+       */
+      template<class V, class BA, class TA>
+      void coarsenVector(Hierarchy<BlockVector<V,BA>, TA>& hierarchy) const;
+
+      /**
+       * @brief Coarsen the smoother hierarchy according to the matrix hierarchy.
+       * @param smoothers The smoother hierarchy to coarsen.
+       * @param args The arguments for the construction of the coarse level smoothers.
+       */
+      template<class S, class TA>
+      void coarsenSmoother(Hierarchy<S,TA>& smoothers,
+                           const typename SmootherTraits<S>::Arguments& args) const;
+
+      /**
+       * @brief Get the number of levels in the hierarchy.
+       * @return The number of levels.
+       */
+      std::size_t levels() const;
+
+      /**
+       * @brief Get the max number of levels in the hierarchy of processors.
+       * @return The maximum number of levels.
+       */
+      std::size_t maxlevels() const;
+
+      bool hasCoarsest() const;
+
+      /**
+       * @brief Whether the hierarchy was built.
+       * @return true if the MatrixHierarchy::build method was called.
+       */
+      bool isBuilt() const;
+
+      /**
+       * @brief Get the matrix hierarchy.
+       * @return The matrix hierarchy.
+       */
+      const ParallelMatrixHierarchy& matrices() const;
+
+      /**
+       * @brief Get the hierarchy of the parallel data distribution information.
+       * @return The hierarchy of the parallel data distribution information.
+       */
+      const ParallelInformationHierarchy& parallelInformation() const;
+
+      /**
+       * @brief Get the hierarchy of the mappings of the nodes onto aggregates.
+       * @return The hierarchy of the mappings of the nodes onto aggregates.
+       */
+      const AggregatesMapList& aggregatesMaps() const;
+
+      /**
+       * @brief Get the hierarchy of the information about redistributions,
+       * @return The hierarchy of the information about redistributions of the
+       * data to fewer processes.
+       */
+      const RedistributeInfoList& redistributeInformation() const;
+
+      double getProlongationDampingFactor() const
+      {
+        return prolongDamp_;
+      }
+
+      /**
+       * @brief Get the mapping of fine level unknowns to coarse level
+       * aggregates.
+       *
+       * For each fine level unknown i the correcponding data[i] is the
+       * aggregate it belongs to on the coarsest level.
+       *
+       * @param[out] data The mapping of fine level unknowns to coarse level
+       * aggregates.
+       */
+      void getCoarsestAggregatesOnFinest(std::vector<std::size_t>& data) const;
+
+    private:
+      typedef typename ConstructionTraits<MatrixOperator>::Arguments MatrixArgs;
+      typedef typename ConstructionTraits<ParallelInformation>::Arguments CommunicationArgs;
+      /** @brief The list of aggregates maps. */
+      AggregatesMapList aggregatesMaps_;
+      /** @brief The list of redistributes. */
+      RedistributeInfoList redistributes_;
+      /** @brief The hierarchy of parallel matrices. */
+      ParallelMatrixHierarchy matrices_;
+      /** @brief The hierarchy of the parallel information. */
+      ParallelInformationHierarchy parallelInformation_;
+
+      /** @brief Whether the hierarchy was built. */
+      bool built_;
+
+      /** @brief The maximum number of level across all processors.*/
+      int maxlevels_;
+
+      double prolongDamp_;
+
+      /**
+       * @brief functor to print matrix statistics.
+       */
+      template<class Matrix, bool print>
+      struct MatrixStats
+      {
+
+        /**
+         * @brief Print matrix statistics.
+         */
+        static void stats(const Matrix& matrix)
+        {
+          DUNE_UNUSED_PARAMETER(matrix);
+        }
+      };
+
+      template<class Matrix>
+      struct MatrixStats<Matrix,true>
+      {
+        struct calc
+        {
+          typedef typename Matrix::size_type size_type;
+          typedef typename Matrix::row_type matrix_row;
+
+          calc()
+          {
+            min=std::numeric_limits<size_type>::max();
+            max=0;
+            sum=0;
+          }
+
+          void operator()(const matrix_row& row)
+          {
+            min=std::min(min, row.size());
+            max=std::max(max, row.size());
+            sum += row.size();
+          }
+
+          size_type min;
+          size_type max;
+          size_type sum;
+        };
+        /**
+         * @brief Print matrix statistics.
+         */
+        static void stats(const Matrix& matrix)
+        {
+          calc c= for_each(matrix.begin(), matrix.end(), calc());
+          dinfo<<"Matrix row: min="<<c.min<<" max="<<c.max
+               <<" average="<<static_cast<double>(c.sum)/matrix.N()
+               <<std::endl;
+        }
+      };
+    };
+
+    /**
+     * @brief The criterion describing the stop criteria for the coarsening process.
+     */
+    template<class T>
+    class CoarsenCriterion : public T
+    {
+    public:
+      /**
+       * @brief The criterion for tagging connections as strong and nodes as isolated.
+       * This might be e.g. SymmetricCriterion or UnSymmetricCriterion.
+       */
+      typedef T AggregationCriterion;
+
+      /**
+       * @brief Constructor
+       * @param maxLevel The maximum number of levels allowed in the matrix hierarchy (default: 100).
+       * @param coarsenTarget If the number of nodes in the matrix is below this threshold the
+       * coarsening will stop (default: 1000).
+       * @param minCoarsenRate If the coarsening rate falls below this threshold the
+       * coarsening will stop (default: 1.2)
+       * @param prolongDamp The damping factor to apply to the prolongated update (default: 1.6)
+       * @param accumulate Whether to accumulate the data onto fewer processors on coarser levels.
+       */
+      CoarsenCriterion(int maxLevel=100, int coarsenTarget=1000, double minCoarsenRate=1.2,
+                       double prolongDamp=1.6, AccumulationMode accumulate=successiveAccu)
+        : AggregationCriterion(Dune::Amg::Parameters(maxLevel, coarsenTarget, minCoarsenRate, prolongDamp, accumulate))
+      {}
+
+      CoarsenCriterion(const Dune::Amg::Parameters& parms)
+        : AggregationCriterion(parms)
+      {}
+
+    };
+
+    template<typename M, typename C1>
+    bool repartitionAndDistributeMatrix(const M& origMatrix,
+                                        std::shared_ptr<M> newMatrix,
+                                        SequentialInformation& origComm,
+                                        std::shared_ptr<SequentialInformation>& newComm,
+                                        RedistributeInformation<SequentialInformation>& ri,
+                                        int nparts, C1& criterion)
+    {
+      DUNE_UNUSED_PARAMETER(origMatrix);
+      DUNE_UNUSED_PARAMETER(newMatrix);
+      DUNE_UNUSED_PARAMETER(origComm);
+      DUNE_UNUSED_PARAMETER(newComm);
+      DUNE_UNUSED_PARAMETER(ri);
+      DUNE_UNUSED_PARAMETER(nparts);
+      DUNE_UNUSED_PARAMETER(criterion);
+      DUNE_THROW(NotImplemented, "Redistribution does not make sense in sequential code!");
+    }
+
+
+    template<typename M, typename C, typename C1>
+    bool repartitionAndDistributeMatrix(const M& origMatrix,
+                                        std::shared_ptr<M> newMatrix,
+                                        C& origComm,
+                                        std::shared_ptr<C>& newComm,
+                                        RedistributeInformation<C>& ri,
+                                        int nparts, C1& criterion)
+    {
+      Timer time;
+#ifdef AMG_REPART_ON_COMM_GRAPH
+      // Done not repartition the matrix graph, but a graph of the communication scheme.
+      bool existentOnRedist=Dune::commGraphRepartition(origMatrix, origComm, nparts, newComm,
+                                                       ri.getInterface(),
+                                                       criterion.debugLevel()>1);
+
+#else
+      typedef Dune::Amg::MatrixGraph<const M> MatrixGraph;
+      typedef Dune::Amg::PropertiesGraph<MatrixGraph,
+          VertexProperties,
+          EdgeProperties,
+          IdentityMap,
+          IdentityMap> PropertiesGraph;
+      MatrixGraph graph(origMatrix);
+      PropertiesGraph pgraph(graph);
+      buildDependency(pgraph, origMatrix, criterion, false);
+
+#ifdef DEBUG_REPART
+      if(origComm.communicator().rank()==0)
+        std::cout<<"Original matrix"<<std::endl;
+      origComm.communicator().barrier();
+      printGlobalSparseMatrix(origMatrix, origComm, std::cout);
+#endif
+      bool existentOnRedist=Dune::graphRepartition(pgraph, origComm, nparts,
+                                                   newComm, ri.getInterface(),
+                                                   criterion.debugLevel()>1);
+#endif // if else AMG_REPART
+
+      if(origComm.communicator().rank()==0  && criterion.debugLevel()>1)
+        std::cout<<"Repartitioning took "<<time.elapsed()<<" seconds."<<std::endl;
+
+      ri.setSetup();
+
+#ifdef DEBUG_REPART
+      ri.checkInterface(origComm.indexSet(), newComm->indexSet(), origComm.communicator());
+#endif
+
+      redistributeMatrix(const_cast<M&>(origMatrix), *newMatrix, origComm, *newComm, ri);
+
+#ifdef DEBUG_REPART
+      if(origComm.communicator().rank()==0)
+        std::cout<<"Original matrix"<<std::endl;
+      origComm.communicator().barrier();
+      if(newComm->communicator().size()>0)
+        printGlobalSparseMatrix(*newMatrix, *newComm, std::cout);
+      origComm.communicator().barrier();
+#endif
+
+      if(origComm.communicator().rank()==0  && criterion.debugLevel()>1)
+        std::cout<<"Redistributing matrix took "<<time.elapsed()<<" seconds."<<std::endl;
+      return existentOnRedist;
+
+    }
+
+    template<class M, class IS, class A>
+    MatrixHierarchy<M,IS,A>::MatrixHierarchy(std::shared_ptr<MatrixOperator> fineMatrix,
+                                             std::shared_ptr<ParallelInformation> pinfo)
+      : matrices_(fineMatrix),
+        parallelInformation_(pinfo)
+    {
+      if (SolverCategory::category(*fineMatrix) != SolverCategory::category(*pinfo))
+        DUNE_THROW(ISTLError, "MatrixOperator and ParallelInformation must belong to the same category!");
+    }
+
+    template<class M, class IS, class A>
+    template<typename O, typename T>
+    void MatrixHierarchy<M,IS,A>::build(const T& criterion)
+    {
+      prolongDamp_ = criterion.getProlongationDampingFactor();
+      typedef O OverlapFlags;
+      typedef typename ParallelMatrixHierarchy::Iterator MatIterator;
+      typedef typename ParallelInformationHierarchy::Iterator PInfoIterator;
+
+      static const int noints=(Dune::Amg::MAX_PROCESSES/4096>0) ? (Dune::Amg::MAX_PROCESSES/4096) : 1;
+
+      typedef bigunsignedint<sizeof(int)*8*noints> BIGINT;
+      GalerkinProduct<ParallelInformation> productBuilder;
+      MatIterator mlevel = matrices_.finest();
+      MatrixStats<typename M::matrix_type,MINIMAL_DEBUG_LEVEL<=INFO_DEBUG_LEVEL>::stats(mlevel->getmat());
+
+      PInfoIterator infoLevel = parallelInformation_.finest();
+      BIGINT finenonzeros=countNonZeros(mlevel->getmat());
+      finenonzeros = infoLevel->communicator().sum(finenonzeros);
+      BIGINT allnonzeros = finenonzeros;
+
+
+      int level = 0;
+      int rank = 0;
+
+      BIGINT unknowns = mlevel->getmat().N();
+
+      unknowns = infoLevel->communicator().sum(unknowns);
+      double dunknowns=unknowns.todouble();
+      infoLevel->buildGlobalLookup(mlevel->getmat().N());
+      redistributes_.push_back(RedistributeInfoType());
+
+      for(; level < criterion.maxLevel(); ++level, ++mlevel) {
+        assert(matrices_.levels()==redistributes_.size());
+        rank = infoLevel->communicator().rank();
+        if(rank==0 && criterion.debugLevel()>1)
+          std::cout<<"Level "<<level<<" has "<<dunknowns<<" unknowns, "<<dunknowns/infoLevel->communicator().size()
+                   <<" unknowns per proc (procs="<<infoLevel->communicator().size()<<")"<<std::endl;
+
+        MatrixOperator* matrix=&(*mlevel);
+        ParallelInformation* info =&(*infoLevel);
+
+        if((
+#if HAVE_PARMETIS
+             criterion.accumulate()==successiveAccu
+#else
+             false
+#endif
+             || (criterion.accumulate()==atOnceAccu
+                 && dunknowns < 30*infoLevel->communicator().size()))
+           && infoLevel->communicator().size()>1 &&
+           dunknowns/infoLevel->communicator().size() <= criterion.coarsenTarget())
+        {
+          // accumulate to fewer processors
+          std::shared_ptr<Matrix> redistMat = std::make_shared<Matrix>();
+          std::shared_ptr<ParallelInformation> redistComm;
+          std::size_t nodomains = (std::size_t)std::ceil(dunknowns/(criterion.minAggregateSize()
+                                                                    *criterion.coarsenTarget()));
+          if( nodomains<=criterion.minAggregateSize()/2 ||
+              dunknowns <= criterion.coarsenTarget() )
+            nodomains=1;
+
+          bool existentOnNextLevel =
+            repartitionAndDistributeMatrix(mlevel->getmat(), redistMat, *infoLevel,
+                                           redistComm, redistributes_.back(), nodomains,
+                                           criterion);
+          BIGINT unknownsRedist = redistMat->N();
+          unknownsRedist = infoLevel->communicator().sum(unknownsRedist);
+          dunknowns= unknownsRedist.todouble();
+          if(redistComm->communicator().rank()==0 && criterion.debugLevel()>1)
+            std::cout<<"Level "<<level<<" (redistributed) has "<<dunknowns<<" unknowns, "<<dunknowns/redistComm->communicator().size()
+                     <<" unknowns per proc (procs="<<redistComm->communicator().size()<<")"<<std::endl;
+          MatrixArgs args(*redistMat, *redistComm);
+          mlevel.addRedistributed(ConstructionTraits<MatrixOperator>::construct(args));
+          assert(mlevel.isRedistributed());
+          infoLevel.addRedistributed(redistComm);
+          infoLevel->freeGlobalLookup();
+
+          if(!existentOnNextLevel)
+            // We do not hold any data on the redistributed partitioning
+            break;
+
+          // Work on the redistributed Matrix from now on
+          matrix = &(mlevel.getRedistributed());
+          info = &(infoLevel.getRedistributed());
+          info->buildGlobalLookup(matrix->getmat().N());
+        }
+
+        rank = info->communicator().rank();
+        if(dunknowns <= criterion.coarsenTarget())
+          // No further coarsening needed
+          break;
+
+        typedef PropertiesGraphCreator<MatrixOperator,ParallelInformation> GraphCreator;
+        typedef typename GraphCreator::PropertiesGraph PropertiesGraph;
+        typedef typename GraphCreator::GraphTuple GraphTuple;
+
+        typedef typename PropertiesGraph::VertexDescriptor Vertex;
+
+        std::vector<bool> excluded(matrix->getmat().N(), false);
+
+        GraphTuple graphs = GraphCreator::create(*matrix, excluded, *info, OverlapFlags());
+
+        AggregatesMap* aggregatesMap=new AggregatesMap(std::get<1>(graphs)->maxVertex()+1);
+
+        aggregatesMaps_.push_back(aggregatesMap);
+
+        Timer watch;
+        watch.reset();
+        int noAggregates, isoAggregates, oneAggregates, skippedAggregates;
+
+        std::tie(noAggregates, isoAggregates, oneAggregates, skippedAggregates) =
+          aggregatesMap->buildAggregates(matrix->getmat(), *(std::get<1>(graphs)), criterion, level==0);
+
+        if(rank==0 && criterion.debugLevel()>2)
+          std::cout<<" Have built "<<noAggregates<<" aggregates totally ("<<isoAggregates<<" isolated aggregates, "<<
+          oneAggregates<<" aggregates of one vertex,  and skipped "<<
+          skippedAggregates<<" aggregates)."<<std::endl;
+#ifdef TEST_AGGLO
+        {
+          // calculate size of local matrix in the distributed direction
+          int start, end, overlapStart, overlapEnd;
+          int procs=info->communicator().rank();
+          int n = UNKNOWNS/procs; // number of unknowns per process
+          int bigger = UNKNOWNS%procs; // number of process with n+1 unknows
+
+          // Compute owner region
+          if(rank<bigger) {
+            start = rank*(n+1);
+            end   = (rank+1)*(n+1);
+          }else{
+            start = bigger + rank * n;
+            end   = bigger + (rank + 1) * n;
+          }
+
+          // Compute overlap region
+          if(start>0)
+            overlapStart = start - 1;
+          else
+            overlapStart = start;
+
+          if(end<UNKNOWNS)
+            overlapEnd = end + 1;
+          else
+            overlapEnd = end;
+
+          assert((UNKNOWNS)*(overlapEnd-overlapStart)==aggregatesMap->noVertices());
+          for(int j=0; j< UNKNOWNS; ++j)
+            for(int i=0; i < UNKNOWNS; ++i)
+            {
+              if(i>=overlapStart && i<overlapEnd)
+              {
+                int no = (j/2)*((UNKNOWNS)/2)+i/2;
+                (*aggregatesMap)[j*(overlapEnd-overlapStart)+i-overlapStart]=no;
+              }
+            }
+        }
+#endif
+        if(criterion.debugLevel()>1 && info->communicator().rank()==0)
+          std::cout<<"aggregating finished."<<std::endl;
+
+        BIGINT gnoAggregates=noAggregates;
+        gnoAggregates = info->communicator().sum(gnoAggregates);
+        double dgnoAggregates = gnoAggregates.todouble();
+#ifdef TEST_AGGLO
+        BIGINT gnoAggregates=((UNKNOWNS)/2)*((UNKNOWNS)/2);
+#endif
+
+        if(criterion.debugLevel()>2 && rank==0)
+          std::cout << "Building "<<dgnoAggregates<<" aggregates took "<<watch.elapsed()<<" seconds."<<std::endl;
+
+        if(dgnoAggregates==0 || dunknowns/dgnoAggregates<criterion.minCoarsenRate())
+        {
+          if(rank==0)
+          {
+            if(dgnoAggregates>0)
+              std::cerr << "Stopped coarsening because of rate breakdown "<<dunknowns<<"/"<<dgnoAggregates
+                        <<"="<<dunknowns/dgnoAggregates<<"<"
+                        <<criterion.minCoarsenRate()<<std::endl;
+            else
+              std::cerr<< "Could not build any aggregates. Probably no connected nodes."<<std::endl;
+          }
+          aggregatesMap->free();
+          delete aggregatesMap;
+          aggregatesMaps_.pop_back();
+
+          if(criterion.accumulate() && mlevel.isRedistributed() && info->communicator().size()>1) {
+            // coarse level matrix was already redistributed, but to more than 1 process
+            // Therefore need to delete the redistribution. Further down it will
+            // then be redistributed to 1 process
+            delete &(mlevel.getRedistributed().getmat());
+            mlevel.deleteRedistributed();
+            delete &(infoLevel.getRedistributed());
+            infoLevel.deleteRedistributed();
+            redistributes_.back().resetSetup();
+          }
+
+          break;
+        }
+        unknowns =  noAggregates;
+        dunknowns = dgnoAggregates;
+
+        CommunicationArgs commargs(info->communicator(),info->category());
+        parallelInformation_.addCoarser(commargs);
+
+        ++infoLevel; // parallel information on coarse level
+
+        typename PropertyMapTypeSelector<VertexVisitedTag,PropertiesGraph>::Type visitedMap =
+          get(VertexVisitedTag(), *(std::get<1>(graphs)));
+
+        watch.reset();
+        int aggregates = IndicesCoarsener<ParallelInformation,OverlapFlags>
+                         ::coarsen(*info,
+                                   *(std::get<1>(graphs)),
+                                   visitedMap,
+                                   *aggregatesMap,
+                                   *infoLevel,
+                                   noAggregates);
+        GraphCreator::free(graphs);
+
+        if(criterion.debugLevel()>2) {
+          if(rank==0)
+            std::cout<<"Coarsening of index sets took "<<watch.elapsed()<<" seconds."<<std::endl;
+        }
+
+        watch.reset();
+
+        infoLevel->buildGlobalLookup(aggregates);
+        AggregatesPublisher<Vertex,OverlapFlags,ParallelInformation>::publish(*aggregatesMap,
+                                                                              *info,
+                                                                              infoLevel->globalLookup());
+
+
+        if(criterion.debugLevel()>2) {
+          if(rank==0)
+            std::cout<<"Communicating global aggregate numbers took "<<watch.elapsed()<<" seconds."<<std::endl;
+        }
+
+        watch.reset();
+        std::vector<bool>& visited=excluded;
+
+        typedef std::vector<bool>::iterator Iterator;
+        typedef IteratorPropertyMap<Iterator, IdentityMap> VisitedMap2;
+        Iterator end = visited.end();
+        for(Iterator iter= visited.begin(); iter != end; ++iter)
+          *iter=false;
+
+        VisitedMap2 visitedMap2(visited.begin(), Dune::IdentityMap());
+
+        typename MatrixOperator::matrix_type* coarseMatrix;
+
+        coarseMatrix = productBuilder.build(*(std::get<0>(graphs)), visitedMap2,
+                                            *info,
+                                            *aggregatesMap,
+                                            aggregates,
+                                            OverlapFlags());
+        dverb<<"Building of sparsity pattern took "<<watch.elapsed()<<std::endl;
+        watch.reset();
+        info->freeGlobalLookup();
+
+        delete std::get<0>(graphs);
+        productBuilder.calculate(matrix->getmat(), *aggregatesMap, *coarseMatrix, *infoLevel, OverlapFlags());
+
+        if(criterion.debugLevel()>2) {
+          if(rank==0)
+            std::cout<<"Calculation entries of Galerkin product took "<<watch.elapsed()<<" seconds."<<std::endl;
+        }
+
+        BIGINT nonzeros = countNonZeros(*coarseMatrix);
+        allnonzeros = allnonzeros + infoLevel->communicator().sum(nonzeros);
+        MatrixArgs args(*coarseMatrix, *infoLevel);
+
+        matrices_.addCoarser(args);
+        redistributes_.push_back(RedistributeInfoType());
+      } // end level loop
+
+
+      infoLevel->freeGlobalLookup();
+
+      built_=true;
+      AggregatesMap* aggregatesMap=new AggregatesMap(0);
+      aggregatesMaps_.push_back(aggregatesMap);
+
+      if(criterion.debugLevel()>0) {
+        if(level==criterion.maxLevel()) {
+          BIGINT unknownsLevel = mlevel->getmat().N();
+          unknownsLevel = infoLevel->communicator().sum(unknownsLevel);
+          double dunknownsLevel = unknownsLevel.todouble();
+          if(rank==0 && criterion.debugLevel()>1) {
+            std::cout<<"Level "<<level<<" has "<<dunknownsLevel<<" unknowns, "<<dunknownsLevel/infoLevel->communicator().size()
+                     <<" unknowns per proc (procs="<<infoLevel->communicator().size()<<")"<<std::endl;
+          }
+        }
+      }
+
+      if(criterion.accumulate() && !redistributes_.back().isSetup() &&
+         infoLevel->communicator().size()>1) {
+#if HAVE_MPI && !HAVE_PARMETIS
+        if(criterion.accumulate()==successiveAccu &&
+           infoLevel->communicator().rank()==0)
+          std::cerr<<"Successive accumulation of data on coarse levels only works with ParMETIS installed."
+                   <<"  Fell back to accumulation to one domain on coarsest level"<<std::endl;
+#endif
+
+        // accumulate to fewer processors
+        std::shared_ptr<Matrix> redistMat = std::make_shared<Matrix>();
+        std::shared_ptr<ParallelInformation> redistComm;
+        int nodomains = 1;
+
+        repartitionAndDistributeMatrix(mlevel->getmat(), redistMat, *infoLevel,
+                                       redistComm, redistributes_.back(), nodomains,criterion);
+        MatrixArgs args(*redistMat, *redistComm);
+        BIGINT unknownsRedist = redistMat->N();
+        unknownsRedist = infoLevel->communicator().sum(unknownsRedist);
+
+        if(redistComm->communicator().rank()==0 && criterion.debugLevel()>1) {
+          double dunknownsRedist = unknownsRedist.todouble();
+          std::cout<<"Level "<<level<<" redistributed has "<<dunknownsRedist<<" unknowns, "<<dunknownsRedist/redistComm->communicator().size()
+                   <<" unknowns per proc (procs="<<redistComm->communicator().size()<<")"<<std::endl;
+        }
+        mlevel.addRedistributed(ConstructionTraits<MatrixOperator>::construct(args));
+        infoLevel.addRedistributed(redistComm);
+        infoLevel->freeGlobalLookup();
+      }
+
+      int levels = matrices_.levels();
+      maxlevels_ = parallelInformation_.finest()->communicator().max(levels);
+      assert(matrices_.levels()==redistributes_.size());
+      if(hasCoarsest() && rank==0 && criterion.debugLevel()>1)
+        std::cout<<"operator complexity: "<<allnonzeros.todouble()/finenonzeros.todouble()<<std::endl;
+
+    }
+
+    template<class M, class IS, class A>
+    const typename MatrixHierarchy<M,IS,A>::ParallelMatrixHierarchy&
+    MatrixHierarchy<M,IS,A>::matrices() const
+    {
+      return matrices_;
+    }
+
+    template<class M, class IS, class A>
+    const typename MatrixHierarchy<M,IS,A>::ParallelInformationHierarchy&
+    MatrixHierarchy<M,IS,A>::parallelInformation() const
+    {
+      return parallelInformation_;
+    }
+
+    template<class M, class IS, class A>
+    void MatrixHierarchy<M,IS,A>::getCoarsestAggregatesOnFinest(std::vector<std::size_t>& data) const
+    {
+      int levels=aggregatesMaps().size();
+      int maxlevels=parallelInformation_.finest()->communicator().max(levels);
+      std::size_t size=(*(aggregatesMaps().begin()))->noVertices();
+      // We need an auxiliary vector for the consecutive prolongation.
+      std::vector<std::size_t> tmp;
+      std::vector<std::size_t> *coarse, *fine;
+
+      // make sure the allocated space suffices.
+      tmp.reserve(size);
+      data.reserve(size);
+
+      // Correctly assign coarse and fine for the first prolongation such that
+      // we end up in data in the end.
+      if(levels%2==0) {
+        coarse=&tmp;
+        fine=&data;
+      }else{
+        coarse=&data;
+        fine=&tmp;
+      }
+
+      // Number the unknowns on the coarsest level consecutively for each process.
+      if(levels==maxlevels) {
+        const AggregatesMap& map = *(*(++aggregatesMaps().rbegin()));
+        std::size_t m=0;
+
+        for(typename AggregatesMap::const_iterator iter = map.begin(); iter != map.end(); ++iter)
+          if(*iter< AggregatesMap::ISOLATED)
+            m=std::max(*iter,m);
+
+        coarse->resize(m+1);
+        std::size_t i=0;
+        srand((unsigned)std::clock());
+        std::set<size_t> used;
+        for(typename std::vector<std::size_t>::iterator iter=coarse->begin(); iter != coarse->end();
+            ++iter, ++i)
+        {
+          std::pair<std::set<std::size_t>::iterator,bool> ibpair
+            = used.insert(static_cast<std::size_t>((((double)rand())/(RAND_MAX+1.0)))*coarse->size());
+
+          while(!ibpair.second)
+            ibpair = used.insert(static_cast<std::size_t>((((double)rand())/(RAND_MAX+1.0))*coarse->size()));
+          *iter=*(ibpair.first);
+        }
+      }
+
+      typename ParallelInformationHierarchy::Iterator pinfo = parallelInformation().coarsest();
+      --pinfo;
+
+      // Now consecutively project the numbers to the finest level.
+      for(typename AggregatesMapList::const_reverse_iterator aggregates=++aggregatesMaps().rbegin();
+          aggregates != aggregatesMaps().rend(); ++aggregates,--levels) {
+
+        fine->resize((*aggregates)->noVertices());
+        fine->assign(fine->size(), 0);
+        Transfer<typename AggregatesMap::AggregateDescriptor, std::vector<std::size_t>, ParallelInformation>
+        ::prolongateVector(*(*aggregates), *coarse, *fine, static_cast<std::size_t>(1), *pinfo);
+        --pinfo;
+        std::swap(coarse, fine);
+      }
+
+      // Assertion to check that we really projected to data on the last step.
+      assert(coarse==&data);
+    }
+
+    template<class M, class IS, class A>
+    const typename MatrixHierarchy<M,IS,A>::AggregatesMapList&
+    MatrixHierarchy<M,IS,A>::aggregatesMaps() const
+    {
+      return aggregatesMaps_;
+    }
+    template<class M, class IS, class A>
+    const typename MatrixHierarchy<M,IS,A>::RedistributeInfoList&
+    MatrixHierarchy<M,IS,A>::redistributeInformation() const
+    {
+      return redistributes_;
+    }
+
+    template<class M, class IS, class A>
+    MatrixHierarchy<M,IS,A>::~MatrixHierarchy()
+    {
+      typedef typename AggregatesMapList::reverse_iterator AggregatesMapIterator;
+      typedef typename ParallelMatrixHierarchy::Iterator Iterator;
+      typedef typename ParallelInformationHierarchy::Iterator InfoIterator;
+
+      AggregatesMapIterator amap = aggregatesMaps_.rbegin();
+      InfoIterator info = parallelInformation_.coarsest();
+      for(Iterator level=matrices_.coarsest(), finest=matrices_.finest(); level != finest;  --level, --info, ++amap) {
+        (*amap)->free();
+        delete *amap;
+        delete &level->getmat();
+        if(level.isRedistributed())
+          delete &(level.getRedistributed().getmat());
+      }
+      delete *amap;
+    }
+
+    template<class M, class IS, class A>
+    template<class V, class BA, class TA>
+    void MatrixHierarchy<M,IS,A>::coarsenVector(Hierarchy<BlockVector<V,BA>, TA>& hierarchy) const
+    {
+      assert(hierarchy.levels()==1);
+      typedef typename ParallelMatrixHierarchy::ConstIterator Iterator;
+      typedef typename RedistributeInfoList::const_iterator RIter;
+      RIter redist = redistributes_.begin();
+
+      Iterator matrix = matrices_.finest(), coarsest = matrices_.coarsest();
+      int level=0;
+      if(redist->isSetup())
+        hierarchy.addRedistributedOnCoarsest(matrix.getRedistributed().getmat().N());
+      Dune::dvverb<<"Level "<<level<<" has "<<matrices_.finest()->getmat().N()<<" unknowns!"<<std::endl;
+
+      while(matrix != coarsest) {
+        ++matrix; ++level; ++redist;
+        Dune::dvverb<<"Level "<<level<<" has "<<matrix->getmat().N()<<" unknowns!"<<std::endl;
+
+        hierarchy.addCoarser(matrix->getmat().N());
+        if(redist->isSetup())
+          hierarchy.addRedistributedOnCoarsest(matrix.getRedistributed().getmat().N());
+
+      }
+
+    }
+
+    template<class M, class IS, class A>
+    template<class S, class TA>
+    void MatrixHierarchy<M,IS,A>::coarsenSmoother(Hierarchy<S,TA>& smoothers,
+                                                  const typename SmootherTraits<S>::Arguments& sargs) const
+    {
+      assert(smoothers.levels()==0);
+      typedef typename ParallelMatrixHierarchy::ConstIterator MatrixIterator;
+      typedef typename ParallelInformationHierarchy::ConstIterator PinfoIterator;
+      typedef typename AggregatesMapList::const_iterator AggregatesIterator;
+
+      typename ConstructionTraits<S>::Arguments cargs;
+      cargs.setArgs(sargs);
+      PinfoIterator pinfo = parallelInformation_.finest();
+      AggregatesIterator aggregates = aggregatesMaps_.begin();
+      int level=0;
+      for(MatrixIterator matrix = matrices_.finest(), coarsest = matrices_.coarsest();
+          matrix != coarsest; ++matrix, ++pinfo, ++aggregates, ++level) {
+        cargs.setMatrix(matrix->getmat(), **aggregates);
+        cargs.setComm(*pinfo);
+        smoothers.addCoarser(cargs);
+      }
+      if(maxlevels()>levels()) {
+        // This is not the globally coarsest level and therefore smoothing is needed
+        cargs.setMatrix(matrices_.coarsest()->getmat(), **aggregates);
+        cargs.setComm(*pinfo);
+        smoothers.addCoarser(cargs);
+        ++level;
+      }
+    }
+
+    template<class M, class IS, class A>
+    template<class F>
+    void MatrixHierarchy<M,IS,A>::recalculateGalerkin(const F& copyFlags)
+    {
+      typedef typename AggregatesMapList::iterator AggregatesMapIterator;
+      typedef typename ParallelMatrixHierarchy::Iterator Iterator;
+      typedef typename ParallelInformationHierarchy::Iterator InfoIterator;
+
+      AggregatesMapIterator amap = aggregatesMaps_.begin();
+      BaseGalerkinProduct productBuilder;
+      InfoIterator info = parallelInformation_.finest();
+      typename RedistributeInfoList::iterator riIter = redistributes_.begin();
+      Iterator level = matrices_.finest(), coarsest=matrices_.coarsest();
+      if(level.isRedistributed()) {
+        info->buildGlobalLookup(level->getmat().N());
+        redistributeMatrixEntries(const_cast<Matrix&>(level->getmat()),
+                                  const_cast<Matrix&>(level.getRedistributed().getmat()),
+                                  *info,info.getRedistributed(), *riIter);
+        info->freeGlobalLookup();
+      }
+
+      for(; level!=coarsest; ++amap) {
+        const Matrix& fine = (level.isRedistributed() ? level.getRedistributed() : *level).getmat();
+        ++level;
+        ++info;
+        ++riIter;
+        productBuilder.calculate(fine, *(*amap), const_cast<Matrix&>(level->getmat()), *info, copyFlags);
+        if(level.isRedistributed()) {
+          info->buildGlobalLookup(level->getmat().N());
+          redistributeMatrixEntries(const_cast<Matrix&>(level->getmat()),
+                                    const_cast<Matrix&>(level.getRedistributed().getmat()), *info,
+                                    info.getRedistributed(), *riIter);
+          info->freeGlobalLookup();
+        }
+      }
+    }
+
+    template<class M, class IS, class A>
+    std::size_t MatrixHierarchy<M,IS,A>::levels() const
+    {
+      return matrices_.levels();
+    }
+
+    template<class M, class IS, class A>
+    std::size_t MatrixHierarchy<M,IS,A>::maxlevels() const
+    {
+      return maxlevels_;
+    }
+
+    template<class M, class IS, class A>
+    bool MatrixHierarchy<M,IS,A>::hasCoarsest() const
+    {
+      return levels()==maxlevels() &&
+             (!matrices_.coarsest().isRedistributed() ||matrices_.coarsest()->getmat().N()>0);
+    }
+
+    template<class M, class IS, class A>
+    bool MatrixHierarchy<M,IS,A>::isBuilt() const
+    {
+      return built_;
+    }
+
+    /** @} */
+  } // namespace Amg
+} // namespace Dune
+
+#endif // end DUNE_AMG_MATRIXHIERARCHY_HH
diff --git a/dune/istl/paamg/smoother.hh b/dune/istl/paamg/smoother.hh
index 2a8afb06b524bb99ff2b5c11f51dbe4c2eeaf212..11d38e4e4ee9fb159c2ed0a2044aeca64fc70b60 100644
--- a/dune/istl/paamg/smoother.hh
+++ b/dune/istl/paamg/smoother.hh
@@ -179,17 +179,11 @@ namespace Dune
     {
       typedef DefaultConstructionArgs<SeqSSOR<M,X,Y,l> > Arguments;
 
-      static inline SeqSSOR<M,X,Y,l>* construct(Arguments& args)
+      static inline std::shared_ptr<SeqSSOR<M,X,Y,l>> construct(Arguments& args)
       {
-        return new SeqSSOR<M,X,Y,l>(args.getMatrix(), args.getArgs().iterations,
-                                    args.getArgs().relaxationFactor);
+        return std::make_shared<SeqSSOR<M,X,Y,l>>
+          (args.getMatrix(), args.getArgs().iterations, args.getArgs().relaxationFactor);
       }
-
-      static inline void deconstruct(SeqSSOR<M,X,Y,l>* ssor)
-      {
-        delete ssor;
-      }
-
     };
 
 
@@ -201,18 +195,13 @@ namespace Dune
     {
       typedef DefaultConstructionArgs<SeqSOR<M,X,Y,l> > Arguments;
 
-      static inline SeqSOR<M,X,Y,l>* construct(Arguments& args)
-      {
-        return new SeqSOR<M,X,Y,l>(args.getMatrix(), args.getArgs().iterations,
-                                   args.getArgs().relaxationFactor);
-      }
-
-      static inline void deconstruct(SeqSOR<M,X,Y,l>* sor)
+      static inline std::shared_ptr<SeqSOR<M,X,Y,l>> construct(Arguments& args)
       {
-        delete sor;
+        return std::make_shared<SeqSOR<M,X,Y,l>>
+          (args.getMatrix(), args.getArgs().iterations, args.getArgs().relaxationFactor);
       }
-
     };
+
     /**
      * @brief Policy for the construction of the SeqJac smoother
      */
@@ -221,17 +210,11 @@ namespace Dune
     {
       typedef DefaultConstructionArgs<SeqJac<M,X,Y,l> > Arguments;
 
-      static inline SeqJac<M,X,Y,l>* construct(Arguments& args)
+      static inline std::shared_ptr<SeqJac<M,X,Y,l>> construct(Arguments& args)
       {
-        return new SeqJac<M,X,Y,l>(args.getMatrix(), args.getArgs().iterations,
-                                   args.getArgs().relaxationFactor);
+        return std::make_shared<SeqJac<M,X,Y,l>>
+          (args.getMatrix(), args.getArgs().iterations, args.getArgs().relaxationFactor);
       }
-
-      static void deconstruct(SeqJac<M,X,Y,l>* jac)
-      {
-        delete jac;
-      }
-
     };
 
 
@@ -244,17 +227,11 @@ DUNE_NO_DEPRECATED_BEGIN // for deprecated SeqILU0
     {
       typedef DefaultConstructionArgs<SeqILU0<M,X,Y> > Arguments;
 
-      static inline SeqILU0<M,X,Y>* construct(Arguments& args)
+      static inline std::shared_ptr<SeqILU0<M,X,Y>> construct(Arguments& args)
       {
-        return new SeqILU0<M,X,Y>(args.getMatrix(),
-                                  args.getArgs().relaxationFactor);
+        return std::make_shared<SeqILU0<M,X,Y>>
+          (args.getMatrix(), args.getArgs().relaxationFactor);
       }
-
-      static void deconstruct(SeqILU0<M,X,Y>* ilu)
-      {
-        delete ilu;
-      }
-
     };
 DUNE_NO_DEPRECATED_END // for deprecated SeqILU0
 
@@ -290,17 +267,11 @@ DUNE_NO_DEPRECATED_BEGIN // for deprecated SeqILUn
     {
       typedef ConstructionArgs<SeqILUn<M,X,Y> > Arguments;
 
-      static inline SeqILUn<M,X,Y>* construct(Arguments& args)
-      {
-        return new SeqILUn<M,X,Y>(args.getMatrix(), args.getN(),
-                                  args.getArgs().relaxationFactor);
-      }
-
-      static void deconstruct(SeqILUn<M,X,Y>* ilu)
+      static inline std::shared_ptr<SeqILUn<M,X,Y>> construct(Arguments& args)
       {
-        delete ilu;
+        return std::make_shared<SeqILUn<M,X,Y>>
+          (args.getMatrix(), args.getN(), args.getArgs().relaxationFactor);
       }
-
     };
 DUNE_NO_DEPRECATED_END // for deprecated SeqILUn
 
@@ -337,17 +308,11 @@ DUNE_NO_DEPRECATED_END // for deprecated SeqILUn
     {
       typedef ConstructionArgs<SeqILU<M,X,Y> > Arguments;
 
-      static inline SeqILU<M,X,Y>* construct(Arguments& args)
-      {
-        return new SeqILU<M,X,Y>(args.getMatrix(), args.getN(),
-                                 args.getArgs().relaxationFactor);
-      }
-
-      static void deconstruct(SeqILU<M,X,Y>* ilu)
+      static inline std::shared_ptr<SeqILU<M,X,Y>> construct(Arguments& args)
       {
-        delete ilu;
+        return std::make_shared<SeqILU<M,X,Y>>
+          (args.getMatrix(), args.getN(), args.getArgs().relaxationFactor);
       }
-
     };
 
     /**
@@ -358,15 +323,11 @@ DUNE_NO_DEPRECATED_END // for deprecated SeqILUn
     {
       typedef DefaultParallelConstructionArgs<M,C> Arguments;
 
-      static inline ParSSOR<M,X,Y,C>* construct(Arguments& args)
-      {
-        return new ParSSOR<M,X,Y,C>(args.getMatrix(), args.getArgs().iterations,
-                                    args.getArgs().relaxationFactor,
-                                    args.getComm());
-      }
-      static inline void deconstruct(ParSSOR<M,X,Y,C>* ssor)
+      static inline std::shared_ptr<ParSSOR<M,X,Y,C>> construct(Arguments& args)
       {
-        delete ssor;
+        return std::make_shared<ParSSOR<M,X,Y,C>>
+          (args.getMatrix(), args.getArgs().iterations,
+           args.getArgs().relaxationFactor, args.getComm());
       }
     };
 
@@ -375,18 +336,11 @@ DUNE_NO_DEPRECATED_END // for deprecated SeqILUn
     {
       typedef DefaultParallelConstructionArgs<T,C> Arguments;
       typedef ConstructionTraits<T> SeqConstructionTraits;
-      static inline BlockPreconditioner<X,Y,C,T>* construct(Arguments& args)
+      static inline std::shared_ptr<BlockPreconditioner<X,Y,C,T>> construct(Arguments& args)
       {
-        return new BlockPreconditioner<X,Y,C,T>(*SeqConstructionTraits::construct(args),
-                                                args.getComm());
+        auto seqPrec = SeqConstructionTraits::construct(args);
+        return std::make_shared<BlockPreconditioner<X,Y,C,T>> (seqPrec, args.getComm());
       }
-
-      static inline void deconstruct(BlockPreconditioner<X,Y,C,T>* bp)
-      {
-        SeqConstructionTraits::deconstruct(static_cast<T*>(&bp->preconditioner));
-        delete bp;
-      }
-
     };
 
     template<class C, class T>
@@ -394,18 +348,11 @@ DUNE_NO_DEPRECATED_END // for deprecated SeqILUn
     {
       typedef DefaultParallelConstructionArgs<T,C> Arguments;
       typedef ConstructionTraits<T> SeqConstructionTraits;
-      static inline NonoverlappingBlockPreconditioner<C,T>* construct(Arguments& args)
-      {
-        return new NonoverlappingBlockPreconditioner<C,T>(*SeqConstructionTraits::construct(args),
-                                                          args.getComm());
-      }
-
-      static inline void deconstruct(NonoverlappingBlockPreconditioner<C,T>* bp)
+      static inline std::shared_ptr<NonoverlappingBlockPreconditioner<C,T>> construct(Arguments& args)
       {
-        SeqConstructionTraits::deconstruct(static_cast<T*>(&bp->preconditioner));
-        delete bp;
+        auto seqPrec = SeqConstructionTraits::construct(args);
+        return std::make_shared<NonoverlappingBlockPreconditioner<C,T>> (seqPrec, args.getComm());
       }
-
     };
 
     /**
@@ -907,17 +854,13 @@ DUNE_NO_DEPRECATED_END // for deprecated SeqILUn
     {
       typedef ConstructionArgs<SeqOverlappingSchwarz<M,X,TM,TS,TA> > Arguments;
 
-      static inline SeqOverlappingSchwarz<M,X,TM,TS,TA>* construct(Arguments& args)
-      {
-        return new SeqOverlappingSchwarz<M,X,TM,TS,TA>(args.getMatrix(),
-                                                       args.getSubDomains(),
-                                                       args.getArgs().relaxationFactor,
-                                                       args.getArgs().onthefly);
-      }
-
-      static void deconstruct(SeqOverlappingSchwarz<M,X,TM,TS,TA>* schwarz)
+      static inline std::shared_ptr<SeqOverlappingSchwarz<M,X,TM,TS,TA>> construct(Arguments& args)
       {
-        delete schwarz;
+        return std::make_shared<SeqOverlappingSchwarz<M,X,TM,TS,TA>>
+          (args.getMatrix(),
+           args.getSubDomains(),
+           args.getArgs().relaxationFactor,
+           args.getArgs().onthefly);
       }
     };
 
diff --git a/dune/istl/paamg/test/hierarchytest.cc b/dune/istl/paamg/test/hierarchytest.cc
index d7de15259ca5b465f158109f2cf0f53bec76281a..3ac29419f4577f5b4ee70c4571f3c8e4e3829aed 100644
--- a/dune/istl/paamg/test/hierarchytest.cc
+++ b/dune/istl/paamg/test/hierarchytest.cc
@@ -4,7 +4,7 @@
 
 #include "mpi.h"
 #include <dune/common/parallel/mpicollectivecommunication.hh>
-#include <dune/istl/paamg/hierarchy.hh>
+#include <dune/istl/paamg/matrixhierarchy.hh>
 #include <dune/istl/paamg/smoother.hh>
 #include <dune/istl/preconditioners.hh>
 #include <dune/istl/owneroverlapcopy.hh>
@@ -43,8 +43,11 @@ void testHierarchy(int N)
   typedef Dune::Amg::Hierarchy<Vector> VHierarchy;
 
   Operator op(mat, pinfo);
-  Hierarchy hierarchy(op, pinfo);
-  VHierarchy vh(b);
+  Hierarchy hierarchy(
+    Dune::stackobject_to_shared_ptr(op),
+    Dune::stackobject_to_shared_ptr(pinfo));
+  VHierarchy vh(
+    Dune::stackobject_to_shared_ptr(b));
 
   typedef Dune::Amg::CoarsenCriterion<Dune::Amg::SymmetricCriterion<BCRSMat,Dune::Amg::FirstDiagonal> >
   Criterion;
@@ -71,6 +74,7 @@ void testHierarchy(int N)
 
 int main(int argc, char** argv)
 {
+  #warning change to use MPI abstractions...
   MPI_Init(&argc, &argv);
 
   const int BS=1;
diff --git a/dune/istl/paamg/test/pthreadamgtest.cc b/dune/istl/paamg/test/pthreadamgtest.cc
index 8f5bd85d6f497704a6de8dcf8308834a682131c4..745c1d403e01bdc01686b776ca935fa9a0008c5f 100644
--- a/dune/istl/paamg/test/pthreadamgtest.cc
+++ b/dune/istl/paamg/test/pthreadamgtest.cc
@@ -96,15 +96,6 @@ void *solve(void* arg)
   pthread_exit(NULL);
 }
 
-void *solve1(void* arg)
-{
-  thread_arg *amgarg=(thread_arg*) arg;
-  *amgarg->x=0;
-  (*amgarg->amg).apply(*amgarg->x,*amgarg->b);
-  (*amgarg->amg).post(*amgarg->x);
-  return 0;
-}
-
 void *solve2(void* arg)
 {
   thread_arg *amgarg=(thread_arg*) arg;
@@ -199,21 +190,6 @@ void testAMG(int N, int coarsenTarget, int ml)
   }
   void* retval;
 
-  for(int i=0; i < NUM_THREADS; ++i)
-    pthread_join(threads[i], &retval);
-
-  amgs.clear();
-  args.clear();
-  amg.pre(x, b);
-  amgs.resize(NUM_THREADS, amg);
-  for(int i=0; i < NUM_THREADS; ++i)
-  {
-    args[i].amg=&amgs[i];
-    args[i].b=&bs[i];
-    args[i].x=&xs[i];
-    args[i].fop=&fop;
-    pthread_create(&threads[i], NULL, solve1, (void*) &args[i]);
-  }
   for(int i=0; i < NUM_THREADS; ++i)
     pthread_join(threads[i], &retval);
 
diff --git a/dune/istl/paamg/test/pthreadtwoleveltest.cc b/dune/istl/paamg/test/pthreadtwoleveltest.cc
index d96428eb8826203524cafea85968f1f10478bab2..2f8f115e23dd969a32c06db7ee4787939879d051 100644
--- a/dune/istl/paamg/test/pthreadtwoleveltest.cc
+++ b/dune/istl/paamg/test/pthreadtwoleveltest.cc
@@ -77,15 +77,6 @@ void *solve(void* arg)
   pthread_exit(NULL);
 }
 
-void *solve1(void* arg)
-{
-  thread_arg *amgarg=(thread_arg*) arg;
-  *amgarg->x=0;
-  (*amgarg->amg).apply(*amgarg->x,*amgarg->b);
-  (*amgarg->amg).post(*amgarg->x);
-  return 0;
-}
-
 void *solve2(void* arg)
 {
   thread_arg *amgarg=(thread_arg*) arg;
@@ -173,21 +164,6 @@ void testTwoLevelMethod()
   }
   void* retval;
 
-  for(int i=0; i < NUM_THREADS; ++i)
-    pthread_join(threads[i], &retval);
-
-  amgs.clear();
-  args.clear();
-  preconditioner.pre(x, b);
-  amgs.resize(NUM_THREADS, preconditioner);
-  for(int i=0; i < NUM_THREADS; ++i)
-  {
-    args[i].amg=&amgs[i];
-    args[i].b=&bs[i];
-    args[i].x=&xs[i];
-    args[i].fop=&fop;
-    pthread_create(&threads[i], NULL, solve1, (void*) &args[i]);
-  }
   for(int i=0; i < NUM_THREADS; ++i)
     pthread_join(threads[i], &retval);
 
diff --git a/dune/istl/paamg/twolevelmethod.hh b/dune/istl/paamg/twolevelmethod.hh
index 6d82ae31afd209560a8225607485139e2389d7e6..abdfcab6801a97327ae672612d388eab3aef3c54 100644
--- a/dune/istl/paamg/twolevelmethod.hh
+++ b/dune/istl/paamg/twolevelmethod.hh
@@ -162,7 +162,7 @@ public:
     PropertiesGraph pg(mg,Dune::IdentityMap(),Dune::IdentityMap());
     typedef NegateSet<typename ParallelInformation::OwnerSet> OverlapFlags;
 
-    aggregatesMap_.reset(new AggregatesMap(pg.maxVertex()+1));
+    aggregatesMap_ = std::make_shared<AggregatesMap>(pg.maxVertex()+1);
 
     int noAggregates, isoAggregates, oneAggregates, skippedAggregates;
 
@@ -194,7 +194,7 @@ public:
     productBuilder.calculate(fineOperator.getmat(), *aggregatesMap_, *matrix_, pinfo, OverlapFlags());
     this->lhs_.resize(this->matrix_->M());
     this->rhs_.resize(this->matrix_->N());
-    this->operator_.reset(new O(*matrix_));
+    this->operator_ = std::make_shared<O>(*matrix_);
   }
 
   void moveToCoarseLevel(const typename FatherType::FineRangeType& fineRhs)
diff --git a/dune/istl/repartition.hh b/dune/istl/repartition.hh
index cc61b88efa60636fb23e60ae3bad466c98445283..05863afbd894ec2c389f815ecd8b101c6a72bf5b 100644
--- a/dune/istl/repartition.hh
+++ b/dune/istl/repartition.hh
@@ -783,7 +783,7 @@ namespace Dune
   template<class G, class T1, class T2>
   bool buildCommunication(const G& graph, std::vector<int>& realparts,
                           Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm,
-                          Dune::OwnerOverlapCopyCommunication<T1,T2>*& outcomm,
+                          std::shared_ptr<Dune::OwnerOverlapCopyCommunication<T1,T2>>& outcomm,
                           RedistributeInterface& redistInf,
                           bool verbose=false);
 #if HAVE_PARMETIS
@@ -855,7 +855,7 @@ namespace Dune
   template<class M, class T1, class T2>
   bool commGraphRepartition(const M& mat, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm,
                             Metis::idx_t nparts,
-                            Dune::OwnerOverlapCopyCommunication<T1,T2>*& outcomm,
+                            std::shared_ptr<Dune::OwnerOverlapCopyCommunication<T1,T2>>& outcomm,
                             RedistributeInterface& redistInf,
                             bool verbose=false)
   {
@@ -1264,7 +1264,7 @@ namespace Dune
    */
   template<class G, class T1, class T2>
   bool graphRepartition(const G& graph, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm, Metis::idx_t nparts,
-                        Dune::OwnerOverlapCopyCommunication<T1,T2>*& outcomm,
+                        std::shared_ptr<Dune::OwnerOverlapCopyCommunication<T1,T2>>& outcomm,
                         RedistributeInterface& redistInf,
                         bool verbose=false)
   {
@@ -1487,7 +1487,7 @@ namespace Dune
   template<class G, class T1, class T2>
   bool buildCommunication(const G& graph,
                           std::vector<int>& setPartition, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm,
-                          Dune::OwnerOverlapCopyCommunication<T1,T2>*& outcomm,
+                          std::shared_ptr<Dune::OwnerOverlapCopyCommunication<T1,T2>>& outcomm,
                           RedistributeInterface& redistInf,
                           bool verbose)
   {
@@ -1758,7 +1758,7 @@ namespace Dune
     MPI_Comm outputComm;
 
     MPI_Comm_split(oocomm.communicator(), color, oocomm.communicator().rank(), &outputComm);
-    outcomm = new OOComm(outputComm,SolverCategory::category(oocomm),true);
+    outcomm = std::make_shared<OOComm>(outputComm,SolverCategory::category(oocomm),true);
 
     // translate neighbor ranks.
     int newrank=outcomm->communicator().rank();
@@ -1919,7 +1919,7 @@ namespace Dune
 #else
   template<class G, class P,class T1, class T2, class R>
   bool graphRepartition(const G& graph, P& oocomm, int nparts,
-                        P*& outcomm,
+                        std::shared_ptr<P>& outcomm,
                         R& redistInf,
                         bool v=false)
   {
@@ -1930,7 +1930,7 @@ namespace Dune
 
   template<class G, class P,class T1, class T2, class R>
   bool commGraphRepartition(const G& graph, P& oocomm, int nparts,
-                            P*& outcomm,
+                            std::shared_ptr<P>& outcomm,
                             R& redistInf,
                             bool v=false)
   {
diff --git a/dune/istl/schwarz.hh b/dune/istl/schwarz.hh
index a81d02b5d2032f7a356c85029c27de24fe7ac350..375525e03c0d011c2634f740bf78d5fde09a6daa 100644
--- a/dune/istl/schwarz.hh
+++ b/dune/istl/schwarz.hh
@@ -294,8 +294,19 @@ namespace Dune {
        \param c The communication object for syncing overlap and copy
        data points. (E.~g. OwnerOverlapCopyCommunication )
      */
-    BlockPreconditioner (T& p, const communication_type& c)
-      : preconditioner(p), communication(c)
+    BlockPreconditioner (Preconditioner<X,Y>& p, const communication_type& c)
+      : _preconditioner(stackobject_to_shared_ptr(p)), _communication(c)
+    {   }
+
+    /*! \brief Constructor.
+
+       constructor gets all parameters to operate the prec.
+       \param p The sequential preconditioner.
+       \param c The communication object for syncing overlap and copy
+       data points. (E.~g. OwnerOverlapCopyCommunication )
+     */
+    BlockPreconditioner (const std::shared_ptr<Preconditioner<X,Y>>& p, const communication_type& c)
+      : _preconditioner(p), _communication(c)
     {   }
 
     /*!
@@ -305,8 +316,8 @@ namespace Dune {
      */
     virtual void pre (X& x, Y& b)
     {
-      communication.copyOwnerToAll(x,x);     // make dirichlet values consistent
-      preconditioner.pre(x,b);
+      _communication.copyOwnerToAll(x,x);     // make dirichlet values consistent
+      _preconditioner->pre(x,b);
     }
 
     /*!
@@ -316,15 +327,15 @@ namespace Dune {
      */
     virtual void apply (X& v, const Y& d)
     {
-      preconditioner.apply(v,d);
-      communication.copyOwnerToAll(v,v);
+      _preconditioner->apply(v,d);
+      _communication.copyOwnerToAll(v,v);
     }
 
     template<bool forward>
     void apply (X& v, const Y& d)
     {
-      preconditioner.template apply<forward>(v,d);
-      communication.copyOwnerToAll(v,v);
+      _preconditioner->template apply<forward>(v,d);
+      _communication.copyOwnerToAll(v,v);
     }
 
     /*!
@@ -334,7 +345,7 @@ namespace Dune {
      */
     virtual void post (X& x)
     {
-      preconditioner.post(x);
+      _preconditioner->post(x);
     }
 
     //! Category of the preconditioner (see SolverCategory::Category)
@@ -345,10 +356,10 @@ namespace Dune {
 
   private:
     //! \brief a sequential preconditioner
-    T& preconditioner;
+    std::shared_ptr<Preconditioner<X,Y>> _preconditioner;
 
     //! \brief the communication object
-    const communication_type& communication;
+    const communication_type& _communication;
   };
 
   /** @} end documentation */
diff --git a/dune/istl/test/matrixredisttest.cc b/dune/istl/test/matrixredisttest.cc
index f2991bc1540486dd32236fb56125d638e590b36d..ba52ae5cf86d7d6591f4bb410a7e03c8dd9f281c 100644
--- a/dune/istl/test/matrixredisttest.cc
+++ b/dune/istl/test/matrixredisttest.cc
@@ -59,7 +59,7 @@ int testRepart(int N, int coarsenTarget)
   typedef typename Dune::Amg::MatrixGraph<BCRSMat> MatrixGraph;
 
   MatrixGraph graph(mat);
-  Communication* coarseComm;
+  std::shared_ptr<Communication> coarseComm;
 
   comm.remoteIndices().template rebuild<false>();
 
@@ -112,7 +112,6 @@ int testRepart(int N, int coarsenTarget)
 
   //if(coarseComm->communicator().rank()==0)
   //Dune::printmatrix(std::cout, newMat, "redist", "row");
-  delete coarseComm;
   return ret;
 }