From 4d7d1a6cff74c524ec34909b01141ad1814dc1af Mon Sep 17 00:00:00 2001
From: Christian Engwer <christi@dune-project.org>
Date: Sat, 26 Jan 2019 22:18:44 +0100
Subject: [PATCH] [amg] split hierarchy.hh

Hierarchy and MatrixHierarchy now have separate headers to improve
readability.
---
 dune/istl/paamg/CMakeLists.txt        |   1 +
 dune/istl/paamg/amg.hh                |   2 +-
 dune/istl/paamg/fastamg.hh            |   2 +-
 dune/istl/paamg/hierarchy.hh          | 956 -------------------------
 dune/istl/paamg/matrixhierarchy.hh    | 974 ++++++++++++++++++++++++++
 dune/istl/paamg/test/hierarchytest.cc |   2 +-
 6 files changed, 978 insertions(+), 959 deletions(-)
 create mode 100644 dune/istl/paamg/matrixhierarchy.hh

diff --git a/dune/istl/paamg/CMakeLists.txt b/dune/istl/paamg/CMakeLists.txt
index 457db831a..4bf5dc147 100644
--- a/dune/istl/paamg/CMakeLists.txt
+++ b/dune/istl/paamg/CMakeLists.txt
@@ -14,6 +14,7 @@ install(FILES
   graph.hh
   graphcreator.hh
   hierarchy.hh
+  matrixhierarchy.hh
   indicescoarsener.hh
   kamg.hh
   parameters.hh
diff --git a/dune/istl/paamg/amg.hh b/dune/istl/paamg/amg.hh
index 24856b23a..e3a520a04 100644
--- a/dune/istl/paamg/amg.hh
+++ b/dune/istl/paamg/amg.hh
@@ -7,7 +7,7 @@
 #include <dune/common/exceptions.hh>
 #include <dune/istl/paamg/smoother.hh>
 #include <dune/istl/paamg/transfer.hh>
-#include <dune/istl/paamg/hierarchy.hh>
+#include <dune/istl/paamg/matrixhierarchy.hh>
 #include <dune/istl/solvers.hh>
 #include <dune/istl/scalarproducts.hh>
 #include <dune/istl/superlu.hh>
diff --git a/dune/istl/paamg/fastamg.hh b/dune/istl/paamg/fastamg.hh
index 3bc4bab8b..2ecf5a4a0 100644
--- a/dune/istl/paamg/fastamg.hh
+++ b/dune/istl/paamg/fastamg.hh
@@ -9,7 +9,7 @@
 #include <dune/common/unused.hh>
 #include <dune/istl/paamg/smoother.hh>
 #include <dune/istl/paamg/transfer.hh>
-#include <dune/istl/paamg/hierarchy.hh>
+#include <dune/istl/paamg/matrixhierarchy.hh>
 #include <dune/istl/solvers.hh>
 #include <dune/istl/scalarproducts.hh>
 #include <dune/istl/superlu.hh>
diff --git a/dune/istl/paamg/hierarchy.hh b/dune/istl/paamg/hierarchy.hh
index f113d0043..1a5bee6ab 100644
--- a/dune/istl/paamg/hierarchy.hh
+++ b/dune/istl/paamg/hierarchy.hh
@@ -6,28 +6,11 @@
 #include <list>
 #include <memory>
 #include <limits>
-#include <algorithm>
-#include <tuple>
-#include "aggregates.hh"
-#include "graph.hh"
-#include "galerkin.hh"
-#include "renumberer.hh"
-#include "graphcreator.hh"
 #include <dune/common/stdstreams.hh>
 #include <dune/common/unused.hh>
 #include <dune/common/timer.hh>
 #include <dune/common/bigunsignedint.hh>
-#include <dune/istl/bvector.hh>
-#include <dune/common/parallel/indexset.hh>
-#include <dune/istl/matrixutils.hh>
-#include <dune/istl/matrixredistribute.hh>
-#include <dune/istl/paamg/dependency.hh>
-#include <dune/istl/paamg/graph.hh>
-#include <dune/istl/paamg/indicescoarsener.hh>
-#include <dune/istl/paamg/globalaggregates.hh>
 #include <dune/istl/paamg/construction.hh>
-#include <dune/istl/paamg/smoother.hh>
-#include <dune/istl/paamg/transfer.hh>
 
 namespace Dune
 {
@@ -43,16 +26,6 @@ namespace Dune
      * @author Markus Blatt
      * @brief Provides a classes representing the hierarchies in AMG.
      */
-    enum {
-      /**
-       * @brief Hard limit for the number of processes allowed.
-       *
-       * This is needed to prevent overflows when calculating
-       * the coarsening rate. Currently set 72,000 which is
-       * enough for JUGENE.
-       */
-      MAX_PROCESSES = 72000
-    };
 
     /**
      * @brief A hierarchy of containers (e.g. matrices or vectors)
@@ -295,935 +268,6 @@ namespace Dune
       int levels_;
     };
 
-    /**
-     * @brief The hierarchies build by the coarsening process.
-     *
-     * Namely a hierarchy of matrices, index sets, remote indices,
-     * interfaces and communicators.
-     */
-    template<class M, class PI, class A=std::allocator<M> >
-    class MatrixHierarchy
-    {
-    public:
-      /** @brief The type of the matrix operator. */
-      typedef M MatrixOperator;
-
-      /** @brief The type of the matrix. */
-      typedef typename MatrixOperator::matrix_type Matrix;
-
-      /** @brief The type of the index set. */
-      typedef PI ParallelInformation;
-
-      /** @brief The allocator to use. */
-      typedef A Allocator;
-
-      /** @brief The type of the aggregates map we use. */
-      typedef Dune::Amg::AggregatesMap<typename MatrixGraph<Matrix>::VertexDescriptor> AggregatesMap;
-
-      /** @brief The type of the parallel matrix hierarchy. */
-      typedef Dune::Amg::Hierarchy<MatrixOperator,Allocator> ParallelMatrixHierarchy;
-
-      /** @brief The type of the parallel informarion hierarchy. */
-      typedef Dune::Amg::Hierarchy<ParallelInformation,Allocator> ParallelInformationHierarchy;
-
-      /** @brief Allocator for pointers. */
-      typedef typename Allocator::template rebind<AggregatesMap*>::other AAllocator;
-
-      /** @brief The type of the aggregates maps list. */
-      typedef std::list<AggregatesMap*,AAllocator> AggregatesMapList;
-
-      /** @brief The type of the redistribute information. */
-      typedef RedistributeInformation<ParallelInformation> RedistributeInfoType;
-
-      /** @brief Allocator for RedistributeInfoType. */
-      typedef typename Allocator::template rebind<RedistributeInfoType>::other RILAllocator;
-
-      /** @brief The type of the list of redistribute information. */
-      typedef std::list<RedistributeInfoType,RILAllocator> RedistributeInfoList;
-
-      /**
-       * @brief Constructor
-       * @param fineMatrix The matrix to coarsen.
-       * @param pinfo The information about the parallel data decomposition at the first level.
-       */
-      MatrixHierarchy(std::shared_ptr<MatrixOperator> fineMatrix,
-        std::shared_ptr<ParallelInformation> pinfo = std::make_shared<ParallelInformation>());
-
-      ~MatrixHierarchy();
-
-      /**
-       * @brief Build the matrix hierarchy using aggregation.
-       *
-       * @brief criterion The criterion describing the aggregation process.
-       */
-      template<typename O, typename T>
-      void build(const T& criterion);
-
-      /**
-       * @brief Recalculate the galerkin products.
-       *
-       * If the data of the fine matrix changes but not its sparsity pattern
-       * this will recalculate all coarser levels without starting the expensive
-       * aggregation process all over again.
-       */
-      template<class F>
-      void recalculateGalerkin(const F& copyFlags);
-
-      /**
-       * @brief Coarsen the vector hierarchy according to the matrix hierarchy.
-       * @param hierarchy The vector hierarchy to coarsen.
-       */
-      template<class V, class BA, class TA>
-      void coarsenVector(Hierarchy<BlockVector<V,BA>, TA>& hierarchy) const;
-
-      /**
-       * @brief Coarsen the smoother hierarchy according to the matrix hierarchy.
-       * @param smoothers The smoother hierarchy to coarsen.
-       * @param args The arguments for the construction of the coarse level smoothers.
-       */
-      template<class S, class TA>
-      void coarsenSmoother(Hierarchy<S,TA>& smoothers,
-                           const typename SmootherTraits<S>::Arguments& args) const;
-
-      /**
-       * @brief Get the number of levels in the hierarchy.
-       * @return The number of levels.
-       */
-      std::size_t levels() const;
-
-      /**
-       * @brief Get the max number of levels in the hierarchy of processors.
-       * @return The maximum number of levels.
-       */
-      std::size_t maxlevels() const;
-
-      bool hasCoarsest() const;
-
-      /**
-       * @brief Whether the hierarchy was built.
-       * @return true if the MatrixHierarchy::build method was called.
-       */
-      bool isBuilt() const;
-
-      /**
-       * @brief Get the matrix hierarchy.
-       * @return The matrix hierarchy.
-       */
-      const ParallelMatrixHierarchy& matrices() const;
-
-      /**
-       * @brief Get the hierarchy of the parallel data distribution information.
-       * @return The hierarchy of the parallel data distribution information.
-       */
-      const ParallelInformationHierarchy& parallelInformation() const;
-
-      /**
-       * @brief Get the hierarchy of the mappings of the nodes onto aggregates.
-       * @return The hierarchy of the mappings of the nodes onto aggregates.
-       */
-      const AggregatesMapList& aggregatesMaps() const;
-
-      /**
-       * @brief Get the hierarchy of the information about redistributions,
-       * @return The hierarchy of the information about redistributions of the
-       * data to fewer processes.
-       */
-      const RedistributeInfoList& redistributeInformation() const;
-
-      double getProlongationDampingFactor() const
-      {
-        return prolongDamp_;
-      }
-
-      /**
-       * @brief Get the mapping of fine level unknowns to coarse level
-       * aggregates.
-       *
-       * For each fine level unknown i the correcponding data[i] is the
-       * aggregate it belongs to on the coarsest level.
-       *
-       * @param[out] data The mapping of fine level unknowns to coarse level
-       * aggregates.
-       */
-      void getCoarsestAggregatesOnFinest(std::vector<std::size_t>& data) const;
-
-    private:
-      typedef typename ConstructionTraits<MatrixOperator>::Arguments MatrixArgs;
-      typedef typename ConstructionTraits<ParallelInformation>::Arguments CommunicationArgs;
-      /** @brief The list of aggregates maps. */
-      AggregatesMapList aggregatesMaps_;
-      /** @brief The list of redistributes. */
-      RedistributeInfoList redistributes_;
-      /** @brief The hierarchy of parallel matrices. */
-      ParallelMatrixHierarchy matrices_;
-      /** @brief The hierarchy of the parallel information. */
-      ParallelInformationHierarchy parallelInformation_;
-
-      /** @brief Whether the hierarchy was built. */
-      bool built_;
-
-      /** @brief The maximum number of level across all processors.*/
-      int maxlevels_;
-
-      double prolongDamp_;
-
-      /**
-       * @brief functor to print matrix statistics.
-       */
-      template<class Matrix, bool print>
-      struct MatrixStats
-      {
-
-        /**
-         * @brief Print matrix statistics.
-         */
-        static void stats(const Matrix& matrix)
-        {
-          DUNE_UNUSED_PARAMETER(matrix);
-        }
-      };
-
-      template<class Matrix>
-      struct MatrixStats<Matrix,true>
-      {
-        struct calc
-        {
-          typedef typename Matrix::size_type size_type;
-          typedef typename Matrix::row_type matrix_row;
-
-          calc()
-          {
-            min=std::numeric_limits<size_type>::max();
-            max=0;
-            sum=0;
-          }
-
-          void operator()(const matrix_row& row)
-          {
-            min=std::min(min, row.size());
-            max=std::max(max, row.size());
-            sum += row.size();
-          }
-
-          size_type min;
-          size_type max;
-          size_type sum;
-        };
-        /**
-         * @brief Print matrix statistics.
-         */
-        static void stats(const Matrix& matrix)
-        {
-          calc c= for_each(matrix.begin(), matrix.end(), calc());
-          dinfo<<"Matrix row: min="<<c.min<<" max="<<c.max
-               <<" average="<<static_cast<double>(c.sum)/matrix.N()
-               <<std::endl;
-        }
-      };
-    };
-
-    /**
-     * @brief The criterion describing the stop criteria for the coarsening process.
-     */
-    template<class T>
-    class CoarsenCriterion : public T
-    {
-    public:
-      /**
-       * @brief The criterion for tagging connections as strong and nodes as isolated.
-       * This might be e.g. SymmetricCriterion or UnSymmetricCriterion.
-       */
-      typedef T AggregationCriterion;
-
-      /**
-       * @brief Constructor
-       * @param maxLevel The maximum number of levels allowed in the matrix hierarchy (default: 100).
-       * @param coarsenTarget If the number of nodes in the matrix is below this threshold the
-       * coarsening will stop (default: 1000).
-       * @param minCoarsenRate If the coarsening rate falls below this threshold the
-       * coarsening will stop (default: 1.2)
-       * @param prolongDamp The damping factor to apply to the prolongated update (default: 1.6)
-       * @param accumulate Whether to accumulate the data onto fewer processors on coarser levels.
-       */
-      CoarsenCriterion(int maxLevel=100, int coarsenTarget=1000, double minCoarsenRate=1.2,
-                       double prolongDamp=1.6, AccumulationMode accumulate=successiveAccu)
-        : AggregationCriterion(Dune::Amg::Parameters(maxLevel, coarsenTarget, minCoarsenRate, prolongDamp, accumulate))
-      {}
-
-      CoarsenCriterion(const Dune::Amg::Parameters& parms)
-        : AggregationCriterion(parms)
-      {}
-
-    };
-
-    template<typename M, typename C1>
-    bool repartitionAndDistributeMatrix(const M& origMatrix,
-                                        std::shared_ptr<M> newMatrix,
-                                        SequentialInformation& origComm,
-                                        std::shared_ptr<SequentialInformation>& newComm,
-                                        RedistributeInformation<SequentialInformation>& ri,
-                                        int nparts, C1& criterion)
-    {
-      DUNE_UNUSED_PARAMETER(origMatrix);
-      DUNE_UNUSED_PARAMETER(newMatrix);
-      DUNE_UNUSED_PARAMETER(origComm);
-      DUNE_UNUSED_PARAMETER(newComm);
-      DUNE_UNUSED_PARAMETER(ri);
-      DUNE_UNUSED_PARAMETER(nparts);
-      DUNE_UNUSED_PARAMETER(criterion);
-      DUNE_THROW(NotImplemented, "Redistribution does not make sense in sequential code!");
-    }
-
-
-    #warning make origComm const
-    template<typename M, typename C, typename C1>
-    bool repartitionAndDistributeMatrix(const M& origMatrix,
-                                        std::shared_ptr<M> newMatrix,
-                                        C& origComm,
-                                        std::shared_ptr<C>& newComm,
-                                        RedistributeInformation<C>& ri,
-                                        int nparts, C1& criterion)
-    {
-      Timer time;
-#ifdef AMG_REPART_ON_COMM_GRAPH
-      // Done not repartition the matrix graph, but a graph of the communication scheme.
-      bool existentOnRedist=Dune::commGraphRepartition(origMatrix, origComm, nparts, newComm,
-                                                       ri.getInterface(),
-                                                       criterion.debugLevel()>1);
-
-#else
-      typedef Dune::Amg::MatrixGraph<const M> MatrixGraph;
-      typedef Dune::Amg::PropertiesGraph<MatrixGraph,
-          VertexProperties,
-          EdgeProperties,
-          IdentityMap,
-          IdentityMap> PropertiesGraph;
-      MatrixGraph graph(origMatrix);
-      PropertiesGraph pgraph(graph);
-      buildDependency(pgraph, origMatrix, criterion, false);
-
-#ifdef DEBUG_REPART
-      if(origComm.communicator().rank()==0)
-        std::cout<<"Original matrix"<<std::endl;
-      origComm.communicator().barrier();
-      printGlobalSparseMatrix(origMatrix, origComm, std::cout);
-#endif
-      bool existentOnRedist=Dune::graphRepartition(pgraph, origComm, nparts,
-                                                   newComm, ri.getInterface(),
-                                                   criterion.debugLevel()>1);
-#endif // if else AMG_REPART
-
-      if(origComm.communicator().rank()==0  && criterion.debugLevel()>1)
-        std::cout<<"Repartitioning took "<<time.elapsed()<<" seconds."<<std::endl;
-
-      ri.setSetup();
-
-#ifdef DEBUG_REPART
-      ri.checkInterface(origComm.indexSet(), newComm->indexSet(), origComm.communicator());
-#endif
-
-      redistributeMatrix(const_cast<M&>(origMatrix), *newMatrix, origComm, *newComm, ri);
-
-#ifdef DEBUG_REPART
-      if(origComm.communicator().rank()==0)
-        std::cout<<"Original matrix"<<std::endl;
-      origComm.communicator().barrier();
-      if(newComm->communicator().size()>0)
-        printGlobalSparseMatrix(*newMatrix, *newComm, std::cout);
-      origComm.communicator().barrier();
-#endif
-
-      if(origComm.communicator().rank()==0  && criterion.debugLevel()>1)
-        std::cout<<"Redistributing matrix took "<<time.elapsed()<<" seconds."<<std::endl;
-      return existentOnRedist;
-
-    }
-
-    // template<typename M>
-    // bool repartitionAndDistributeMatrix(M& origMatrix, M& newMatrix,
-    //                                     SequentialInformation& origComm,
-    //                                     SequentialInformation& newComm,
-    //                                     RedistributeInformation<SequentialInformation>& ri)
-    // {
-    //   return true;
-    // }
-
-    template<class M, class IS, class A>
-    MatrixHierarchy<M,IS,A>::MatrixHierarchy(std::shared_ptr<MatrixOperator> fineMatrix,
-                                             std::shared_ptr<ParallelInformation> pinfo)
-      : matrices_(fineMatrix),
-        parallelInformation_(pinfo)
-    {
-      if (SolverCategory::category(*fineMatrix) != SolverCategory::category(*pinfo))
-        DUNE_THROW(ISTLError, "MatrixOperator and ParallelInformation must belong to the same category!");
-    }
-
-    template<class M, class IS, class A>
-    template<typename O, typename T>
-    void MatrixHierarchy<M,IS,A>::build(const T& criterion)
-    {
-      prolongDamp_ = criterion.getProlongationDampingFactor();
-      typedef O OverlapFlags;
-      typedef typename ParallelMatrixHierarchy::Iterator MatIterator;
-      typedef typename ParallelInformationHierarchy::Iterator PInfoIterator;
-
-      static const int noints=(Dune::Amg::MAX_PROCESSES/4096>0) ? (Dune::Amg::MAX_PROCESSES/4096) : 1;
-
-      typedef bigunsignedint<sizeof(int)*8*noints> BIGINT;
-      GalerkinProduct<ParallelInformation> productBuilder;
-      MatIterator mlevel = matrices_.finest();
-      MatrixStats<typename M::matrix_type,MINIMAL_DEBUG_LEVEL<=INFO_DEBUG_LEVEL>::stats(mlevel->getmat());
-
-      PInfoIterator infoLevel = parallelInformation_.finest();
-      BIGINT finenonzeros=countNonZeros(mlevel->getmat());
-      finenonzeros = infoLevel->communicator().sum(finenonzeros);
-      BIGINT allnonzeros = finenonzeros;
-
-
-      int level = 0;
-      int rank = 0;
-
-      BIGINT unknowns = mlevel->getmat().N();
-
-      unknowns = infoLevel->communicator().sum(unknowns);
-      double dunknowns=unknowns.todouble();
-      infoLevel->buildGlobalLookup(mlevel->getmat().N());
-      redistributes_.push_back(RedistributeInfoType());
-
-      for(; level < criterion.maxLevel(); ++level, ++mlevel) {
-        assert(matrices_.levels()==redistributes_.size());
-        rank = infoLevel->communicator().rank();
-        if(rank==0 && criterion.debugLevel()>1)
-          std::cout<<"Level "<<level<<" has "<<dunknowns<<" unknowns, "<<dunknowns/infoLevel->communicator().size()
-                   <<" unknowns per proc (procs="<<infoLevel->communicator().size()<<")"<<std::endl;
-
-        MatrixOperator* matrix=&(*mlevel);
-        ParallelInformation* info =&(*infoLevel);
-
-        if((
-#if HAVE_PARMETIS
-             criterion.accumulate()==successiveAccu
-#else
-             false
-#endif
-             || (criterion.accumulate()==atOnceAccu
-                 && dunknowns < 30*infoLevel->communicator().size()))
-           && infoLevel->communicator().size()>1 &&
-           dunknowns/infoLevel->communicator().size() <= criterion.coarsenTarget())
-        {
-          // accumulate to fewer processors
-          std::shared_ptr<Matrix> redistMat = std::make_shared<Matrix>();
-          std::shared_ptr<ParallelInformation> redistComm;
-          std::size_t nodomains = (std::size_t)std::ceil(dunknowns/(criterion.minAggregateSize()
-                                                                    *criterion.coarsenTarget()));
-          if( nodomains<=criterion.minAggregateSize()/2 ||
-              dunknowns <= criterion.coarsenTarget() )
-            nodomains=1;
-
-          bool existentOnNextLevel =
-            repartitionAndDistributeMatrix(mlevel->getmat(), redistMat, *infoLevel,
-                                           redistComm, redistributes_.back(), nodomains,
-                                           criterion);
-          BIGINT unknownsRedist = redistMat->N();
-          unknownsRedist = infoLevel->communicator().sum(unknownsRedist);
-          dunknowns= unknownsRedist.todouble();
-          if(redistComm->communicator().rank()==0 && criterion.debugLevel()>1)
-            std::cout<<"Level "<<level<<" (redistributed) has "<<dunknowns<<" unknowns, "<<dunknowns/redistComm->communicator().size()
-                     <<" unknowns per proc (procs="<<redistComm->communicator().size()<<")"<<std::endl;
-          MatrixArgs args(*redistMat, *redistComm);
-          mlevel.addRedistributed(ConstructionTraits<MatrixOperator>::construct(args));
-          assert(mlevel.isRedistributed());
-          infoLevel.addRedistributed(redistComm);
-          infoLevel->freeGlobalLookup();
-
-          if(!existentOnNextLevel)
-            // We do not hold any data on the redistributed partitioning
-            break;
-
-          // Work on the redistributed Matrix from now on
-          matrix = &(mlevel.getRedistributed());
-          info = &(infoLevel.getRedistributed());
-          info->buildGlobalLookup(matrix->getmat().N());
-        }
-
-        rank = info->communicator().rank();
-        if(dunknowns <= criterion.coarsenTarget())
-          // No further coarsening needed
-          break;
-
-        typedef PropertiesGraphCreator<MatrixOperator,ParallelInformation> GraphCreator;
-        typedef typename GraphCreator::PropertiesGraph PropertiesGraph;
-        typedef typename GraphCreator::GraphTuple GraphTuple;
-
-        typedef typename PropertiesGraph::VertexDescriptor Vertex;
-
-        std::vector<bool> excluded(matrix->getmat().N(), false);
-
-        GraphTuple graphs = GraphCreator::create(*matrix, excluded, *info, OverlapFlags());
-
-        AggregatesMap* aggregatesMap=new AggregatesMap(std::get<1>(graphs)->maxVertex()+1);
-
-        aggregatesMaps_.push_back(aggregatesMap);
-
-        Timer watch;
-        watch.reset();
-        int noAggregates, isoAggregates, oneAggregates, skippedAggregates;
-
-        std::tie(noAggregates, isoAggregates, oneAggregates, skippedAggregates) =
-          aggregatesMap->buildAggregates(matrix->getmat(), *(std::get<1>(graphs)), criterion, level==0);
-
-        if(rank==0 && criterion.debugLevel()>2)
-          std::cout<<" Have built "<<noAggregates<<" aggregates totally ("<<isoAggregates<<" isolated aggregates, "<<
-          oneAggregates<<" aggregates of one vertex,  and skipped "<<
-          skippedAggregates<<" aggregates)."<<std::endl;
-#ifdef TEST_AGGLO
-        {
-          // calculate size of local matrix in the distributed direction
-          int start, end, overlapStart, overlapEnd;
-          int procs=info->communicator().rank();
-          int n = UNKNOWNS/procs; // number of unknowns per process
-          int bigger = UNKNOWNS%procs; // number of process with n+1 unknows
-
-          // Compute owner region
-          if(rank<bigger) {
-            start = rank*(n+1);
-            end   = (rank+1)*(n+1);
-          }else{
-            start = bigger + rank * n;
-            end   = bigger + (rank + 1) * n;
-          }
-
-          // Compute overlap region
-          if(start>0)
-            overlapStart = start - 1;
-          else
-            overlapStart = start;
-
-          if(end<UNKNOWNS)
-            overlapEnd = end + 1;
-          else
-            overlapEnd = end;
-
-          assert((UNKNOWNS)*(overlapEnd-overlapStart)==aggregatesMap->noVertices());
-          for(int j=0; j< UNKNOWNS; ++j)
-            for(int i=0; i < UNKNOWNS; ++i)
-            {
-              if(i>=overlapStart && i<overlapEnd)
-              {
-                int no = (j/2)*((UNKNOWNS)/2)+i/2;
-                (*aggregatesMap)[j*(overlapEnd-overlapStart)+i-overlapStart]=no;
-              }
-            }
-        }
-#endif
-        if(criterion.debugLevel()>1 && info->communicator().rank()==0)
-          std::cout<<"aggregating finished."<<std::endl;
-
-        BIGINT gnoAggregates=noAggregates;
-        gnoAggregates = info->communicator().sum(gnoAggregates);
-        double dgnoAggregates = gnoAggregates.todouble();
-#ifdef TEST_AGGLO
-        BIGINT gnoAggregates=((UNKNOWNS)/2)*((UNKNOWNS)/2);
-#endif
-
-        if(criterion.debugLevel()>2 && rank==0)
-          std::cout << "Building "<<dgnoAggregates<<" aggregates took "<<watch.elapsed()<<" seconds."<<std::endl;
-
-        if(dgnoAggregates==0 || dunknowns/dgnoAggregates<criterion.minCoarsenRate())
-        {
-          if(rank==0)
-          {
-            if(dgnoAggregates>0)
-              std::cerr << "Stopped coarsening because of rate breakdown "<<dunknowns<<"/"<<dgnoAggregates
-                        <<"="<<dunknowns/dgnoAggregates<<"<"
-                        <<criterion.minCoarsenRate()<<std::endl;
-            else
-              std::cerr<< "Could not build any aggregates. Probably no connected nodes."<<std::endl;
-          }
-          aggregatesMap->free();
-          delete aggregatesMap;
-          aggregatesMaps_.pop_back();
-
-          if(criterion.accumulate() && mlevel.isRedistributed() && info->communicator().size()>1) {
-            // coarse level matrix was already redistributed, but to more than 1 process
-            // Therefore need to delete the redistribution. Further down it will
-            // then be redistributed to 1 process
-            delete &(mlevel.getRedistributed().getmat());
-            mlevel.deleteRedistributed();
-            delete &(infoLevel.getRedistributed());
-            infoLevel.deleteRedistributed();
-            redistributes_.back().resetSetup();
-          }
-
-          break;
-        }
-        unknowns =  noAggregates;
-        dunknowns = dgnoAggregates;
-
-        CommunicationArgs commargs(info->communicator(),info->category());
-        parallelInformation_.addCoarser(commargs);
-
-        ++infoLevel; // parallel information on coarse level
-
-        typename PropertyMapTypeSelector<VertexVisitedTag,PropertiesGraph>::Type visitedMap =
-          get(VertexVisitedTag(), *(std::get<1>(graphs)));
-
-        watch.reset();
-        int aggregates = IndicesCoarsener<ParallelInformation,OverlapFlags>
-                         ::coarsen(*info,
-                                   *(std::get<1>(graphs)),
-                                   visitedMap,
-                                   *aggregatesMap,
-                                   *infoLevel,
-                                   noAggregates);
-        GraphCreator::free(graphs);
-
-        if(criterion.debugLevel()>2) {
-          if(rank==0)
-            std::cout<<"Coarsening of index sets took "<<watch.elapsed()<<" seconds."<<std::endl;
-        }
-
-        watch.reset();
-
-        infoLevel->buildGlobalLookup(aggregates);
-        AggregatesPublisher<Vertex,OverlapFlags,ParallelInformation>::publish(*aggregatesMap,
-                                                                              *info,
-                                                                              infoLevel->globalLookup());
-
-
-        if(criterion.debugLevel()>2) {
-          if(rank==0)
-            std::cout<<"Communicating global aggregate numbers took "<<watch.elapsed()<<" seconds."<<std::endl;
-        }
-
-        watch.reset();
-        std::vector<bool>& visited=excluded;
-
-        typedef std::vector<bool>::iterator Iterator;
-        typedef IteratorPropertyMap<Iterator, IdentityMap> VisitedMap2;
-        Iterator end = visited.end();
-        for(Iterator iter= visited.begin(); iter != end; ++iter)
-          *iter=false;
-
-        VisitedMap2 visitedMap2(visited.begin(), Dune::IdentityMap());
-
-        typename MatrixOperator::matrix_type* coarseMatrix;
-
-        coarseMatrix = productBuilder.build(*(std::get<0>(graphs)), visitedMap2,
-                                            *info,
-                                            *aggregatesMap,
-                                            aggregates,
-                                            OverlapFlags());
-        dverb<<"Building of sparsity pattern took "<<watch.elapsed()<<std::endl;
-        watch.reset();
-        info->freeGlobalLookup();
-
-        delete std::get<0>(graphs);
-        productBuilder.calculate(matrix->getmat(), *aggregatesMap, *coarseMatrix, *infoLevel, OverlapFlags());
-
-        if(criterion.debugLevel()>2) {
-          if(rank==0)
-            std::cout<<"Calculation entries of Galerkin product took "<<watch.elapsed()<<" seconds."<<std::endl;
-        }
-
-        BIGINT nonzeros = countNonZeros(*coarseMatrix);
-        allnonzeros = allnonzeros + infoLevel->communicator().sum(nonzeros);
-        MatrixArgs args(*coarseMatrix, *infoLevel);
-
-        matrices_.addCoarser(args);
-        redistributes_.push_back(RedistributeInfoType());
-      } // end level loop
-
-
-      infoLevel->freeGlobalLookup();
-
-      built_=true;
-      AggregatesMap* aggregatesMap=new AggregatesMap(0);
-      aggregatesMaps_.push_back(aggregatesMap);
-
-      if(criterion.debugLevel()>0) {
-        if(level==criterion.maxLevel()) {
-          BIGINT unknownsLevel = mlevel->getmat().N();
-          unknownsLevel = infoLevel->communicator().sum(unknownsLevel);
-          double dunknownsLevel = unknownsLevel.todouble();
-          if(rank==0 && criterion.debugLevel()>1) {
-            std::cout<<"Level "<<level<<" has "<<dunknownsLevel<<" unknowns, "<<dunknownsLevel/infoLevel->communicator().size()
-                     <<" unknowns per proc (procs="<<infoLevel->communicator().size()<<")"<<std::endl;
-          }
-        }
-      }
-
-      if(criterion.accumulate() && !redistributes_.back().isSetup() &&
-         infoLevel->communicator().size()>1) {
-#if HAVE_MPI && !HAVE_PARMETIS
-        if(criterion.accumulate()==successiveAccu &&
-           infoLevel->communicator().rank()==0)
-          std::cerr<<"Successive accumulation of data on coarse levels only works with ParMETIS installed."
-                   <<"  Fell back to accumulation to one domain on coarsest level"<<std::endl;
-#endif
-
-        // accumulate to fewer processors
-        std::shared_ptr<Matrix> redistMat = std::make_shared<Matrix>();
-        std::shared_ptr<ParallelInformation> redistComm;
-        int nodomains = 1;
-
-        repartitionAndDistributeMatrix(mlevel->getmat(), redistMat, *infoLevel,
-                                       redistComm, redistributes_.back(), nodomains,criterion);
-        MatrixArgs args(*redistMat, *redistComm);
-        BIGINT unknownsRedist = redistMat->N();
-        unknownsRedist = infoLevel->communicator().sum(unknownsRedist);
-
-        if(redistComm->communicator().rank()==0 && criterion.debugLevel()>1) {
-          double dunknownsRedist = unknownsRedist.todouble();
-          std::cout<<"Level "<<level<<" redistributed has "<<dunknownsRedist<<" unknowns, "<<dunknownsRedist/redistComm->communicator().size()
-                   <<" unknowns per proc (procs="<<redistComm->communicator().size()<<")"<<std::endl;
-        }
-        mlevel.addRedistributed(ConstructionTraits<MatrixOperator>::construct(args));
-        infoLevel.addRedistributed(redistComm);
-        infoLevel->freeGlobalLookup();
-      }
-
-      int levels = matrices_.levels();
-      maxlevels_ = parallelInformation_.finest()->communicator().max(levels);
-      assert(matrices_.levels()==redistributes_.size());
-      if(hasCoarsest() && rank==0 && criterion.debugLevel()>1)
-        std::cout<<"operator complexity: "<<allnonzeros.todouble()/finenonzeros.todouble()<<std::endl;
-
-    }
-
-    template<class M, class IS, class A>
-    const typename MatrixHierarchy<M,IS,A>::ParallelMatrixHierarchy&
-    MatrixHierarchy<M,IS,A>::matrices() const
-    {
-      return matrices_;
-    }
-
-    template<class M, class IS, class A>
-    const typename MatrixHierarchy<M,IS,A>::ParallelInformationHierarchy&
-    MatrixHierarchy<M,IS,A>::parallelInformation() const
-    {
-      return parallelInformation_;
-    }
-
-    template<class M, class IS, class A>
-    void MatrixHierarchy<M,IS,A>::getCoarsestAggregatesOnFinest(std::vector<std::size_t>& data) const
-    {
-      int levels=aggregatesMaps().size();
-      int maxlevels=parallelInformation_.finest()->communicator().max(levels);
-      std::size_t size=(*(aggregatesMaps().begin()))->noVertices();
-      // We need an auxiliary vector for the consecutive prolongation.
-      std::vector<std::size_t> tmp;
-      std::vector<std::size_t> *coarse, *fine;
-
-      // make sure the allocated space suffices.
-      tmp.reserve(size);
-      data.reserve(size);
-
-      // Correctly assign coarse and fine for the first prolongation such that
-      // we end up in data in the end.
-      if(levels%2==0) {
-        coarse=&tmp;
-        fine=&data;
-      }else{
-        coarse=&data;
-        fine=&tmp;
-      }
-
-      // Number the unknowns on the coarsest level consecutively for each process.
-      if(levels==maxlevels) {
-        const AggregatesMap& map = *(*(++aggregatesMaps().rbegin()));
-        std::size_t m=0;
-
-        for(typename AggregatesMap::const_iterator iter = map.begin(); iter != map.end(); ++iter)
-          if(*iter< AggregatesMap::ISOLATED)
-            m=std::max(*iter,m);
-
-        coarse->resize(m+1);
-        std::size_t i=0;
-        srand((unsigned)std::clock());
-        std::set<size_t> used;
-        for(typename std::vector<std::size_t>::iterator iter=coarse->begin(); iter != coarse->end();
-            ++iter, ++i)
-        {
-          std::pair<std::set<std::size_t>::iterator,bool> ibpair
-            = used.insert(static_cast<std::size_t>((((double)rand())/(RAND_MAX+1.0)))*coarse->size());
-
-          while(!ibpair.second)
-            ibpair = used.insert(static_cast<std::size_t>((((double)rand())/(RAND_MAX+1.0))*coarse->size()));
-          *iter=*(ibpair.first);
-        }
-      }
-
-      typename ParallelInformationHierarchy::Iterator pinfo = parallelInformation().coarsest();
-      --pinfo;
-
-      // Now consecutively project the numbers to the finest level.
-      for(typename AggregatesMapList::const_reverse_iterator aggregates=++aggregatesMaps().rbegin();
-          aggregates != aggregatesMaps().rend(); ++aggregates,--levels) {
-
-        fine->resize((*aggregates)->noVertices());
-        fine->assign(fine->size(), 0);
-        Transfer<typename AggregatesMap::AggregateDescriptor, std::vector<std::size_t>, ParallelInformation>
-        ::prolongateVector(*(*aggregates), *coarse, *fine, static_cast<std::size_t>(1), *pinfo);
-        --pinfo;
-        std::swap(coarse, fine);
-      }
-
-      // Assertion to check that we really projected to data on the last step.
-      assert(coarse==&data);
-    }
-
-    template<class M, class IS, class A>
-    const typename MatrixHierarchy<M,IS,A>::AggregatesMapList&
-    MatrixHierarchy<M,IS,A>::aggregatesMaps() const
-    {
-      return aggregatesMaps_;
-    }
-    template<class M, class IS, class A>
-    const typename MatrixHierarchy<M,IS,A>::RedistributeInfoList&
-    MatrixHierarchy<M,IS,A>::redistributeInformation() const
-    {
-      return redistributes_;
-    }
-
-    template<class M, class IS, class A>
-    MatrixHierarchy<M,IS,A>::~MatrixHierarchy()
-    {
-      typedef typename AggregatesMapList::reverse_iterator AggregatesMapIterator;
-      typedef typename ParallelMatrixHierarchy::Iterator Iterator;
-      typedef typename ParallelInformationHierarchy::Iterator InfoIterator;
-
-      AggregatesMapIterator amap = aggregatesMaps_.rbegin();
-      InfoIterator info = parallelInformation_.coarsest();
-      for(Iterator level=matrices_.coarsest(), finest=matrices_.finest(); level != finest;  --level, --info, ++amap) {
-        (*amap)->free();
-        delete *amap;
-        delete &level->getmat();
-        if(level.isRedistributed())
-          delete &(level.getRedistributed().getmat());
-      }
-      delete *amap;
-    }
-
-    template<class M, class IS, class A>
-    template<class V, class BA, class TA>
-    void MatrixHierarchy<M,IS,A>::coarsenVector(Hierarchy<BlockVector<V,BA>, TA>& hierarchy) const
-    {
-      assert(hierarchy.levels()==1);
-      typedef typename ParallelMatrixHierarchy::ConstIterator Iterator;
-      typedef typename RedistributeInfoList::const_iterator RIter;
-      RIter redist = redistributes_.begin();
-
-      Iterator matrix = matrices_.finest(), coarsest = matrices_.coarsest();
-      int level=0;
-      if(redist->isSetup())
-        hierarchy.addRedistributedOnCoarsest(matrix.getRedistributed().getmat().N());
-      Dune::dvverb<<"Level "<<level<<" has "<<matrices_.finest()->getmat().N()<<" unknowns!"<<std::endl;
-
-      while(matrix != coarsest) {
-        ++matrix; ++level; ++redist;
-        Dune::dvverb<<"Level "<<level<<" has "<<matrix->getmat().N()<<" unknowns!"<<std::endl;
-
-        hierarchy.addCoarser(matrix->getmat().N());
-        if(redist->isSetup())
-          hierarchy.addRedistributedOnCoarsest(matrix.getRedistributed().getmat().N());
-
-      }
-
-    }
-
-    template<class M, class IS, class A>
-    template<class S, class TA>
-    void MatrixHierarchy<M,IS,A>::coarsenSmoother(Hierarchy<S,TA>& smoothers,
-                                                  const typename SmootherTraits<S>::Arguments& sargs) const
-    {
-      assert(smoothers.levels()==0);
-      typedef typename ParallelMatrixHierarchy::ConstIterator MatrixIterator;
-      typedef typename ParallelInformationHierarchy::ConstIterator PinfoIterator;
-      typedef typename AggregatesMapList::const_iterator AggregatesIterator;
-
-      typename ConstructionTraits<S>::Arguments cargs;
-      cargs.setArgs(sargs);
-      PinfoIterator pinfo = parallelInformation_.finest();
-      AggregatesIterator aggregates = aggregatesMaps_.begin();
-      int level=0;
-      for(MatrixIterator matrix = matrices_.finest(), coarsest = matrices_.coarsest();
-          matrix != coarsest; ++matrix, ++pinfo, ++aggregates, ++level) {
-        cargs.setMatrix(matrix->getmat(), **aggregates);
-        cargs.setComm(*pinfo);
-        smoothers.addCoarser(cargs);
-      }
-      if(maxlevels()>levels()) {
-        // This is not the globally coarsest level and therefore smoothing is needed
-        cargs.setMatrix(matrices_.coarsest()->getmat(), **aggregates);
-        cargs.setComm(*pinfo);
-        smoothers.addCoarser(cargs);
-        ++level;
-      }
-    }
-
-    template<class M, class IS, class A>
-    template<class F>
-    void MatrixHierarchy<M,IS,A>::recalculateGalerkin(const F& copyFlags)
-    {
-      typedef typename AggregatesMapList::iterator AggregatesMapIterator;
-      typedef typename ParallelMatrixHierarchy::Iterator Iterator;
-      typedef typename ParallelInformationHierarchy::Iterator InfoIterator;
-
-      AggregatesMapIterator amap = aggregatesMaps_.begin();
-      BaseGalerkinProduct productBuilder;
-      InfoIterator info = parallelInformation_.finest();
-      typename RedistributeInfoList::iterator riIter = redistributes_.begin();
-      Iterator level = matrices_.finest(), coarsest=matrices_.coarsest();
-      if(level.isRedistributed()) {
-        info->buildGlobalLookup(level->getmat().N());
-        redistributeMatrixEntries(const_cast<Matrix&>(level->getmat()),
-                                  const_cast<Matrix&>(level.getRedistributed().getmat()),
-                                  *info,info.getRedistributed(), *riIter);
-        info->freeGlobalLookup();
-      }
-
-      for(; level!=coarsest; ++amap) {
-        const Matrix& fine = (level.isRedistributed() ? level.getRedistributed() : *level).getmat();
-        ++level;
-        ++info;
-        ++riIter;
-        productBuilder.calculate(fine, *(*amap), const_cast<Matrix&>(level->getmat()), *info, copyFlags);
-        if(level.isRedistributed()) {
-          info->buildGlobalLookup(level->getmat().N());
-          redistributeMatrixEntries(const_cast<Matrix&>(level->getmat()),
-                                    const_cast<Matrix&>(level.getRedistributed().getmat()), *info,
-                                    info.getRedistributed(), *riIter);
-          info->freeGlobalLookup();
-        }
-      }
-    }
-
-    template<class M, class IS, class A>
-    std::size_t MatrixHierarchy<M,IS,A>::levels() const
-    {
-      return matrices_.levels();
-    }
-
-    template<class M, class IS, class A>
-    std::size_t MatrixHierarchy<M,IS,A>::maxlevels() const
-    {
-      return maxlevels_;
-    }
-
-    template<class M, class IS, class A>
-    bool MatrixHierarchy<M,IS,A>::hasCoarsest() const
-    {
-      return levels()==maxlevels() &&
-             (!matrices_.coarsest().isRedistributed() ||matrices_.coarsest()->getmat().N()>0);
-    }
-
-    template<class M, class IS, class A>
-    bool MatrixHierarchy<M,IS,A>::isBuilt() const
-    {
-      return built_;
-    }
-
     template<class T, class A>
     Hierarchy<T,A>::Hierarchy(const std::shared_ptr<MemberType> & first)
       : originalFinest_(first)
diff --git a/dune/istl/paamg/matrixhierarchy.hh b/dune/istl/paamg/matrixhierarchy.hh
new file mode 100644
index 000000000..9f5f355e9
--- /dev/null
+++ b/dune/istl/paamg/matrixhierarchy.hh
@@ -0,0 +1,974 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_AMG_MATRIXHIERARCHY_HH
+#define DUNE_AMG_MATRIXHIERARCHY_HH
+
+#include <algorithm>
+#include <tuple>
+#include "aggregates.hh"
+#include "graph.hh"
+#include "galerkin.hh"
+#include "renumberer.hh"
+#include "graphcreator.hh"
+#include "hierarchy.hh"
+#include <dune/istl/bvector.hh>
+#include <dune/common/parallel/indexset.hh>
+#include <dune/istl/matrixutils.hh>
+#include <dune/istl/matrixredistribute.hh>
+#include <dune/istl/paamg/dependency.hh>
+#include <dune/istl/paamg/graph.hh>
+#include <dune/istl/paamg/indicescoarsener.hh>
+#include <dune/istl/paamg/globalaggregates.hh>
+#include <dune/istl/paamg/construction.hh>
+#include <dune/istl/paamg/smoother.hh>
+#include <dune/istl/paamg/transfer.hh>
+
+namespace Dune
+{
+  namespace Amg
+  {
+    /**
+     * @addtogroup ISTL_PAAMG
+     *
+     * @{
+     */
+
+    /** @file
+     * @author Markus Blatt
+     * @brief Provides a classes representing the hierarchies in AMG.
+     */
+    enum {
+      /**
+       * @brief Hard limit for the number of processes allowed.
+       *
+       * This is needed to prevent overflows when calculating
+       * the coarsening rate. Currently set 72,000 which is
+       * enough for JUGENE.
+       */
+      MAX_PROCESSES = 72000
+    };
+
+    /**
+     * @brief The hierarchies build by the coarsening process.
+     *
+     * Namely a hierarchy of matrices, index sets, remote indices,
+     * interfaces and communicators.
+     */
+    template<class M, class PI, class A=std::allocator<M> >
+    class MatrixHierarchy
+    {
+    public:
+      /** @brief The type of the matrix operator. */
+      typedef M MatrixOperator;
+
+      /** @brief The type of the matrix. */
+      typedef typename MatrixOperator::matrix_type Matrix;
+
+      /** @brief The type of the index set. */
+      typedef PI ParallelInformation;
+
+      /** @brief The allocator to use. */
+      typedef A Allocator;
+
+      /** @brief The type of the aggregates map we use. */
+      typedef Dune::Amg::AggregatesMap<typename MatrixGraph<Matrix>::VertexDescriptor> AggregatesMap;
+
+      /** @brief The type of the parallel matrix hierarchy. */
+      typedef Dune::Amg::Hierarchy<MatrixOperator,Allocator> ParallelMatrixHierarchy;
+
+      /** @brief The type of the parallel informarion hierarchy. */
+      typedef Dune::Amg::Hierarchy<ParallelInformation,Allocator> ParallelInformationHierarchy;
+
+      /** @brief Allocator for pointers. */
+      typedef typename Allocator::template rebind<AggregatesMap*>::other AAllocator;
+
+      /** @brief The type of the aggregates maps list. */
+      typedef std::list<AggregatesMap*,AAllocator> AggregatesMapList;
+
+      /** @brief The type of the redistribute information. */
+      typedef RedistributeInformation<ParallelInformation> RedistributeInfoType;
+
+      /** @brief Allocator for RedistributeInfoType. */
+      typedef typename Allocator::template rebind<RedistributeInfoType>::other RILAllocator;
+
+      /** @brief The type of the list of redistribute information. */
+      typedef std::list<RedistributeInfoType,RILAllocator> RedistributeInfoList;
+
+      /**
+       * @brief Constructor
+       * @param fineMatrix The matrix to coarsen.
+       * @param pinfo The information about the parallel data decomposition at the first level.
+       */
+      MatrixHierarchy(std::shared_ptr<MatrixOperator> fineMatrix,
+        std::shared_ptr<ParallelInformation> pinfo = std::make_shared<ParallelInformation>());
+
+      ~MatrixHierarchy();
+
+      /**
+       * @brief Build the matrix hierarchy using aggregation.
+       *
+       * @brief criterion The criterion describing the aggregation process.
+       */
+      template<typename O, typename T>
+      void build(const T& criterion);
+
+      /**
+       * @brief Recalculate the galerkin products.
+       *
+       * If the data of the fine matrix changes but not its sparsity pattern
+       * this will recalculate all coarser levels without starting the expensive
+       * aggregation process all over again.
+       */
+      template<class F>
+      void recalculateGalerkin(const F& copyFlags);
+
+      /**
+       * @brief Coarsen the vector hierarchy according to the matrix hierarchy.
+       * @param hierarchy The vector hierarchy to coarsen.
+       */
+      template<class V, class BA, class TA>
+      void coarsenVector(Hierarchy<BlockVector<V,BA>, TA>& hierarchy) const;
+
+      /**
+       * @brief Coarsen the smoother hierarchy according to the matrix hierarchy.
+       * @param smoothers The smoother hierarchy to coarsen.
+       * @param args The arguments for the construction of the coarse level smoothers.
+       */
+      template<class S, class TA>
+      void coarsenSmoother(Hierarchy<S,TA>& smoothers,
+                           const typename SmootherTraits<S>::Arguments& args) const;
+
+      /**
+       * @brief Get the number of levels in the hierarchy.
+       * @return The number of levels.
+       */
+      std::size_t levels() const;
+
+      /**
+       * @brief Get the max number of levels in the hierarchy of processors.
+       * @return The maximum number of levels.
+       */
+      std::size_t maxlevels() const;
+
+      bool hasCoarsest() const;
+
+      /**
+       * @brief Whether the hierarchy was built.
+       * @return true if the MatrixHierarchy::build method was called.
+       */
+      bool isBuilt() const;
+
+      /**
+       * @brief Get the matrix hierarchy.
+       * @return The matrix hierarchy.
+       */
+      const ParallelMatrixHierarchy& matrices() const;
+
+      /**
+       * @brief Get the hierarchy of the parallel data distribution information.
+       * @return The hierarchy of the parallel data distribution information.
+       */
+      const ParallelInformationHierarchy& parallelInformation() const;
+
+      /**
+       * @brief Get the hierarchy of the mappings of the nodes onto aggregates.
+       * @return The hierarchy of the mappings of the nodes onto aggregates.
+       */
+      const AggregatesMapList& aggregatesMaps() const;
+
+      /**
+       * @brief Get the hierarchy of the information about redistributions,
+       * @return The hierarchy of the information about redistributions of the
+       * data to fewer processes.
+       */
+      const RedistributeInfoList& redistributeInformation() const;
+
+      double getProlongationDampingFactor() const
+      {
+        return prolongDamp_;
+      }
+
+      /**
+       * @brief Get the mapping of fine level unknowns to coarse level
+       * aggregates.
+       *
+       * For each fine level unknown i the correcponding data[i] is the
+       * aggregate it belongs to on the coarsest level.
+       *
+       * @param[out] data The mapping of fine level unknowns to coarse level
+       * aggregates.
+       */
+      void getCoarsestAggregatesOnFinest(std::vector<std::size_t>& data) const;
+
+    private:
+      typedef typename ConstructionTraits<MatrixOperator>::Arguments MatrixArgs;
+      typedef typename ConstructionTraits<ParallelInformation>::Arguments CommunicationArgs;
+      /** @brief The list of aggregates maps. */
+      AggregatesMapList aggregatesMaps_;
+      /** @brief The list of redistributes. */
+      RedistributeInfoList redistributes_;
+      /** @brief The hierarchy of parallel matrices. */
+      ParallelMatrixHierarchy matrices_;
+      /** @brief The hierarchy of the parallel information. */
+      ParallelInformationHierarchy parallelInformation_;
+
+      /** @brief Whether the hierarchy was built. */
+      bool built_;
+
+      /** @brief The maximum number of level across all processors.*/
+      int maxlevels_;
+
+      double prolongDamp_;
+
+      /**
+       * @brief functor to print matrix statistics.
+       */
+      template<class Matrix, bool print>
+      struct MatrixStats
+      {
+
+        /**
+         * @brief Print matrix statistics.
+         */
+        static void stats(const Matrix& matrix)
+        {
+          DUNE_UNUSED_PARAMETER(matrix);
+        }
+      };
+
+      template<class Matrix>
+      struct MatrixStats<Matrix,true>
+      {
+        struct calc
+        {
+          typedef typename Matrix::size_type size_type;
+          typedef typename Matrix::row_type matrix_row;
+
+          calc()
+          {
+            min=std::numeric_limits<size_type>::max();
+            max=0;
+            sum=0;
+          }
+
+          void operator()(const matrix_row& row)
+          {
+            min=std::min(min, row.size());
+            max=std::max(max, row.size());
+            sum += row.size();
+          }
+
+          size_type min;
+          size_type max;
+          size_type sum;
+        };
+        /**
+         * @brief Print matrix statistics.
+         */
+        static void stats(const Matrix& matrix)
+        {
+          calc c= for_each(matrix.begin(), matrix.end(), calc());
+          dinfo<<"Matrix row: min="<<c.min<<" max="<<c.max
+               <<" average="<<static_cast<double>(c.sum)/matrix.N()
+               <<std::endl;
+        }
+      };
+    };
+
+    /**
+     * @brief The criterion describing the stop criteria for the coarsening process.
+     */
+    template<class T>
+    class CoarsenCriterion : public T
+    {
+    public:
+      /**
+       * @brief The criterion for tagging connections as strong and nodes as isolated.
+       * This might be e.g. SymmetricCriterion or UnSymmetricCriterion.
+       */
+      typedef T AggregationCriterion;
+
+      /**
+       * @brief Constructor
+       * @param maxLevel The maximum number of levels allowed in the matrix hierarchy (default: 100).
+       * @param coarsenTarget If the number of nodes in the matrix is below this threshold the
+       * coarsening will stop (default: 1000).
+       * @param minCoarsenRate If the coarsening rate falls below this threshold the
+       * coarsening will stop (default: 1.2)
+       * @param prolongDamp The damping factor to apply to the prolongated update (default: 1.6)
+       * @param accumulate Whether to accumulate the data onto fewer processors on coarser levels.
+       */
+      CoarsenCriterion(int maxLevel=100, int coarsenTarget=1000, double minCoarsenRate=1.2,
+                       double prolongDamp=1.6, AccumulationMode accumulate=successiveAccu)
+        : AggregationCriterion(Dune::Amg::Parameters(maxLevel, coarsenTarget, minCoarsenRate, prolongDamp, accumulate))
+      {}
+
+      CoarsenCriterion(const Dune::Amg::Parameters& parms)
+        : AggregationCriterion(parms)
+      {}
+
+    };
+
+    template<typename M, typename C1>
+    bool repartitionAndDistributeMatrix(const M& origMatrix,
+                                        std::shared_ptr<M> newMatrix,
+                                        SequentialInformation& origComm,
+                                        std::shared_ptr<SequentialInformation>& newComm,
+                                        RedistributeInformation<SequentialInformation>& ri,
+                                        int nparts, C1& criterion)
+    {
+      DUNE_UNUSED_PARAMETER(origMatrix);
+      DUNE_UNUSED_PARAMETER(newMatrix);
+      DUNE_UNUSED_PARAMETER(origComm);
+      DUNE_UNUSED_PARAMETER(newComm);
+      DUNE_UNUSED_PARAMETER(ri);
+      DUNE_UNUSED_PARAMETER(nparts);
+      DUNE_UNUSED_PARAMETER(criterion);
+      DUNE_THROW(NotImplemented, "Redistribution does not make sense in sequential code!");
+    }
+
+
+    template<typename M, typename C, typename C1>
+    bool repartitionAndDistributeMatrix(const M& origMatrix,
+                                        std::shared_ptr<M> newMatrix,
+                                        C& origComm,
+                                        std::shared_ptr<C>& newComm,
+                                        RedistributeInformation<C>& ri,
+                                        int nparts, C1& criterion)
+    {
+      Timer time;
+#ifdef AMG_REPART_ON_COMM_GRAPH
+      // Done not repartition the matrix graph, but a graph of the communication scheme.
+      bool existentOnRedist=Dune::commGraphRepartition(origMatrix, origComm, nparts, newComm,
+                                                       ri.getInterface(),
+                                                       criterion.debugLevel()>1);
+
+#else
+      typedef Dune::Amg::MatrixGraph<const M> MatrixGraph;
+      typedef Dune::Amg::PropertiesGraph<MatrixGraph,
+          VertexProperties,
+          EdgeProperties,
+          IdentityMap,
+          IdentityMap> PropertiesGraph;
+      MatrixGraph graph(origMatrix);
+      PropertiesGraph pgraph(graph);
+      buildDependency(pgraph, origMatrix, criterion, false);
+
+#ifdef DEBUG_REPART
+      if(origComm.communicator().rank()==0)
+        std::cout<<"Original matrix"<<std::endl;
+      origComm.communicator().barrier();
+      printGlobalSparseMatrix(origMatrix, origComm, std::cout);
+#endif
+      bool existentOnRedist=Dune::graphRepartition(pgraph, origComm, nparts,
+                                                   newComm, ri.getInterface(),
+                                                   criterion.debugLevel()>1);
+#endif // if else AMG_REPART
+
+      if(origComm.communicator().rank()==0  && criterion.debugLevel()>1)
+        std::cout<<"Repartitioning took "<<time.elapsed()<<" seconds."<<std::endl;
+
+      ri.setSetup();
+
+#ifdef DEBUG_REPART
+      ri.checkInterface(origComm.indexSet(), newComm->indexSet(), origComm.communicator());
+#endif
+
+      redistributeMatrix(const_cast<M&>(origMatrix), *newMatrix, origComm, *newComm, ri);
+
+#ifdef DEBUG_REPART
+      if(origComm.communicator().rank()==0)
+        std::cout<<"Original matrix"<<std::endl;
+      origComm.communicator().barrier();
+      if(newComm->communicator().size()>0)
+        printGlobalSparseMatrix(*newMatrix, *newComm, std::cout);
+      origComm.communicator().barrier();
+#endif
+
+      if(origComm.communicator().rank()==0  && criterion.debugLevel()>1)
+        std::cout<<"Redistributing matrix took "<<time.elapsed()<<" seconds."<<std::endl;
+      return existentOnRedist;
+
+    }
+
+    template<class M, class IS, class A>
+    MatrixHierarchy<M,IS,A>::MatrixHierarchy(std::shared_ptr<MatrixOperator> fineMatrix,
+                                             std::shared_ptr<ParallelInformation> pinfo)
+      : matrices_(fineMatrix),
+        parallelInformation_(pinfo)
+    {
+      if (SolverCategory::category(*fineMatrix) != SolverCategory::category(*pinfo))
+        DUNE_THROW(ISTLError, "MatrixOperator and ParallelInformation must belong to the same category!");
+    }
+
+    template<class M, class IS, class A>
+    template<typename O, typename T>
+    void MatrixHierarchy<M,IS,A>::build(const T& criterion)
+    {
+      prolongDamp_ = criterion.getProlongationDampingFactor();
+      typedef O OverlapFlags;
+      typedef typename ParallelMatrixHierarchy::Iterator MatIterator;
+      typedef typename ParallelInformationHierarchy::Iterator PInfoIterator;
+
+      static const int noints=(Dune::Amg::MAX_PROCESSES/4096>0) ? (Dune::Amg::MAX_PROCESSES/4096) : 1;
+
+      typedef bigunsignedint<sizeof(int)*8*noints> BIGINT;
+      GalerkinProduct<ParallelInformation> productBuilder;
+      MatIterator mlevel = matrices_.finest();
+      MatrixStats<typename M::matrix_type,MINIMAL_DEBUG_LEVEL<=INFO_DEBUG_LEVEL>::stats(mlevel->getmat());
+
+      PInfoIterator infoLevel = parallelInformation_.finest();
+      BIGINT finenonzeros=countNonZeros(mlevel->getmat());
+      finenonzeros = infoLevel->communicator().sum(finenonzeros);
+      BIGINT allnonzeros = finenonzeros;
+
+
+      int level = 0;
+      int rank = 0;
+
+      BIGINT unknowns = mlevel->getmat().N();
+
+      unknowns = infoLevel->communicator().sum(unknowns);
+      double dunknowns=unknowns.todouble();
+      infoLevel->buildGlobalLookup(mlevel->getmat().N());
+      redistributes_.push_back(RedistributeInfoType());
+
+      for(; level < criterion.maxLevel(); ++level, ++mlevel) {
+        assert(matrices_.levels()==redistributes_.size());
+        rank = infoLevel->communicator().rank();
+        if(rank==0 && criterion.debugLevel()>1)
+          std::cout<<"Level "<<level<<" has "<<dunknowns<<" unknowns, "<<dunknowns/infoLevel->communicator().size()
+                   <<" unknowns per proc (procs="<<infoLevel->communicator().size()<<")"<<std::endl;
+
+        MatrixOperator* matrix=&(*mlevel);
+        ParallelInformation* info =&(*infoLevel);
+
+        if((
+#if HAVE_PARMETIS
+             criterion.accumulate()==successiveAccu
+#else
+             false
+#endif
+             || (criterion.accumulate()==atOnceAccu
+                 && dunknowns < 30*infoLevel->communicator().size()))
+           && infoLevel->communicator().size()>1 &&
+           dunknowns/infoLevel->communicator().size() <= criterion.coarsenTarget())
+        {
+          // accumulate to fewer processors
+          std::shared_ptr<Matrix> redistMat = std::make_shared<Matrix>();
+          std::shared_ptr<ParallelInformation> redistComm;
+          std::size_t nodomains = (std::size_t)std::ceil(dunknowns/(criterion.minAggregateSize()
+                                                                    *criterion.coarsenTarget()));
+          if( nodomains<=criterion.minAggregateSize()/2 ||
+              dunknowns <= criterion.coarsenTarget() )
+            nodomains=1;
+
+          bool existentOnNextLevel =
+            repartitionAndDistributeMatrix(mlevel->getmat(), redistMat, *infoLevel,
+                                           redistComm, redistributes_.back(), nodomains,
+                                           criterion);
+          BIGINT unknownsRedist = redistMat->N();
+          unknownsRedist = infoLevel->communicator().sum(unknownsRedist);
+          dunknowns= unknownsRedist.todouble();
+          if(redistComm->communicator().rank()==0 && criterion.debugLevel()>1)
+            std::cout<<"Level "<<level<<" (redistributed) has "<<dunknowns<<" unknowns, "<<dunknowns/redistComm->communicator().size()
+                     <<" unknowns per proc (procs="<<redistComm->communicator().size()<<")"<<std::endl;
+          MatrixArgs args(*redistMat, *redistComm);
+          mlevel.addRedistributed(ConstructionTraits<MatrixOperator>::construct(args));
+          assert(mlevel.isRedistributed());
+          infoLevel.addRedistributed(redistComm);
+          infoLevel->freeGlobalLookup();
+
+          if(!existentOnNextLevel)
+            // We do not hold any data on the redistributed partitioning
+            break;
+
+          // Work on the redistributed Matrix from now on
+          matrix = &(mlevel.getRedistributed());
+          info = &(infoLevel.getRedistributed());
+          info->buildGlobalLookup(matrix->getmat().N());
+        }
+
+        rank = info->communicator().rank();
+        if(dunknowns <= criterion.coarsenTarget())
+          // No further coarsening needed
+          break;
+
+        typedef PropertiesGraphCreator<MatrixOperator,ParallelInformation> GraphCreator;
+        typedef typename GraphCreator::PropertiesGraph PropertiesGraph;
+        typedef typename GraphCreator::GraphTuple GraphTuple;
+
+        typedef typename PropertiesGraph::VertexDescriptor Vertex;
+
+        std::vector<bool> excluded(matrix->getmat().N(), false);
+
+        GraphTuple graphs = GraphCreator::create(*matrix, excluded, *info, OverlapFlags());
+
+        AggregatesMap* aggregatesMap=new AggregatesMap(std::get<1>(graphs)->maxVertex()+1);
+
+        aggregatesMaps_.push_back(aggregatesMap);
+
+        Timer watch;
+        watch.reset();
+        int noAggregates, isoAggregates, oneAggregates, skippedAggregates;
+
+        std::tie(noAggregates, isoAggregates, oneAggregates, skippedAggregates) =
+          aggregatesMap->buildAggregates(matrix->getmat(), *(std::get<1>(graphs)), criterion, level==0);
+
+        if(rank==0 && criterion.debugLevel()>2)
+          std::cout<<" Have built "<<noAggregates<<" aggregates totally ("<<isoAggregates<<" isolated aggregates, "<<
+          oneAggregates<<" aggregates of one vertex,  and skipped "<<
+          skippedAggregates<<" aggregates)."<<std::endl;
+#ifdef TEST_AGGLO
+        {
+          // calculate size of local matrix in the distributed direction
+          int start, end, overlapStart, overlapEnd;
+          int procs=info->communicator().rank();
+          int n = UNKNOWNS/procs; // number of unknowns per process
+          int bigger = UNKNOWNS%procs; // number of process with n+1 unknows
+
+          // Compute owner region
+          if(rank<bigger) {
+            start = rank*(n+1);
+            end   = (rank+1)*(n+1);
+          }else{
+            start = bigger + rank * n;
+            end   = bigger + (rank + 1) * n;
+          }
+
+          // Compute overlap region
+          if(start>0)
+            overlapStart = start - 1;
+          else
+            overlapStart = start;
+
+          if(end<UNKNOWNS)
+            overlapEnd = end + 1;
+          else
+            overlapEnd = end;
+
+          assert((UNKNOWNS)*(overlapEnd-overlapStart)==aggregatesMap->noVertices());
+          for(int j=0; j< UNKNOWNS; ++j)
+            for(int i=0; i < UNKNOWNS; ++i)
+            {
+              if(i>=overlapStart && i<overlapEnd)
+              {
+                int no = (j/2)*((UNKNOWNS)/2)+i/2;
+                (*aggregatesMap)[j*(overlapEnd-overlapStart)+i-overlapStart]=no;
+              }
+            }
+        }
+#endif
+        if(criterion.debugLevel()>1 && info->communicator().rank()==0)
+          std::cout<<"aggregating finished."<<std::endl;
+
+        BIGINT gnoAggregates=noAggregates;
+        gnoAggregates = info->communicator().sum(gnoAggregates);
+        double dgnoAggregates = gnoAggregates.todouble();
+#ifdef TEST_AGGLO
+        BIGINT gnoAggregates=((UNKNOWNS)/2)*((UNKNOWNS)/2);
+#endif
+
+        if(criterion.debugLevel()>2 && rank==0)
+          std::cout << "Building "<<dgnoAggregates<<" aggregates took "<<watch.elapsed()<<" seconds."<<std::endl;
+
+        if(dgnoAggregates==0 || dunknowns/dgnoAggregates<criterion.minCoarsenRate())
+        {
+          if(rank==0)
+          {
+            if(dgnoAggregates>0)
+              std::cerr << "Stopped coarsening because of rate breakdown "<<dunknowns<<"/"<<dgnoAggregates
+                        <<"="<<dunknowns/dgnoAggregates<<"<"
+                        <<criterion.minCoarsenRate()<<std::endl;
+            else
+              std::cerr<< "Could not build any aggregates. Probably no connected nodes."<<std::endl;
+          }
+          aggregatesMap->free();
+          delete aggregatesMap;
+          aggregatesMaps_.pop_back();
+
+          if(criterion.accumulate() && mlevel.isRedistributed() && info->communicator().size()>1) {
+            // coarse level matrix was already redistributed, but to more than 1 process
+            // Therefore need to delete the redistribution. Further down it will
+            // then be redistributed to 1 process
+            delete &(mlevel.getRedistributed().getmat());
+            mlevel.deleteRedistributed();
+            delete &(infoLevel.getRedistributed());
+            infoLevel.deleteRedistributed();
+            redistributes_.back().resetSetup();
+          }
+
+          break;
+        }
+        unknowns =  noAggregates;
+        dunknowns = dgnoAggregates;
+
+        CommunicationArgs commargs(info->communicator(),info->category());
+        parallelInformation_.addCoarser(commargs);
+
+        ++infoLevel; // parallel information on coarse level
+
+        typename PropertyMapTypeSelector<VertexVisitedTag,PropertiesGraph>::Type visitedMap =
+          get(VertexVisitedTag(), *(std::get<1>(graphs)));
+
+        watch.reset();
+        int aggregates = IndicesCoarsener<ParallelInformation,OverlapFlags>
+                         ::coarsen(*info,
+                                   *(std::get<1>(graphs)),
+                                   visitedMap,
+                                   *aggregatesMap,
+                                   *infoLevel,
+                                   noAggregates);
+        GraphCreator::free(graphs);
+
+        if(criterion.debugLevel()>2) {
+          if(rank==0)
+            std::cout<<"Coarsening of index sets took "<<watch.elapsed()<<" seconds."<<std::endl;
+        }
+
+        watch.reset();
+
+        infoLevel->buildGlobalLookup(aggregates);
+        AggregatesPublisher<Vertex,OverlapFlags,ParallelInformation>::publish(*aggregatesMap,
+                                                                              *info,
+                                                                              infoLevel->globalLookup());
+
+
+        if(criterion.debugLevel()>2) {
+          if(rank==0)
+            std::cout<<"Communicating global aggregate numbers took "<<watch.elapsed()<<" seconds."<<std::endl;
+        }
+
+        watch.reset();
+        std::vector<bool>& visited=excluded;
+
+        typedef std::vector<bool>::iterator Iterator;
+        typedef IteratorPropertyMap<Iterator, IdentityMap> VisitedMap2;
+        Iterator end = visited.end();
+        for(Iterator iter= visited.begin(); iter != end; ++iter)
+          *iter=false;
+
+        VisitedMap2 visitedMap2(visited.begin(), Dune::IdentityMap());
+
+        typename MatrixOperator::matrix_type* coarseMatrix;
+
+        coarseMatrix = productBuilder.build(*(std::get<0>(graphs)), visitedMap2,
+                                            *info,
+                                            *aggregatesMap,
+                                            aggregates,
+                                            OverlapFlags());
+        dverb<<"Building of sparsity pattern took "<<watch.elapsed()<<std::endl;
+        watch.reset();
+        info->freeGlobalLookup();
+
+        delete std::get<0>(graphs);
+        productBuilder.calculate(matrix->getmat(), *aggregatesMap, *coarseMatrix, *infoLevel, OverlapFlags());
+
+        if(criterion.debugLevel()>2) {
+          if(rank==0)
+            std::cout<<"Calculation entries of Galerkin product took "<<watch.elapsed()<<" seconds."<<std::endl;
+        }
+
+        BIGINT nonzeros = countNonZeros(*coarseMatrix);
+        allnonzeros = allnonzeros + infoLevel->communicator().sum(nonzeros);
+        MatrixArgs args(*coarseMatrix, *infoLevel);
+
+        matrices_.addCoarser(args);
+        redistributes_.push_back(RedistributeInfoType());
+      } // end level loop
+
+
+      infoLevel->freeGlobalLookup();
+
+      built_=true;
+      AggregatesMap* aggregatesMap=new AggregatesMap(0);
+      aggregatesMaps_.push_back(aggregatesMap);
+
+      if(criterion.debugLevel()>0) {
+        if(level==criterion.maxLevel()) {
+          BIGINT unknownsLevel = mlevel->getmat().N();
+          unknownsLevel = infoLevel->communicator().sum(unknownsLevel);
+          double dunknownsLevel = unknownsLevel.todouble();
+          if(rank==0 && criterion.debugLevel()>1) {
+            std::cout<<"Level "<<level<<" has "<<dunknownsLevel<<" unknowns, "<<dunknownsLevel/infoLevel->communicator().size()
+                     <<" unknowns per proc (procs="<<infoLevel->communicator().size()<<")"<<std::endl;
+          }
+        }
+      }
+
+      if(criterion.accumulate() && !redistributes_.back().isSetup() &&
+         infoLevel->communicator().size()>1) {
+#if HAVE_MPI && !HAVE_PARMETIS
+        if(criterion.accumulate()==successiveAccu &&
+           infoLevel->communicator().rank()==0)
+          std::cerr<<"Successive accumulation of data on coarse levels only works with ParMETIS installed."
+                   <<"  Fell back to accumulation to one domain on coarsest level"<<std::endl;
+#endif
+
+        // accumulate to fewer processors
+        std::shared_ptr<Matrix> redistMat = std::make_shared<Matrix>();
+        std::shared_ptr<ParallelInformation> redistComm;
+        int nodomains = 1;
+
+        repartitionAndDistributeMatrix(mlevel->getmat(), redistMat, *infoLevel,
+                                       redistComm, redistributes_.back(), nodomains,criterion);
+        MatrixArgs args(*redistMat, *redistComm);
+        BIGINT unknownsRedist = redistMat->N();
+        unknownsRedist = infoLevel->communicator().sum(unknownsRedist);
+
+        if(redistComm->communicator().rank()==0 && criterion.debugLevel()>1) {
+          double dunknownsRedist = unknownsRedist.todouble();
+          std::cout<<"Level "<<level<<" redistributed has "<<dunknownsRedist<<" unknowns, "<<dunknownsRedist/redistComm->communicator().size()
+                   <<" unknowns per proc (procs="<<redistComm->communicator().size()<<")"<<std::endl;
+        }
+        mlevel.addRedistributed(ConstructionTraits<MatrixOperator>::construct(args));
+        infoLevel.addRedistributed(redistComm);
+        infoLevel->freeGlobalLookup();
+      }
+
+      int levels = matrices_.levels();
+      maxlevels_ = parallelInformation_.finest()->communicator().max(levels);
+      assert(matrices_.levels()==redistributes_.size());
+      if(hasCoarsest() && rank==0 && criterion.debugLevel()>1)
+        std::cout<<"operator complexity: "<<allnonzeros.todouble()/finenonzeros.todouble()<<std::endl;
+
+    }
+
+    template<class M, class IS, class A>
+    const typename MatrixHierarchy<M,IS,A>::ParallelMatrixHierarchy&
+    MatrixHierarchy<M,IS,A>::matrices() const
+    {
+      return matrices_;
+    }
+
+    template<class M, class IS, class A>
+    const typename MatrixHierarchy<M,IS,A>::ParallelInformationHierarchy&
+    MatrixHierarchy<M,IS,A>::parallelInformation() const
+    {
+      return parallelInformation_;
+    }
+
+    template<class M, class IS, class A>
+    void MatrixHierarchy<M,IS,A>::getCoarsestAggregatesOnFinest(std::vector<std::size_t>& data) const
+    {
+      int levels=aggregatesMaps().size();
+      int maxlevels=parallelInformation_.finest()->communicator().max(levels);
+      std::size_t size=(*(aggregatesMaps().begin()))->noVertices();
+      // We need an auxiliary vector for the consecutive prolongation.
+      std::vector<std::size_t> tmp;
+      std::vector<std::size_t> *coarse, *fine;
+
+      // make sure the allocated space suffices.
+      tmp.reserve(size);
+      data.reserve(size);
+
+      // Correctly assign coarse and fine for the first prolongation such that
+      // we end up in data in the end.
+      if(levels%2==0) {
+        coarse=&tmp;
+        fine=&data;
+      }else{
+        coarse=&data;
+        fine=&tmp;
+      }
+
+      // Number the unknowns on the coarsest level consecutively for each process.
+      if(levels==maxlevels) {
+        const AggregatesMap& map = *(*(++aggregatesMaps().rbegin()));
+        std::size_t m=0;
+
+        for(typename AggregatesMap::const_iterator iter = map.begin(); iter != map.end(); ++iter)
+          if(*iter< AggregatesMap::ISOLATED)
+            m=std::max(*iter,m);
+
+        coarse->resize(m+1);
+        std::size_t i=0;
+        srand((unsigned)std::clock());
+        std::set<size_t> used;
+        for(typename std::vector<std::size_t>::iterator iter=coarse->begin(); iter != coarse->end();
+            ++iter, ++i)
+        {
+          std::pair<std::set<std::size_t>::iterator,bool> ibpair
+            = used.insert(static_cast<std::size_t>((((double)rand())/(RAND_MAX+1.0)))*coarse->size());
+
+          while(!ibpair.second)
+            ibpair = used.insert(static_cast<std::size_t>((((double)rand())/(RAND_MAX+1.0))*coarse->size()));
+          *iter=*(ibpair.first);
+        }
+      }
+
+      typename ParallelInformationHierarchy::Iterator pinfo = parallelInformation().coarsest();
+      --pinfo;
+
+      // Now consecutively project the numbers to the finest level.
+      for(typename AggregatesMapList::const_reverse_iterator aggregates=++aggregatesMaps().rbegin();
+          aggregates != aggregatesMaps().rend(); ++aggregates,--levels) {
+
+        fine->resize((*aggregates)->noVertices());
+        fine->assign(fine->size(), 0);
+        Transfer<typename AggregatesMap::AggregateDescriptor, std::vector<std::size_t>, ParallelInformation>
+        ::prolongateVector(*(*aggregates), *coarse, *fine, static_cast<std::size_t>(1), *pinfo);
+        --pinfo;
+        std::swap(coarse, fine);
+      }
+
+      // Assertion to check that we really projected to data on the last step.
+      assert(coarse==&data);
+    }
+
+    template<class M, class IS, class A>
+    const typename MatrixHierarchy<M,IS,A>::AggregatesMapList&
+    MatrixHierarchy<M,IS,A>::aggregatesMaps() const
+    {
+      return aggregatesMaps_;
+    }
+    template<class M, class IS, class A>
+    const typename MatrixHierarchy<M,IS,A>::RedistributeInfoList&
+    MatrixHierarchy<M,IS,A>::redistributeInformation() const
+    {
+      return redistributes_;
+    }
+
+    template<class M, class IS, class A>
+    MatrixHierarchy<M,IS,A>::~MatrixHierarchy()
+    {
+      typedef typename AggregatesMapList::reverse_iterator AggregatesMapIterator;
+      typedef typename ParallelMatrixHierarchy::Iterator Iterator;
+      typedef typename ParallelInformationHierarchy::Iterator InfoIterator;
+
+      AggregatesMapIterator amap = aggregatesMaps_.rbegin();
+      InfoIterator info = parallelInformation_.coarsest();
+      for(Iterator level=matrices_.coarsest(), finest=matrices_.finest(); level != finest;  --level, --info, ++amap) {
+        (*amap)->free();
+        delete *amap;
+        delete &level->getmat();
+        if(level.isRedistributed())
+          delete &(level.getRedistributed().getmat());
+      }
+      delete *amap;
+    }
+
+    template<class M, class IS, class A>
+    template<class V, class BA, class TA>
+    void MatrixHierarchy<M,IS,A>::coarsenVector(Hierarchy<BlockVector<V,BA>, TA>& hierarchy) const
+    {
+      assert(hierarchy.levels()==1);
+      typedef typename ParallelMatrixHierarchy::ConstIterator Iterator;
+      typedef typename RedistributeInfoList::const_iterator RIter;
+      RIter redist = redistributes_.begin();
+
+      Iterator matrix = matrices_.finest(), coarsest = matrices_.coarsest();
+      int level=0;
+      if(redist->isSetup())
+        hierarchy.addRedistributedOnCoarsest(matrix.getRedistributed().getmat().N());
+      Dune::dvverb<<"Level "<<level<<" has "<<matrices_.finest()->getmat().N()<<" unknowns!"<<std::endl;
+
+      while(matrix != coarsest) {
+        ++matrix; ++level; ++redist;
+        Dune::dvverb<<"Level "<<level<<" has "<<matrix->getmat().N()<<" unknowns!"<<std::endl;
+
+        hierarchy.addCoarser(matrix->getmat().N());
+        if(redist->isSetup())
+          hierarchy.addRedistributedOnCoarsest(matrix.getRedistributed().getmat().N());
+
+      }
+
+    }
+
+    template<class M, class IS, class A>
+    template<class S, class TA>
+    void MatrixHierarchy<M,IS,A>::coarsenSmoother(Hierarchy<S,TA>& smoothers,
+                                                  const typename SmootherTraits<S>::Arguments& sargs) const
+    {
+      assert(smoothers.levels()==0);
+      typedef typename ParallelMatrixHierarchy::ConstIterator MatrixIterator;
+      typedef typename ParallelInformationHierarchy::ConstIterator PinfoIterator;
+      typedef typename AggregatesMapList::const_iterator AggregatesIterator;
+
+      typename ConstructionTraits<S>::Arguments cargs;
+      cargs.setArgs(sargs);
+      PinfoIterator pinfo = parallelInformation_.finest();
+      AggregatesIterator aggregates = aggregatesMaps_.begin();
+      int level=0;
+      for(MatrixIterator matrix = matrices_.finest(), coarsest = matrices_.coarsest();
+          matrix != coarsest; ++matrix, ++pinfo, ++aggregates, ++level) {
+        cargs.setMatrix(matrix->getmat(), **aggregates);
+        cargs.setComm(*pinfo);
+        smoothers.addCoarser(cargs);
+      }
+      if(maxlevels()>levels()) {
+        // This is not the globally coarsest level and therefore smoothing is needed
+        cargs.setMatrix(matrices_.coarsest()->getmat(), **aggregates);
+        cargs.setComm(*pinfo);
+        smoothers.addCoarser(cargs);
+        ++level;
+      }
+    }
+
+    template<class M, class IS, class A>
+    template<class F>
+    void MatrixHierarchy<M,IS,A>::recalculateGalerkin(const F& copyFlags)
+    {
+      typedef typename AggregatesMapList::iterator AggregatesMapIterator;
+      typedef typename ParallelMatrixHierarchy::Iterator Iterator;
+      typedef typename ParallelInformationHierarchy::Iterator InfoIterator;
+
+      AggregatesMapIterator amap = aggregatesMaps_.begin();
+      BaseGalerkinProduct productBuilder;
+      InfoIterator info = parallelInformation_.finest();
+      typename RedistributeInfoList::iterator riIter = redistributes_.begin();
+      Iterator level = matrices_.finest(), coarsest=matrices_.coarsest();
+      if(level.isRedistributed()) {
+        info->buildGlobalLookup(level->getmat().N());
+        redistributeMatrixEntries(const_cast<Matrix&>(level->getmat()),
+                                  const_cast<Matrix&>(level.getRedistributed().getmat()),
+                                  *info,info.getRedistributed(), *riIter);
+        info->freeGlobalLookup();
+      }
+
+      for(; level!=coarsest; ++amap) {
+        const Matrix& fine = (level.isRedistributed() ? level.getRedistributed() : *level).getmat();
+        ++level;
+        ++info;
+        ++riIter;
+        productBuilder.calculate(fine, *(*amap), const_cast<Matrix&>(level->getmat()), *info, copyFlags);
+        if(level.isRedistributed()) {
+          info->buildGlobalLookup(level->getmat().N());
+          redistributeMatrixEntries(const_cast<Matrix&>(level->getmat()),
+                                    const_cast<Matrix&>(level.getRedistributed().getmat()), *info,
+                                    info.getRedistributed(), *riIter);
+          info->freeGlobalLookup();
+        }
+      }
+    }
+
+    template<class M, class IS, class A>
+    std::size_t MatrixHierarchy<M,IS,A>::levels() const
+    {
+      return matrices_.levels();
+    }
+
+    template<class M, class IS, class A>
+    std::size_t MatrixHierarchy<M,IS,A>::maxlevels() const
+    {
+      return maxlevels_;
+    }
+
+    template<class M, class IS, class A>
+    bool MatrixHierarchy<M,IS,A>::hasCoarsest() const
+    {
+      return levels()==maxlevels() &&
+             (!matrices_.coarsest().isRedistributed() ||matrices_.coarsest()->getmat().N()>0);
+    }
+
+    template<class M, class IS, class A>
+    bool MatrixHierarchy<M,IS,A>::isBuilt() const
+    {
+      return built_;
+    }
+
+    /** @} */
+  } // namespace Amg
+} // namespace Dune
+
+#endif // end DUNE_AMG_MATRIXHIERARCHY_HH
diff --git a/dune/istl/paamg/test/hierarchytest.cc b/dune/istl/paamg/test/hierarchytest.cc
index 7ac7673f8..3ac29419f 100644
--- a/dune/istl/paamg/test/hierarchytest.cc
+++ b/dune/istl/paamg/test/hierarchytest.cc
@@ -4,7 +4,7 @@
 
 #include "mpi.h"
 #include <dune/common/parallel/mpicollectivecommunication.hh>
-#include <dune/istl/paamg/hierarchy.hh>
+#include <dune/istl/paamg/matrixhierarchy.hh>
 #include <dune/istl/paamg/smoother.hh>
 #include <dune/istl/preconditioners.hh>
 #include <dune/istl/owneroverlapcopy.hh>
-- 
GitLab