diff --git a/istl/paamg/test/Makefile.am b/istl/paamg/test/Makefile.am
index 8fa9c0eb1608075544049d051fe115cac7345fce..abdfc96d44a49e88ccee4c38252a6076f0fcffc6 100644
--- a/istl/paamg/test/Makefile.am
+++ b/istl/paamg/test/Makefile.am
@@ -18,24 +18,26 @@ check_PROGRAMS = $(TESTPROGS) $(NORMALTESTS)
 graphtest_SOURCES = graphtest.cc
 
 transfertest_SOURCES = transfertest.cc
-transfertest_CXXFLAGS = $(MPI_CPPFLAGS)
-transfertest_LDFLAGS = $(MPI_LDFLAGS) $(MPI_LIBS)
+transfertest_CPPFLAGS = $(AM_CPPFLAGS) $(MPI_CPPFLAGS)
+transfertest_LDFLAGS = $(AM_LDFLAGS) $(MPI_LDFLAGS)
 
 galerkintest_SOURCES = galerkintest.cc anisotropic.hh
-galerkintest_CXXFLAGS = $(MPI_CPPFLAGS)
-galerkintest_LDFLAGS = $(MPI_LDFLAGS) $(MPI_LIBS)
+galerkintest_CPPFLAGS = $(AM_CPPFLAGS) $(MPI_CPPFLAGS)
+galerkintest_LDFLAGS = $(AM_LDFLAGS) $(MPI_LDFLAGS)
 
 hierarchytest_SOURCES = hierarchytest.cc anisotropic.hh
-hierarchytest_CXXFLAGS = $(MPI_CPPFLAGS)
-hierarchytest_LDFLAGS = $(MPI_LDFLAGS) $(MPI_LIBS)
+hierarchytest_CPPFLAGS = $(AM_CPPFLAGS) $(MPI_CPPFLAGS)
+hierarchytest_LDADD = $(AM_LIBS) $(MPI_LIBS)
+hierarchytest_LDFLAGS = $(AM_LDFLAGS) $(MPI_LDFLAGS)
 
 amgtest_SOURCES = amgtest.cc
-amgtest_CXXFLAGS = $(AM_CPPFLAGS) $(SUPERLU_CPPFLAGS)
 amgtest_CPPFLAGS = $(AM_CPPFLAGS) $(SUPERLU_CPPFLAGS)
-amgtest_LDFLAGS = $(MPI_LDFLAGS) $(MPI_LIBS) $(SUPERLU_LIBS)
+amgtest_LDFLAGS = $(AM_LDFLAGS) $(SUPERLU_LDFLAGS)
+amgtest_LDADD = $(SUPERLU_LIBS)
 
 pamgtest_SOURCES = parallelamgtest.cc
-pamgtest_CXXFLAGS = $(MPI_CPPFLAGS) $(SUPERLU_CPPFLAGS)
-pamgtest_LDFLAGS = $(MPI_LDFLAGS) $(MPI_LIBS) $(SUPERLU_LIBS)
+pamgtest_CPPFLAGS = $(AM_CPPFLAGS) $(MPI_CPPFLAGS) $(SUPERLU_CPPFLAGS)
+pamgtest_LDFLAGS = $(AM_LDFLAGS) $(MPI_LDFLAGS)
+pamgtest_LDADD = $(MPI_LIBS) $(SUPERLU_LIBS)
 
 include $(top_srcdir)/am/global-rules
diff --git a/istl/paamg/test/amgtest.cc b/istl/paamg/test/amgtest.cc
index 4ed534f183fa1422d331d5e2d7d0d005b177b11b..ebb22d2e892c9f34b32372b1f84c0813613da07e 100644
--- a/istl/paamg/test/amgtest.cc
+++ b/istl/paamg/test/amgtest.cc
@@ -28,6 +28,7 @@ int main(int argc, char** argv)
 {
 
   const int BS=1;
+
   int N=500/BS;
   int coarsenTarget=1200;
   int ml=10;
@@ -37,6 +38,7 @@ int main(int argc, char** argv)
 
   if(argc>2)
     coarsenTarget = atoi(argv[2]);
+
   if(argc>3)
     ml = atoi(argv[3]);
 
@@ -91,6 +93,11 @@ int main(int argc, char** argv)
 
   smootherArgs.iterations = 1;
 
+  //smootherArgs.overlap=SmootherArgs::vertex;
+  //smootherArgs.overlap=SmootherArgs::none;
+  //smootherArgs.overlap=SmootherArgs::aggregate;
+
+  smootherArgs.relaxationFactor = 1;
 
   Criterion criterion(15,coarsenTarget);
   criterion.setMaxDistance(4);
diff --git a/istl/paamg/test/anisotropic.hh b/istl/paamg/test/anisotropic.hh
index 08c6061d5383d2dfd9093aa463fb929a5ee1518d..2fd60bb44d2b9a544531c8d48e477338f723105f 100644
--- a/istl/paamg/test/anisotropic.hh
+++ b/istl/paamg/test/anisotropic.hh
@@ -104,7 +104,7 @@ void fillValues(int N, M& mat, int overlapStart, int overlapEnd, int start, int
 
     ColIterator endi = (*i).end();
 
-    if(x<start || x >= end || x==0 || x==N-1 || y==0 || y==N-1) {
+    if(x<start || x >= end) { // || x==0 || x==N-1 || y==0 || y==N-1){
       // overlap node is dirichlet border
       ColIterator j = (*i).begin();
 
@@ -139,7 +139,7 @@ void setBoundary(Dune::BlockVector<Dune::FieldVector<double,BS> >& lhs,
 
     if(x==0 || y ==0 || x==n-1 || y==n-1) {
       double h = 1.0 / ((double) (n-1));
-      lhs[i->local()]=rhs[i->local()]=0;((double)x)*((double)y)*h*h;
+      lhs[i->local()]=rhs[i->local()]=0; //((double)x)*((double)y)*h*h;
     }
   }
 }
@@ -163,7 +163,7 @@ void setBoundary(Dune::BlockVector<Dune::FieldVector<double,BS> >& lhs,
         else
           y = ((double) j)*h;
 
-        lhs[j*N+i]=rhs[j*N+i]=x*y;
+        lhs[j*N+i]=rhs[j*N+i]=0; //x*y;
       }
 }
 
@@ -176,7 +176,7 @@ Dune::BCRSMatrix<Dune::FieldMatrix<double,BS,BS> > setupAnisotropic2d(int N, Dun
   typedef Dune::FieldMatrix<double,BS,BS> Block;
   typedef Dune::BCRSMatrix<Block> BCRSMat;
 
-  // calculate size of lokal matrix in the distributed direction
+  // calculate size of local matrix in the distributed direction
   int start, end, overlapStart, overlapEnd;
 
   int n = N/procs; // number of unknowns per process
@@ -187,8 +187,8 @@ Dune::BCRSMatrix<Dune::FieldMatrix<double,BS,BS> > setupAnisotropic2d(int N, Dun
     start = rank*(n+1);
     end   = (rank+1)*(n+1);
   }else{
-    start = bigger + rank * n;
-    end   = bigger + (rank + 1) * n;
+    start = bigger*(n+1) + rank * n;
+    end   = bigger*(n+1) + (rank + 1) * n;
   }
 
   // Compute overlap region
diff --git a/istl/paamg/test/parallelamgtest.cc b/istl/paamg/test/parallelamgtest.cc
index b1a2a41fd8e45c5b69ea0bb5d7e40b5a9b1396c5..1abc1f5e17773550f5282918aecb64be633961fa 100644
--- a/istl/paamg/test/parallelamgtest.cc
+++ b/istl/paamg/test/parallelamgtest.cc
@@ -1,6 +1,9 @@
 // -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 // vi: set et ts=4 sw=2 sts=2:
 #include "config.h"
+#ifdef TEST_AGGLO
+#define UNKNOWNS 10
+#endif
 #include "anisotropic.hh"
 #include <dune/common/timer.hh>
 #include <dune/istl/paamg/amg.hh>
@@ -11,7 +14,7 @@
 #include <dune/common/mpicollectivecommunication.hh>
 #include <string>
 
-template<class T>
+template<class T, class C>
 class DoubleStepPreconditioner
   : public Dune::Preconditioner<typename T::domain_type, typename T::range_type>
 {
@@ -21,8 +24,8 @@ public:
 
   enum {category = T::category};
 
-  DoubleStepPreconditioner(T& preconditioner_)
-    : preconditioner(&preconditioner_)
+  DoubleStepPreconditioner(T& preconditioner_, C& comm)
+    : preconditioner(&preconditioner_), comm_(comm)
   {}
 
   virtual void pre (X& x, Y& b)
@@ -33,7 +36,7 @@ public:
   virtual void apply(X& v, const Y& d)
   {
     preconditioner->apply(v,d);
-    preconditioner->apply(v,d);
+    comm_.copyOwnerToAll(v,v);
   }
 
   virtual void post (X& x)
@@ -42,6 +45,7 @@ public:
   }
 private:
   T* preconditioner;
+  C& comm_;
 };
 
 
@@ -85,6 +89,8 @@ void testAmg(int N, int coarsenTarget)
   typedef Dune::OverlappingSchwarzOperator<BCRSMat,Vector,Vector,Communication> Operator;
   int n;
 
+  N/=BS;
+
   Communication comm(MPI_COMM_WORLD);
 
   BCRSMat mat = setupAnisotropic2d<BS>(N, comm.indexSet(), comm.communicator(), &n, 1);
@@ -121,7 +127,9 @@ void testAmg(int N, int coarsenTarget)
   typedef Dune::Amg::CoarsenCriterion<Dune::Amg::SymmetricCriterion<BCRSMat,Dune::Amg::FirstDiagonal> >
   Criterion;
   //typedef Dune::SeqSSOR<BCRSMat,Vector,Vector> Smoother;
-  typedef Dune::SeqJac<BCRSMat,Vector,Vector> Smoother;
+  //typedef Dune::SeqJac<BCRSMat,Vector,Vector> Smoother;
+  typedef Dune::SeqILU0<BCRSMat,Vector,Vector> Smoother;
+  //typedef Dune::SeqILUn<BCRSMat,Vector,Vector> Smoother;
   typedef Dune::BlockPreconditioner<Vector,Vector,Communication,Smoother> ParSmoother;
   //typedef Dune::ParSSOR<BCRSMat,Vector,Vector,Communication> ParSmoother;
   typedef typename Dune::Amg::SmootherTraits<ParSmoother>::Arguments SmootherArgs;
@@ -143,44 +151,44 @@ void testAmg(int N, int coarsenTarget)
   //  criterion.setMaxLevel(1);
 
   typedef Dune::Amg::AMG<Operator,Vector,ParSmoother,Communication> AMG;
+  /*
+     AMG amg(fop, criterion, smootherArgs, 1, 2, comm);
 
-  AMG amg(fop, criterion, smootherArgs, 1, 2, comm);
+     buildtime = watch.elapsed();
 
-  buildtime = watch.elapsed();
+     if(rank==0)
+     std::cout<<"Building hierarchy took "<<buildtime<<" seconds"<<std::endl;
 
-  if(rank==0)
-    std::cout<<"Building hierarchy took "<<buildtime<<" seconds"<<std::endl;
+     Dune::CGSolver<Vector> amgCG(fop, sp, amg, 10e-8, 300, (rank==0)?2:0);
+     watch.reset();
 
-  Dune::CGSolver<Vector> amgCG(fop, sp, amg, 10e-8, 300, (rank==0) ? 2 : 0);
-  watch.reset();
+     amgCG.apply(x,b,r);
+     MPI_Barrier(MPI_COMM_WORLD);
 
-  amgCG.apply(x,b,r);
-  MPI_Barrier(MPI_COMM_WORLD);
+     double solvetime = watch.elapsed();
 
-  double solvetime = watch.elapsed();
+     if(!r.converged && rank==0)
+     std::cerr<<" AMG Cg solver did not converge!"<<std::endl;
 
-  if(!r.converged && rank==0)
-    std::cerr<<" AMG Cg solver did not converge!"<<std::endl;
+     if(rank==0){
+     std::cout<<"AMG solving took "<<solvetime<<" seconds"<<std::endl;
 
-  if(rank==0) {
-    std::cout<<"AMG solving took "<<solvetime<<" seconds"<<std::endl;
+     std::cout<<"AMG building took "<<(buildtime/r.elapsed*r.iterations)<<" iterations"<<std::endl;
+     std::cout<<"AMG building together with solving took "<<buildtime+solvetime<<std::endl;
+     }
+   */
+  Smoother ssm(fop.getmat(),1);
+  ParSmoother sm(ssm,comm);
+  DoubleStepPreconditioner<Smoother,Communication> dsp(ssm,comm);
+  Dune::CGSolver<Vector> cg(fop, sp, sm, 10e-08, 10, (rank==0) ? 2 : 0);
 
-    std::cout<<"AMG building took "<<(buildtime/r.elapsed*r.iterations)<<" iterations"<<std::endl;
-    std::cout<<"AMG building together with slving took "<<buildtime+solvetime<<std::endl;
-  }
-  /*
-     Smoother ssm(fop.getmat(),1,1);
-     ParSmoother sm(ssm,comm);
-     //DoubleStepPreconditioner<ParSmoother> dsp(sm);
-     Dune::LoopSolver<Vector> cg(fop, sp, sm, 10e-08, 30, (rank==0)?2:0);
+  watch.reset();
 
-     watch.reset();
+  cg.apply(x1,b1,r1);
 
-     cg.apply(x1,b1,r1);
+  if(!r1.converged && rank==0)
+    std::cerr<<" Cg solver did not converge!"<<std::endl;
 
-     if(!r1.converged && rank==0)
-     std::cerr<<" Cg solver did not converge!"<<std::endl;
-   */
   //std::cout<<"CG solving took "<<watch.elapsed()<<" seconds"<<std::endl;
 }
 
@@ -213,9 +221,9 @@ int main(int argc, char** argv)
   MPI_Errhandler_create(MPI_err_handler, &handler);
   MPI_Errhandler_set(MPI_COMM_WORLD, handler);
 
-  int N=500;
+  int N=100;
 
-  int coarsenTarget=1200;
+  int coarsenTarget=200;
 
   if(argc>1)
     N = atoi(argv[1]);
@@ -223,7 +231,10 @@ int main(int argc, char** argv)
   if(argc>2)
     coarsenTarget = atoi(argv[2]);
 
-  AMGTester<1,1>::test(N, coarsenTarget);
+#ifdef TEST_AGGLO
+  N=UNKNOWNS;
+#endif
+  AMGTester<1,6>::test(N, coarsenTarget);
   //AMGTester<1,5>::test(N, coarsenTarget);
   //  AMGTester<10,10>::test(N, coarsenTarget);