diff --git a/dune/istl/paamg/amg.hh b/dune/istl/paamg/amg.hh
index 2a987a1470a3b1675924084655ce5e7efcd031ee..98b671cf5a6af50deb6b5bd9541b00005fe77623 100644
--- a/dune/istl/paamg/amg.hh
+++ b/dune/istl/paamg/amg.hh
@@ -378,7 +378,9 @@ namespace Dune
       enum SolverType { umfpack, superlu, none };
 
       static constexpr SolverType solver =
-#if HAVE_SUITESPARSE_UMFPACK
+#if DISABLE_AMG_DIRECTSOLVER
+        none;
+#elif HAVE_SUITESPARSE_UMFPACK
         UMFPackMethodChooser< field_type > :: valid ? umfpack : none ;
 #elif HAVE_SUPERLU
         superlu ;
diff --git a/dune/istl/solvers.hh b/dune/istl/solvers.hh
index 38080e797194df4d02b2ff58f30bbd9a383a4620..11747263285eb2d01e6cbf7fec8a001edde4c231 100644
--- a/dune/istl/solvers.hh
+++ b/dune/istl/solvers.hh
@@ -20,6 +20,8 @@
 #include "preconditioner.hh"
 #include <dune/common/deprecated.hh>
 #include <dune/common/exceptions.hh>
+#include <dune/common/conditional.hh>
+#include <dune/common/rangeutilities.hh>
 #include <dune/common/timer.hh>
 #include <dune/common/ftraits.hh>
 #include <dune/common/typetraits.hh>
@@ -29,7 +31,6 @@ namespace Dune {
       @{
    */
 
-
   /** \file
 
       \brief   Implementations of the inverse operator interface.
@@ -37,9 +38,7 @@ namespace Dune {
       This file provides various preconditioned Krylov methods.
    */
 
-
-
-   //=====================================================================
+  //=====================================================================
   // Implementation of this interface
   //=====================================================================
 
@@ -170,7 +169,7 @@ namespace Dune {
           this->printOutput(std::cout,i,defnew,def);
         //std::cout << i << " " << defnew << " " << defnew/def << std::endl;
         def = defnew;               // update norm
-        if (def<def0*_reduction || def<1E-30)    // convergence check
+        if (all_true(def<def0*_reduction) || max_value(def)<1E-30)    // convergence check
         {
           res.converged  = true;
           break;
@@ -189,7 +188,7 @@ namespace Dune {
 
       // fill statistics
       res.iterations = i;
-      res.reduction = def/def0;
+      res.reduction = max_value(def/def0);
       res.conv_rate  = pow(res.reduction,1.0/i);
       res.elapsed = watch.elapsed();
 
@@ -312,7 +311,7 @@ namespace Dune {
           this->printOutput(std::cout,i,defnew,def);
 
         def = defnew;               // update norm
-        if (def<def0*_reduction || def<1E-30)    // convergence check
+        if (all_true(def<def0*_reduction) || max_value(def)<1E-30)    // convergence check
         {
           res.converged  = true;
           break;
@@ -327,8 +326,8 @@ namespace Dune {
 
       _prec.post(x);                  // postprocess preconditioner
       res.iterations = i;               // fill statistics
-      res.reduction = static_cast<double>(def/def0);
-      res.conv_rate  = static_cast<double>(pow(res.reduction,1.0/i));
+      res.reduction = static_cast<double>(max_value(def/def0));
+      res.conv_rate  = pow(res.reduction,1.0/i);
       res.elapsed = watch.elapsed();
       if (_verbose>0)                 // final print
         std::cout << "=== rate=" << res.conv_rate
@@ -429,7 +428,7 @@ namespace Dune {
 
       real_type def0 = _sp.norm(b); // compute norm
 
-      if (!isfinite(def0)) // check for inf or NaN
+      if (!all_true(isfinite(def0))) // check for inf or NaN
       {
         if (_verbose>0)
           std::cout << "=== CGSolver: abort due to infinite or NaN initial defect"
@@ -438,7 +437,7 @@ namespace Dune {
                    << " is infinite or NaN");
       }
 
-      if (def0<1E-30)    // convergence check
+      if (max_value(def0)<1E-30)    // convergence check
       {
         res.converged  = true;
         res.iterations = 0;               // fill statistics
@@ -457,7 +456,7 @@ namespace Dune {
         std::cout << "=== CGSolver" << std::endl;
         if (_verbose>1) {
           this->printHeader(std::cout);
-          this->printOutput(std::cout,real_type(0),def0);
+          this->printOutput(std::cout,0,def0);
         }
       }
 
@@ -485,10 +484,10 @@ namespace Dune {
         real_type defnew=_sp.norm(b); // comp defect norm
 
         if (_verbose>1)             // print
-          this->printOutput(std::cout,real_type(i),defnew,def);
+          this->printOutput(std::cout,i,defnew,def);
 
         def = defnew;               // update norm
-        if (!isfinite(def)) // check for inf or NaN
+        if (!all_true(isfinite(def))) // check for inf or NaN
         {
           if (_verbose>0)
             std::cout << "=== CGSolver: abort due to infinite or NaN defect"
@@ -497,7 +496,7 @@ namespace Dune {
                      "CGSolver: defect=" << def << " is infinite or NaN");
         }
 
-        if (def<def0*_reduction || def<1E-30)    // convergence check
+        if (all_true(def<def0*_reduction) || max_value(def)<1E-30)    // convergence check
         {
           res.converged  = true;
           break;
@@ -517,12 +516,12 @@ namespace Dune {
       i=std::min(_maxit,i);
 
       if (_verbose==1)                // printing for non verbose
-        this->printOutput(std::cout,real_type(i),def);
+        this->printOutput(std::cout,i,def);
 
       _prec.post(x);                  // postprocess preconditioner
-      res.iterations = i;               // fill statistics
-      res.reduction = static_cast<double>(def/def0);
-      res.conv_rate  = static_cast<double>(pow(res.reduction,1.0/i));
+      res.iterations = i;             // fill statistics
+      res.reduction = static_cast<double>(max_value(max_value(def/def0)));
+      res.conv_rate  = pow(res.reduction,1.0/i);
       res.elapsed = watch.elapsed();
 
       if (_verbose>0)                 // final print
@@ -621,6 +620,7 @@ namespace Dune {
     {
       using std::abs;
       const real_type EPSILON=1e-80;
+      using std::abs;
       double it;
       field_type rho, rho_new, alpha, beta, h, omega;
       real_type norm, norm_old, norm_0;
@@ -668,7 +668,7 @@ namespace Dune {
         }
       }
 
-      if ( norm < (_reduction * norm_0)  || norm<1E-30)
+      if ( all_true(norm < (_reduction * norm_0))  || max_value(norm)<1E-30)
       {
         res.converged = 1;
         _prec.post(x);                  // postprocess preconditioner
@@ -693,13 +693,13 @@ namespace Dune {
         rho_new = _sp.dot(rt,r);
 
         // look if breakdown occurred
-        if (abs(rho) <= EPSILON)
+        if (all_true(abs(rho) <= EPSILON))
           DUNE_THROW(SolverAbort,"breakdown in BiCGSTAB - rho "
-                     << rho << " <= EPSILON " << EPSILON
+                     << rho << " <= EPSILON " << max_value(EPSILON)
                      << " after " << it << " iterations");
-        if (abs(omega) <= EPSILON)
+        if (all_true(abs(omega) <= EPSILON))
           DUNE_THROW(SolverAbort,"breakdown in BiCGSTAB - omega "
-                     << omega << " <= EPSILON " << EPSILON
+                     << omega << " <= EPSILON " << max_value(EPSILON)
                      << " after " << it << " iterations");
 
 
@@ -723,9 +723,9 @@ namespace Dune {
         // alpha = rho_new / < rt, v >
         h = _sp.dot(rt,v);
 
-        if (abs(h) < EPSILON)
+        if ( all_true(abs(h) < EPSILON) )
           DUNE_THROW(SolverAbort,"abs(h) < EPSILON in BiCGSTAB - abs(h) "
-                     << abs(h) << " < EPSILON " << EPSILON
+                     << abs(h) << " < EPSILON " << max_value(EPSILON)
                      << " after " << it << " iterations");
 
         alpha = rho_new / h;
@@ -748,7 +748,7 @@ namespace Dune {
           this->printOutput(std::cout,it,norm,norm_old);
         }
 
-        if ( norm < (_reduction * norm_0) )
+        if ( all_true(norm < (_reduction * norm_0)) )
         {
           res.converged = 1;
           break;
@@ -787,7 +787,7 @@ namespace Dune {
           this->printOutput(std::cout,it,norm,norm_old);
         }
 
-        if ( norm < (_reduction * norm_0)  || norm<1E-30)
+        if ( all_true(norm < (_reduction * norm_0))  || max_value(norm)<1E-30)
         {
           res.converged = 1;
           break;
@@ -804,8 +804,8 @@ namespace Dune {
 
       _prec.post(x);                  // postprocess preconditioner
       res.iterations = static_cast<int>(std::ceil(it));              // fill statistics
-      res.reduction = static_cast<double>(norm/norm_0);
-      res.conv_rate  = static_cast<double>(pow(res.reduction,1.0/it));
+      res.reduction = static_cast<double>(max_value(norm/norm_0));
+      res.conv_rate  = pow(res.reduction,1.0/it);
       res.elapsed = watch.elapsed();
       if (_verbose>0)                 // final print
         std::cout << "=== rate=" << res.conv_rate
@@ -918,7 +918,7 @@ namespace Dune {
       }
 
       // check for convergence
-      if(def0 < 1e-30 ) {
+      if( max_value(def0) < 1e-30 ) {
         res.converged = true;
         res.iterations = 0;
         res.reduction = 0;
@@ -1045,7 +1045,8 @@ namespace Dune {
             this->printOutput(std::cout,i,defnew,def);
 
           def = defnew;
-          if(def < def0*_reduction || def < 1e-30 || i == _maxit ) {
+          if(all_true(def < def0*_reduction)
+              || max_value(def) < 1e-30 || i == _maxit ) {
             res.converged = true;
             break;
           }
@@ -1058,8 +1059,8 @@ namespace Dune {
         _prec.post(x);
         // fill statistics
         res.iterations = i;
-        res.reduction = static_cast<double>(def/def0);
-        res.conv_rate = static_cast<double>(pow(res.reduction,1.0/i));
+        res.reduction = static_cast<double>(max_value(def/def0));
+        res.conv_rate = pow(res.reduction,1.0/i);
         res.elapsed = watch.elapsed();
 
         // final print
@@ -1086,35 +1087,50 @@ namespace Dune {
 
   private:
 
+    // helper function to extract the real value of a real or complex number
+    inline
+    real_type to_real(const real_type & v)
+    {
+      return v;
+    }
+
+    inline
+    real_type to_real(const std::complex<real_type> & v)
+    {
+      return v.real();
+    }
+
     void generateGivensRotation(field_type &dx, field_type &dy, real_type &cs, field_type &sn)
     {
       using std::sqrt;
       using std::abs;
+      using std::max;
+      using std::min;
+      const real_type eps = 1e-15;
       real_type norm_dx = abs(dx);
       real_type norm_dy = abs(dy);
-      if(norm_dy < 1e-15) {
-        cs = 1.0;
-        sn = 0.0;
-      } else if(norm_dx < 1e-15) {
-        cs = 0.0;
-        sn = 1.0;
-      } else if(norm_dy > norm_dx) {
-        real_type temp = norm_dx/norm_dy;
-        cs = 1.0/sqrt(1.0 + temp*temp);
-        sn = cs;
-        cs *= temp;
-        sn *= dx/norm_dx;
-        // dy is real in exact arithmetic
-        // so we don't need to conjugate here
-        sn *= dy/norm_dy;
-      } else {
-        real_type temp = norm_dy/norm_dx;
-        cs = 1.0/sqrt(1.0 + temp*temp);
-        sn = cs;
-        sn *= dy/dx;
-        // dy and dx is real in exact arithmetic
-        // so we don't have to conjugate both of them
-      }
+      real_type norm_max = max(norm_dx, norm_dy);
+      real_type norm_min = min(norm_dx, norm_dy);
+      real_type temp = norm_min/norm_max;
+      // we rewrite the code in a vectorizable fashion
+      cs = cond(norm_dy < eps,
+        real_type(1.0),
+        cond(norm_dx < eps,
+          real_type(0.0),
+          cond(norm_dy > norm_dx,
+            real_type(1.0)/sqrt(1.0 + temp*temp)*temp,
+            // dy and dx are real in exact arithmetic
+            // thus dx*dy is real so we can explicitly enforce it
+            real_type(1.0)/sqrt(1.0 + temp*temp)*to_real(dx*dy)/norm_dx/norm_dy)));
+      sn = cond(norm_dy < eps,
+        field_type(0.0),
+        cond(norm_dx < eps,
+          field_type(1.0),
+          cond(norm_dy > norm_dx,
+            field_type(1.0)/sqrt(1.0 + temp*temp),
+            // dy and dx is real in exact arithmetic
+            // so we don't have to conjugate both of them
+            field_type(1.0)/sqrt(1.0 + temp*temp)*dy/dx)));
     }
 
     SeqScalarProduct<X> ssp;
@@ -1236,7 +1252,7 @@ namespace Dune {
      */
     virtual void apply (X& x, Y& b, InverseOperatorResult& res)
     {
-      apply(x,b,_reduction,res);
+      apply(x,b,max_value(_reduction),res);
     }
 
     /*!
@@ -1289,7 +1305,7 @@ namespace Dune {
           }
         }
 
-      if(norm_0 < EPSILON) {
+      if(all_true(norm_0 < EPSILON)) {
         _W.post(x);
         res.converged = true;
         if(_verbose > 0) // final print
@@ -1321,7 +1337,7 @@ namespace Dune {
             w.axpy(-H[k][i],v[k]);
           }
           H[i+1][i] = _sp.norm(w);
-          if(abs(H[i+1][i]) < EPSILON)
+          if(all_true(abs(H[i+1][i]) < EPSILON))
             DUNE_THROW(SolverAbort,
                        "breakdown in GMRes - |w| == 0.0 after " << j << " iterations");
 
@@ -1349,7 +1365,7 @@ namespace Dune {
           norm_old = norm;
 
           // check convergence
-          if(norm < reduction * norm_0)
+          if(all_true(norm < reduction * norm_0))
             res.converged = true;
 
         } // end for
@@ -1385,8 +1401,8 @@ namespace Dune {
 
       // save solver statistics
       res.iterations = j-1; // it has to be j-1!!!
-      res.reduction = static_cast<double>(norm/norm_0);
-      res.conv_rate = static_cast<double>(pow(res.reduction,1.0/(j-1)));
+      res.reduction = static_cast<double>(max_value(norm/norm_0));
+      res.conv_rate = pow(res.reduction,1.0/(j-1));
       res.elapsed = watch.elapsed();
 
       if(_verbose>0)
@@ -1426,41 +1442,57 @@ namespace Dune {
     }
 
     template<typename T>
-    typename std::enable_if<std::is_same<field_type,real_type>::value,T>::type conjugate(const T& t) {
+    typename enable_if<std::is_same<field_type,real_type>::value,T>::type conjugate(const T& t) {
       return t;
     }
 
     template<typename T>
-    typename std::enable_if<!std::is_same<field_type,real_type>::value,T>::type conjugate(const T& t) {
+    typename enable_if<!std::is_same<field_type,real_type>::value,T>::type conjugate(const T& t) {
+      using std::conj;
       return conj(t);
     }
 
+    // helper function to extract the real value of a real or complex number
+    inline
+    real_type to_real(const real_type & v)
+    {
+      return v;
+    }
+
+    inline
+    real_type to_real(const std::complex<real_type> & v)
+    {
+      return v.real();
+    }
+
     void
     generatePlaneRotation(field_type &dx, field_type &dy, real_type &cs, field_type &sn)
     {
       using std::sqrt;
       using std::abs;
+      using std::max;
+      using std::min;
+      const real_type eps = 1e-15;
       real_type norm_dx = abs(dx);
       real_type norm_dy = abs(dy);
-      if(norm_dy < 1e-15) {
-        cs = 1.0;
-        sn = 0.0;
-      } else if(norm_dx < 1e-15) {
-        cs = 0.0;
-        sn = 1.0;
-      } else if(norm_dy > norm_dx) {
-        real_type temp = norm_dx/norm_dy;
-        cs = 1.0/sqrt(1.0 + temp*temp);
-        sn = cs;
-        cs *= temp;
-        sn *= dx/norm_dx;
-        sn *= conjugate(dy)/norm_dy;
-      } else {
-        real_type temp = norm_dy/norm_dx;
-        cs = 1.0/sqrt(1.0 + temp*temp);
-        sn = cs;
-        sn *= conjugate(dy/dx);
-      }
+      real_type norm_max = max(norm_dx, norm_dy);
+      real_type norm_min = min(norm_dx, norm_dy);
+      real_type temp = norm_min/norm_max;
+      // we rewrite the code in a vectorizable fashion
+      cs = cond(norm_dy < eps,
+        real_type(1.0),
+        cond(norm_dx < eps,
+          real_type(0.0),
+          cond(norm_dy > norm_dx,
+            real_type(1.0)/sqrt(1.0 + temp*temp)*temp,
+            real_type(1.0)/sqrt(1.0 + temp*temp)*to_real(dx*conjugate(dy))/norm_dx/norm_dy)));
+      sn = cond(norm_dy < eps,
+        field_type(0.0),
+        cond(norm_dx < eps,
+          field_type(1.0),
+          cond(norm_dy > norm_dx,
+            field_type(1.0)/sqrt(1.0 + temp*temp),
+            field_type(1.0)/sqrt(1.0 + temp*temp)*conjugate(dy/dx))));
     }
 
 
@@ -1566,7 +1598,7 @@ namespace Dune {
       p[0].reset(new X(x));
 
       real_type def0 = _sp.norm(b);    // compute norm
-      if (def0<1E-30)        // convergence check
+      if ( max_value(def0) < 1E-30 )   // convergence check
       {
         res.converged  = true;
         res.iterations = 0;                     // fill statistics
@@ -1609,7 +1641,7 @@ namespace Dune {
       if (_verbose>1)                 // print
         this->printOutput(std::cout,++i,defnew,def);
       def = defnew;                   // update norm
-      if (def<def0*_reduction || def<1E-30)        // convergence check
+      if (all_true(def<def0*_reduction) || max_value(def)<1E-30) // convergence check
       {
         res.converged  = true;
         if (_verbose>0)                       // final print
@@ -1655,7 +1687,7 @@ namespace Dune {
             this->printOutput(std::cout,++i,defNew,def);
 
           def = defNew;                       // update norm
-          if (def<def0*_reduction || def<1E-30)            // convergence check
+          if (all_true(def<def0*_reduction) || max_value(def)<1E-30) // convergence check
           {
             res.converged  = true;
             break;
@@ -1674,7 +1706,7 @@ namespace Dune {
 
       // fill statistics
       res.iterations = i;
-      res.reduction = def/def0;
+      res.reduction = max_value(def/def0);
       res.conv_rate  = pow(res.reduction,1.0/i);
       res.elapsed = watch.elapsed();
 
diff --git a/dune/istl/test/CMakeLists.txt b/dune/istl/test/CMakeLists.txt
index 1bc7bd38bcb9f52fb0cf7ca7a812a784e6dab280..a78ee0b83118583ad19faa45f63b7c671e5a51dd 100644
--- a/dune/istl/test/CMakeLists.txt
+++ b/dune/istl/test/CMakeLists.txt
@@ -20,6 +20,11 @@ dune_add_test(SOURCES matrixutilstest.cc)
 
 dune_add_test(SOURCES matrixtest.cc)
 
+if(Vc_FOUND)
+dune_add_test(SOURCES multirhstest.cc)
+  add_dune_vc_flags(multirhstest)
+endif(Vc_FOUND)
+
 dune_add_test(SOURCES bvectortest.cc)
 
 dune_add_test(SOURCES vbvectortest.cc)
@@ -50,35 +55,35 @@ dune_add_test(SOURCES solvertest.cc)
 dune_add_test(SOURCES solveraborttest.cc)
 
 # Pardiso tests
-dune_add_test(SOURCES test_pardiso.cc)
+  dune_add_test(SOURCES test_pardiso.cc)
 
 # SuperLU tests
-dune_add_test(NAME superlustest
-              SOURCES superlutest.cc
-              COMPILE_DEFINITIONS SUPERLU_NTYPE=0)
+  dune_add_test(NAME superlustest
+                SOURCES superlutest.cc
+                COMPILE_DEFINITIONS SUPERLU_NTYPE=0)
 
-dune_add_test(SOURCES superlutest.cc)
+  dune_add_test(SOURCES superlutest.cc)
 
-dune_add_test(NAME superluctest
-              SOURCES superlutest.cc
-              COMPILE_DEFINITIONS SUPERLU_NTYPE=2)
+  dune_add_test(NAME superluctest
+                SOURCES superlutest.cc
+                COMPILE_DEFINITIONS SUPERLU_NTYPE=2)
 
-dune_add_test(NAME superluztest
-              SOURCES superlutest.cc
-              COMPILE_DEFINITIONS SUPERLU_NTYPE=3)
+  dune_add_test(NAME superluztest
+                SOURCES superlutest.cc
+                COMPILE_DEFINITIONS SUPERLU_NTYPE=3)
 
-dune_add_test(SOURCES complexrhstest.cc
-              COMPILE_DEFINITIONS SUPERLU_NTYPE=3)
+  dune_add_test(SOURCES complexrhstest.cc
+                COMPILE_DEFINITIONS SUPERLU_NTYPE=3)
 
 # SuiteSparse tests
-dune_add_test(SOURCES umfpacktest.cc)
-set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "umfpack_decomp")
+  dune_add_test(SOURCES umfpacktest.cc)
+  set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "umfpack_decomp")
 
 dune_add_test(SOURCES ldltest.cc)
 
 dune_add_test(SOURCES spqrtest.cc)
 
-dune_add_test(SOURCES overlappingschwarztest.cc)
+  dune_add_test(SOURCES overlappingschwarztest.cc)
 
 # MPI tests
 dune_add_test(SOURCES matrixredisttest.cc
@@ -87,6 +92,6 @@ dune_add_test(SOURCES matrixredisttest.cc
 dune_add_test(SOURCES vectorcommtest.cc
               CMAKE_GUARD MPI_FOUND)
 
-dune_add_test(SOURCES matrixmarkettest.cc)
+  dune_add_test(SOURCES matrixmarkettest.cc)
 
 exclude_from_headercheck(complexdata.hh)
diff --git a/dune/istl/test/multirhstest.cc b/dune/istl/test/multirhstest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..06654bc3c0baa32e9e8ad26600f7333e5ecfb712
--- /dev/null
+++ b/dune/istl/test/multirhstest.cc
@@ -0,0 +1,191 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+
+// start with including some headers
+#include "config.h"
+
+#define DISABLE_AMG_DIRECTSOLVER 1
+
+// #undef HAVE_VC
+
+#include <iostream>               // for input/output to shell
+#include <fstream>                // for input/output to files
+#include <vector>                 // STL vector class
+#include <complex>
+
+#include <cmath>                 // Yes, we do some math here
+#include <sys/times.h>            // for timing measurements
+
+#include <dune/common/classname.hh>
+#include <dune/common/fvector.hh>
+#include <dune/common/fmatrix.hh>
+#include <dune/common/simd.hh>
+#include <dune/common/timer.hh>
+#include <dune/istl/istlexception.hh>
+#include <dune/istl/basearray.hh>
+#include <dune/istl/bvector.hh>
+#include <dune/istl/bcrsmatrix.hh>
+#include <dune/istl/operators.hh>
+#include <dune/istl/solvers.hh>
+#include <dune/istl/preconditioners.hh>
+#include <dune/istl/paamg/amg.hh>
+#include <dune/istl/paamg/pinfo.hh>
+
+#include "laplacian.hh"
+
+template<typename T>
+struct Random {
+  static T gen()
+  {
+    return drand48();
+  }
+};
+
+#if HAVE_VC
+template<typename T, typename A>
+struct Random<Vc::Vector<T,A>> {
+  static Vc::Vector<T,A> gen()
+  {
+    return Vc::Vector<T,A>::Random();
+  }
+};
+
+template<typename T, std::size_t N, typename V, std::size_t M>
+struct Random<Vc::SimdArray<T,N,V,M>> {
+  static Vc::SimdArray<T,N,V,M> gen()
+  {
+    return Vc::SimdArray<T,N,V,M>::Random();
+  }
+};
+#endif
+
+template <typename V>
+V detectVectorType(Dune::LinearOperator<V,V> &);
+
+template<typename Operator, typename Solver>
+void run_test (std::string precName, std::string solverName, Operator & op, Solver & solver, unsigned int N, unsigned int Runs)
+{
+  using Vector = decltype(detectVectorType(op));
+  using FT = typename Vector::field_type;
+
+  Dune::Timer t;
+  std::cout << "Trying " << solverName << "(" << precName << ")"
+            << " with " << Dune::className<FT>() << std::endl;
+  for (unsigned int run = 0; run < Runs; run++) {
+    // set up system
+    Vector x(N),b(N);
+    for (unsigned int i=0; i<N; i++)
+      x[i] += Random<FT>::gen();
+    b=0; op.apply(x,b);    // set right hand side accordingly
+    x=1;                   // initial guess
+
+    // call the solver
+    Dune::InverseOperatorResult r;
+    solver.apply(x,b,r);
+  }
+  std::cout << Runs << " run(s) took " << t.stop() << std::endl;
+}
+
+template<typename Operator, typename Prec>
+void test_all_solvers(std::string precName, Operator & op, Prec & prec, unsigned int N, unsigned int Runs)
+{
+  using Vector = decltype(detectVectorType(op));
+
+  double reduction = 1e-4;
+  int verb = 1;
+  Dune::LoopSolver<Vector> loop(op,prec,reduction,18000,verb);
+  Dune::CGSolver<Vector> cg(op,prec,reduction,8000,verb);
+  Dune::BiCGSTABSolver<Vector> bcgs(op,prec,reduction,8000,verb);
+  Dune::GradientSolver<Vector> grad(op,prec,reduction,18000,verb);
+  Dune::RestartedGMResSolver<Vector> gmres(op,prec,reduction,40,8000,verb);
+  Dune::MINRESSolver<Vector> minres(op,prec,reduction,8000,verb);
+  Dune::GeneralizedPCGSolver<Vector> gpcg(op,prec,reduction,8000,verb);
+
+  // run_test(precName, "Loop",           op,loop,N,Runs);
+  run_test(precName, "CG",             op,cg,N,Runs);
+  run_test(precName, "BiCGStab",       op,bcgs,N,Runs);
+  run_test(precName, "Gradient",       op,grad,N,Runs);
+  run_test(precName, "RestartedGMRes", op,gmres,N,Runs);
+  run_test(precName, "MINRes",         op,minres,N,Runs);
+  run_test(precName, "GeneralizedPCG", op,gpcg,N,Runs);
+}
+
+template<typename FT>
+void test_all(unsigned int Runs = 1)
+{
+  // define Types
+  typedef Dune::FieldVector<FT,1> VB;
+  typedef Dune::FieldMatrix<double,1,1> MB;
+  typedef Dune::BlockVector<VB> Vector;
+  typedef Dune::BCRSMatrix<MB> Matrix;
+
+  // size
+  unsigned int size = 100;
+  unsigned int N = size*size;
+
+  // make a compressed row matrix with five point stencil
+  Matrix A;
+  setupLaplacian(A,size);
+  typedef Dune::MatrixAdapter<Matrix,Vector,Vector> Operator;
+  Operator op(A);        // make linear operator from A
+
+  // create all preconditioners
+  Dune::SeqJac<Matrix,Vector,Vector> jac(A,1,0.1);          // Jacobi preconditioner
+  Dune::SeqGS<Matrix,Vector,Vector> gs(A,1,0.1);          // GS preconditioner
+  Dune::SeqSOR<Matrix,Vector,Vector> sor(A,1,0.1);  // SOR preconditioner
+  Dune::SeqSSOR<Matrix,Vector,Vector> ssor(A,1,0.1);      // SSOR preconditioner
+  Dune::SeqILU0<Matrix,Vector,Vector> ilu0(A,0.1);        // preconditioner object
+  Dune::SeqILUn<Matrix,Vector,Vector> ilu1(A,1,0.1);     // preconditioner object
+
+  // AMG
+  typedef Dune::Amg::RowSum Norm;
+  typedef Dune::Amg::CoarsenCriterion<Dune::Amg::UnSymmetricCriterion<Matrix,Norm> >
+          Criterion;
+  typedef Dune::SeqSSOR<Matrix,Vector,Vector> Smoother;
+  typedef typename Dune::Amg::SmootherTraits<Smoother>::Arguments SmootherArgs;
+  SmootherArgs smootherArgs;
+  smootherArgs.iterations = 1;
+  smootherArgs.relaxationFactor = 1;
+  unsigned int coarsenTarget = 1000;
+  unsigned int maxLevel = 10;
+  Criterion criterion(15,coarsenTarget);
+  criterion.setDefaultValuesIsotropic(2);
+  criterion.setAlpha(.67);
+  criterion.setBeta(1.0e-4);
+  criterion.setMaxLevel(maxLevel);
+  criterion.setSkipIsolated(false);
+  criterion.setNoPreSmoothSteps(1);
+  criterion.setNoPostSmoothSteps(1);
+  Dune::SeqScalarProduct<Vector> sp;
+  typedef Dune::Amg::AMG<Operator,Vector,Smoother> AMG;
+  Smoother smoother(A,1,1);
+  AMG amg(op, criterion, smootherArgs);
+
+  // run the sub-tests
+  test_all_solvers("Jacobi",      op,jac,N,Runs);
+  test_all_solvers("GaussSeidel", op,gs,N,Runs);
+  test_all_solvers("SOR",         op,sor,N,Runs);
+  test_all_solvers("SSOR",        op,ssor,N,Runs);
+  test_all_solvers("ILU0",        op,ilu0,N,Runs);
+  test_all_solvers("ILU1",        op,ilu1,N,Runs);
+  test_all_solvers("AMG",         op,amg,N,Runs);
+}
+
+int main (int argc, char ** argv)
+{
+  test_all<float>();
+  test_all<double>();
+#if HAVE_VC
+  // test_all<Vc::float_v>();
+  test_all<Vc::double_v>();
+  test_all<Vc::Vector<double, Vc::VectorAbi::Scalar>>();
+  test_all<Vc::SimdArray<double,2>>();
+  test_all<Vc::SimdArray<double,2,Vc::Vector<double, Vc::VectorAbi::Scalar>,1>>();
+  test_all<Vc::SimdArray<double,8>>();
+  test_all<Vc::SimdArray<double,8,Vc::Vector<double, Vc::VectorAbi::Scalar>,1>>();
+#endif
+
+  test_all<double>(8);
+
+  return 0;
+}