diff --git a/dune/istl/btdmatrix.hh b/dune/istl/btdmatrix.hh
index 72ebb6789125a951ec8bcb6ed43d1859f6ab8a61..a32b48f7a2ddae4c2ff48b5378d8cf0639ea2cff 100644
--- a/dune/istl/btdmatrix.hh
+++ b/dune/istl/btdmatrix.hh
@@ -102,11 +102,10 @@ namespace Dune {
       return *this;
     }
 
-    /** \brief Use the Thomas algorithm to solve the system Ax=b
+    /** \brief Use the Thomas algorithm to solve the system Ax=b in O(n) time
      *
      * \exception ISTLError if the matrix is singular
      *
-     * \todo Implementation currently only works for scalar matrices
      */
     template <class V>
     void solve (V& x, const V& rhs) const {
@@ -119,27 +118,52 @@ namespace Dune {
 
       // Make copies of the rhs and the right matrix band
       V d = rhs;
-      V c(this->N()-1);
+      std::vector<block_type> c(this->N()-1);
       for (size_t i=0; i<this->N()-1; i++)
         c[i] = (*this)[i][i+1];
 
       /* Modify the coefficients. */
-      c[0] /= (*this)[0][0];    /* Division by zero risk. */
-      d[0] /= (*this)[0][0];    /* Division by zero would imply a singular matrix. */
+      block_type a_00_inv;
+      FMatrixHelp::invertMatrix((*this)[0][0], a_00_inv);
+
+      //c[0] /= (*this)[0][0];	/* Division by zero risk. */
+      block_type c_0_tmp = c[0];
+      FMatrixHelp::multMatrix(a_00_inv, c_0_tmp, c[0]);
+
+      // d = a^{-1} d        /* Division by zero would imply a singular matrix. */
+      typename V::block_type d_0_tmp = d[0];
+      (*this)[0][0].solve(d[0], d_0_tmp);
 
       for (unsigned int i = 1; i < this->N(); i++) {
 
-        double id = 1.0 / ((*this)[i][i] - c[i-1] * (*this)[i][i-1]);      /* Division by zero risk. */
-        if (i<c.size())
-          c[i] *= id;                                                  /* Last value calculated is redundant. */
-        d[i] = (d[i] - d[i-1] * (*this)[i][i-1]) * id;
+        // id = ( a_ii - c_{i-1} a_{i, i-1} ) ^{-1}
+        block_type tmp;
+        FMatrixHelp::multMatrix(c[i-1], (*this)[i][i-1], tmp);
+        block_type id = (*this)[i][i];
+        id -= tmp;
+        id.invert();         /* Division by zero risk. */
+
+        if (i<c.size()) {
+          // c[i] *= id
+          tmp = c[i];
+          FMatrixHelp::multMatrix(tmp, id, c[i]);                      /* Last value calculated is redundant. */
+        }
+
+        // d[i] = (d[i] - d[i-1] * (*this)[i][i-1]) * id;
+        (*this)[i][i-1].mmv(d[i-1], d[i]);
+        typename V::block_type tmpVec = d[i];
+        id.mv(tmpVec, d[i]);
+        //d[i] *= id;
 
       }
 
       /* Now back substitute. */
       x[this->N() - 1] = d[this->N() - 1];
-      for (int i = this->N() - 2; i >= 0; i--)
-        x[i] = d[i] - c[i] * x[i + 1];
+      for (int i = this->N() - 2; i >= 0; i--) {
+        //x[i] = d[i] - c[i] * x[i + 1];
+        x[i] = d[i];
+        c[i].mmv(x[i+1], x[i]);
+      }
 
     }
 
diff --git a/dune/istl/test/matrixtest.cc b/dune/istl/test/matrixtest.cc
index 28e31847f906c7d84c9d52099d99918621d8ab6a..f9105a18070e7683094bb7118b2f5d634ec30ed8 100644
--- a/dune/istl/test/matrixtest.cc
+++ b/dune/istl/test/matrixtest.cc
@@ -354,28 +354,54 @@ int main()
 
   // ////////////////////////////////////////////////////////////////////////
   //   Test the BTDMatrix class -- a dynamic block-tridiagonal matrix
+  //   a) the scalar case
   // ////////////////////////////////////////////////////////////////////////
 
-  BTDMatrix<FieldMatrix<double,1,1> > btdMatrix(4);
+  BTDMatrix<FieldMatrix<double,1,1> > btdMatrixScalar(4);
   typedef BTDMatrix<FieldMatrix<double,1,1> >::size_type size_type;
 
-  btdMatrix = 4.0;
+  btdMatrixScalar = 4.0;
 
-  testSuperMatrix(btdMatrix);
+  testSuperMatrix(btdMatrixScalar);
+
+  btdMatrixScalar = 0.0;
+  for (size_type i=0; i<btdMatrixScalar.N(); i++)    // diagonal
+    btdMatrixScalar[i][i] = 1+i;
+
+  for (size_type i=0; i<btdMatrixScalar.N()-1; i++)
+    btdMatrixScalar[i][i+1] = 2+i;               // first off-diagonal
+
+  testSolve<BTDMatrix<FieldMatrix<double,1,1> >, BlockVector<FieldVector<double,1> > >(btdMatrixScalar);
+
+  // test a 1x1 BTDMatrix, because that is a special case
+  BTDMatrix<FieldMatrix<double,1,1> > btdMatrixScalar_1x1(1);
+  btdMatrixScalar_1x1 = 1.0;
+  testSuperMatrix(btdMatrixScalar_1x1);
+
+  // ////////////////////////////////////////////////////////////////////////
+  //   Test the BTDMatrix class -- a dynamic block-tridiagonal matrix
+  //   b) the block-valued case
+  // ////////////////////////////////////////////////////////////////////////
+
+  BTDMatrix<FieldMatrix<double,2,2> > btdMatrix(4);
+  typedef BTDMatrix<FieldMatrix<double,2,2> >::size_type size_type;
 
   btdMatrix = 0.0;
   for (size_type i=0; i<btdMatrix.N(); i++)    // diagonal
-    btdMatrix[i][i] = 1+i;
+    btdMatrix[i][i] = ScaledIdentityMatrix<double,2>(1+i);
 
   for (size_type i=0; i<btdMatrix.N()-1; i++)
-    btdMatrix[i][i+1] = 2+i;               // first off-diagonal
+    btdMatrix[i][i+1] = ScaledIdentityMatrix<double,2>(2+i);               // first off-diagonal
+
+  BTDMatrix<FieldMatrix<double,2,2> > btdMatrixThrowAway = btdMatrix;    // the test method overwrites the matrix
+  testSuperMatrix(btdMatrixThrowAway);
 
-  testSolve<BTDMatrix<FieldMatrix<double,1,1> >, BlockVector<FieldVector<double,1> > >(btdMatrix);
+  testSolve<BTDMatrix<FieldMatrix<double,2,2> >, BlockVector<FieldVector<double,2> > >(btdMatrix);
 
   // test a 1x1 BTDMatrix, because that is a special case
-  BTDMatrix<FieldMatrix<double,1,1> > btdMatrixScalar(1);
-  btdMatrixScalar = 1.0;
-  testSuperMatrix(btdMatrixScalar);
+  BTDMatrix<FieldMatrix<double,2,2> > btdMatrix_1x1(1);
+  btdMatrix_1x1 = 1.0;
+  testSuperMatrix(btdMatrix_1x1);
 
   // ////////////////////////////////////////////////////////////////////////
   //   Test the FieldMatrix class