From 4b0385859ea1bd0a863fb86d67ceb866b61a4946 Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.r.kempf@gmail.com>
Date: Thu, 19 Sep 2013 15:28:29 +0200
Subject: [PATCH] [FEATURE] Add UMFPack support to dune-istl

This commit contains
- a wrapper for the C library UMFPack
- tests for both CMake and autotools to find UMFPack on the system
- a unit test for UMFPack
- a new MatrixType ColCompMatrix which is a base class for a column
  compressed matrix. SuperLUMatrix now inherits from this base class.
  This way, no code is duplicated for the very similar interface in
  UMFPack.
- the SuperLU-specific part of OverlappingSchwarz code is abstracted
  to work with either SuperLU or UMFPack
---
 cmake/modules/AddUMFPackFlags.cmake      |  18 +
 cmake/modules/CMakeLists.txt             |   4 +-
 cmake/modules/DuneIstlMacros.cmake       |   2 +
 cmake/modules/FindUMFPack.cmake          | 112 ++++
 cmake/modules/Makefile.am                |   7 +-
 config.h.cmake                           |   3 +
 dune/istl/CMakeLists.txt                 |   2 +
 dune/istl/Makefile.am                    |   5 +-
 dune/istl/colcompmatrix.hh               | 597 +++++++++++++++++++++
 dune/istl/overlappingschwarz.hh          | 138 ++---
 dune/istl/solvertype.hh                  |  12 +
 dune/istl/superlu.hh                     |  19 +-
 dune/istl/supermatrix.hh                 | 655 +++--------------------
 dune/istl/test/CMakeLists.txt            |  30 +-
 dune/istl/test/Makefile.am               |  34 +-
 dune/istl/test/overlappingschwarztest.cc |  17 +-
 dune/istl/test/umfpacktest.cc            |  80 +++
 dune/istl/umfpack.hh                     | 460 ++++++++++++++++
 m4/dune_istl.m4                          |   1 +
 m4/umfpack.m4                            | 141 +++++
 20 files changed, 1660 insertions(+), 677 deletions(-)
 create mode 100644 cmake/modules/AddUMFPackFlags.cmake
 create mode 100644 cmake/modules/FindUMFPack.cmake
 create mode 100644 dune/istl/colcompmatrix.hh
 create mode 100644 dune/istl/test/umfpacktest.cc
 create mode 100644 dune/istl/umfpack.hh
 create mode 100644 m4/umfpack.m4

diff --git a/cmake/modules/AddUMFPackFlags.cmake b/cmake/modules/AddUMFPackFlags.cmake
new file mode 100644
index 00000000..5ed6cdc2
--- /dev/null
+++ b/cmake/modules/AddUMFPackFlags.cmake
@@ -0,0 +1,18 @@
+# module providing convenience mehtods for compiling bianries with UMFPack support
+#
+# Provides the following functions:
+#
+# add_dune_umfpack_flags(target1 target2...)
+#
+# adds UMFPack flags to the targets for compilation and linking
+function(add_dune_umfpack_flags _targets)
+  if(UMFPACK_FOUND)
+    foreach(_target ${_targets})
+      target_link_libraries(${_target} ${UMFPACK_DUNE_LIBRARIES})
+      get_target_property(_props ${_target} COMPILE_FLAGS)
+      string(REPLACE "_props-NOTFOUND" "" _props "${_props}")
+      set_target_properties(${_target} PROPERTIES COMPILE_FLAGS
+        "${_props} ${UMFPACK_DUNE_COMPILE_FLAGS} -DENABLE_UMFPACK")
+    endforeach(_target ${_targets})
+  endif(UMFPACK_FOUND)
+endfunction(add_dune_umfpack_flags)
\ No newline at end of file
diff --git a/cmake/modules/CMakeLists.txt b/cmake/modules/CMakeLists.txt
index b9bbda94..84d034ce 100644
--- a/cmake/modules/CMakeLists.txt
+++ b/cmake/modules/CMakeLists.txt
@@ -1,7 +1,9 @@
 set(modules
   AddSuperLUFlags.cmake
+  AddUMFPackFlags.cmake
   DuneIstlMacros.cmake
-  FindSuperLU.cmake)
+  FindSuperLU.cmake
+  FindUMFPack.cmake)
 
 install(FILES ${modules} DESTINATION
   ${DUNE_INSTALL_MODULEDIR})
diff --git a/cmake/modules/DuneIstlMacros.cmake b/cmake/modules/DuneIstlMacros.cmake
index d9d87a37..3f4d7594 100644
--- a/cmake/modules/DuneIstlMacros.cmake
+++ b/cmake/modules/DuneIstlMacros.cmake
@@ -3,3 +3,5 @@ find_package(ParMETIS)
 include(AddParMETISFlags)
 find_package(SuperLU)
 include(AddSuperLUFlags)
+find_package(UMFPack)
+include(AddUMFPackFlags)
diff --git a/cmake/modules/FindUMFPack.cmake b/cmake/modules/FindUMFPack.cmake
new file mode 100644
index 00000000..8e60b435
--- /dev/null
+++ b/cmake/modules/FindUMFPack.cmake
@@ -0,0 +1,112 @@
+# Module that checks whether UMFPack is available.
+#
+# Variables used by this module which you may want to set:
+# UMFPACK_ROOT   Path list to search for UMFPack
+#
+# Sets the following variables
+#
+# UMFPACK_FOUND          True if UMFPack was found and usable
+# UMFPACK_INCLUDE_DIRS   Path to the UMFPack include dirs
+# UMFPACK_LIBRARIES      Name of the UMFPack libraries
+#
+
+find_package(BLAS QUIET REQUIRED)
+if(NOT BLAS_FOUND)
+  message(WARNING "UMFPack requires BLAS which was not found, skipping the test.")
+  return()
+endif()
+
+find_library(AMD_LIBRARY
+  NAMES "amd"
+  PATHS ${UMFPACK_ROOT}
+  PATH_SUFFIXES "lib" "lib32" "lib64" "AMD" "AMD/Lib"
+  NO_DEFAULT_PATH
+)
+
+find_library(AMD_LIBRARY
+  NAMES "amd"
+  PATH_SUFFIXES "lib" "lib32" "lib64" "AMD" "AMD/Lib"
+)
+
+if(NOT AMD_LIBRARY)
+  message(WARNING "UMFPack requires AMD which was not found, skipping the test.")
+  return()
+endif()
+
+#look for header files at positions given by the user
+find_path(UMFPACK_INCLUDE_DIR
+  NAMES "umfpack.h"
+  PATHS ${UMFPACK_ROOT}
+  PATH_SUFFIXES "umfpack" "include/umfpack" "suitesparse" "include" "src" "UMFPACK" "UMFPACK/Include"
+  NO_DEFAULT_PATH
+)
+#now also look for default paths
+find_path(UMFPACK_INCLUDE_DIR
+  NAMES "umfpack.h"
+  PATH_SUFFIXES "umfpack" "include/umfpack" "suitesparse" "include" "UMFPACK" "UMFPACK/Include"
+)
+
+#look for library at positions given by the user
+find_library(UMFPACK_LIBRARY
+  NAMES "umfpack"
+  PATHS ${UMFPACK_ROOT}
+  PATH_SUFFIXES "lib" "lib32" "lib64" "UMFPACK" "UMFPACK/Lib"
+  NO_DEFAULT_PATH
+)
+#now  also include the deafult paths
+find_library(UMFPACK_LIBRARY
+  NAMES "umfpack"
+  PATH_SUFFIXES "lib" "lib32" "lib64" "UMFPACK" "UMFPACK/Lib"
+)
+
+if(UMFPACK_INCLUDE_DIR)
+  set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} ${UMFPACK_INCLUDE_DIR} ${AMD_INCLUDE_DIR})
+endif()
+if(UMFPACK_LIBRARY)
+  set(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} ${UMFPACK_LIBRARY})
+endif()
+if(BLAS_LIBRARIES)
+  set(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} ${BLAS_LIBRARIES})
+endif()
+if(AMD_LIBRARY)
+  set(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} ${AMD_LIBRARY})
+endif()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(
+  "UMFPack"
+  DEFAULT_MSG
+  UMFPACK_INCLUDE_DIR
+  UMFPACK_LIBRARY
+)
+
+mark_as_advanced(UMFPACK_INCLUDE_DIR UMFPACK_LIBRARY)
+
+if(UMFPACK_FOUND)
+  set(UMFPACK_INCLUDE_DIRS ${UMFPACK_INCLUDE_DIR})
+  set(UMFPACK_LIBRARIES ${UMFPACK_LIBRARY})
+  file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
+    "Determining location of UMFPack succeded:\n"
+    "Include directory: ${UMFPACK_INCLUDE_DIRS}\n"
+    "Library directory: ${UMFPACK_LIBRARIES}\n\n")
+  set(UMFPACK_DUNE_COMPILE_FLAGS "-I${UMFPACK_INCLUDE_DIRS}"
+    CACHE STRING "Compile Flags used by DUNE when compiling with UMFPack programs")
+  set(UMFPACK_DUNE_LIBRARIES ${UMFPACK_LIBRARIES} ${BLAS_LIBRARIES} ${AMD_LIBRARY}
+    CACHE STRING "LIbraries used by DUNE when linking UMFPack programs")
+else(UMFPACK_FOUND)
+  file(APPEND ${CMAKE_BINARY_DIR}${CMAKES_FILES_DIRECTORY}/CMakeError.log
+    "Determing location of UMFPack failed:\n"
+    "Include directory: ${UMFPACK_INCLUDE_DIRS}\n"
+    "Library directory: ${UMFPACK_LIBRARIES}\n\n")
+endif(UMFPACK_FOUND)
+
+#set HAVE_UMFPACK for config.h
+set(HAVE_UMFPACK UMFPACK_FOUND)
+
+#add all umfpack related flags to ALL_PKG_FLAGS, this must happen regardless of a target using add_dune_umfpack_flags
+if(UMFPACK_FOUND)
+  set_property(GLOBAL APPEND PROPERTY ALL_PKG_FLAGS "${UMFPACK_DUNE_COMPILE_FLAGS}")
+  foreach(dir "${UMFPACK_INCLUDE_DIRS}")
+    set_property(GLOBAL APPEND PROPERTY ALL_PKG_FLAGS "-I${dir}")
+  endforeach()
+endif()
\ No newline at end of file
diff --git a/cmake/modules/Makefile.am b/cmake/modules/Makefile.am
index 1c8e622f..9c3e667f 100644
--- a/cmake/modules/Makefile.am
+++ b/cmake/modules/Makefile.am
@@ -1,5 +1,8 @@
-MODULES = DuneIstlMacros.cmake \
- FindSuperLU.cmake
+MODULES = AddSuperLUFlags.cmake \
+ AddUMFPackFlags.cmake \
+ DuneIstlMacros.cmake \
+ FindSuperLU.cmake \
+ FindUMFPack.cmake
 
 modulesdir= $(datadir)/cmake/modules
 dist_modules_DATA = ${MODULES}
diff --git a/config.h.cmake b/config.h.cmake
index d7894dc2..5c460704 100644
--- a/config.h.cmake
+++ b/config.h.cmake
@@ -42,6 +42,9 @@
 /* Define to ENABLE_SUPERLU if the SuperLU library is available */
 #cmakedefine HAVE_SUPERLU ENABLE_SUPERLU
 
+/* Define to if the UMFPack library is available */
+#cmakedefine HAVE_UMFPACK ENABLE_UMFPACK
+
 /* define to 1 because older versions of SuperLU are no longer supported*/
 #define SUPERLU_POST_2005_VERSION 1
 
diff --git a/dune/istl/CMakeLists.txt b/dune/istl/CMakeLists.txt
index f96b0548..e9db20ef 100644
--- a/dune/istl/CMakeLists.txt
+++ b/dune/istl/CMakeLists.txt
@@ -9,6 +9,7 @@ install(FILES
    bdmatrix.hh
    btdmatrix.hh
    bvector.hh
+   colcompmatrix.hh
    diagonalmatrix.hh
    gsetc.hh
    ilu.hh
@@ -40,5 +41,6 @@ install(FILES
    solvertype.hh
    superlu.hh
    supermatrix.hh
+   umfpack.hh
    vbvector.hh
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/dune/istl)
diff --git a/dune/istl/Makefile.am b/dune/istl/Makefile.am
index f36255c2..06c2c313 100644
--- a/dune/istl/Makefile.am
+++ b/dune/istl/Makefile.am
@@ -8,6 +8,7 @@ istl_HEADERS = basearray.hh \
 	bdmatrix.hh \
 	btdmatrix.hh \
 	bvector.hh \
+	colcompmatrix.hh \
 	diagonalmatrix.hh \
 	gsetc.hh \
 	ilu.hh \
@@ -39,8 +40,8 @@ istl_HEADERS = basearray.hh \
 	solvertype.hh \
 	superlu.hh \
 	supermatrix.hh \
-	vbvector.hh
-
+	vbvector.hh \
+	umfpack.hh
 
 include $(top_srcdir)/am/global-rules
 
diff --git a/dune/istl/colcompmatrix.hh b/dune/istl/colcompmatrix.hh
new file mode 100644
index 00000000..e4cc3bde
--- /dev/null
+++ b/dune/istl/colcompmatrix.hh
@@ -0,0 +1,597 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_COLCOMPMATRIX_HH
+#define DUNE_COLCOMPMATRIX_HH
+#include "bcrsmatrix.hh"
+#include "bvector.hh"
+#include <dune/common/fmatrix.hh>
+#include <dune/common/fvector.hh>
+#include <dune/common/typetraits.hh>
+#include <limits>
+
+namespace Dune
+{
+  /**
+   * @brief Provides access to an iterator over all matrix rows.
+   *
+   * @tparam M The type of the matrix.
+   */
+  template<class M>
+  class MatrixRowSet
+  {
+  public:
+    //! @brief The type of the matrix.
+    typedef M Matrix;
+    //! @brief The matrix row iterator type.
+    typedef typename Matrix::ConstRowIterator const_iterator;
+
+    /**
+     * @brief Construct an row set over all matrix rows.
+     * @param m The matrix for which we manage the rows.
+     */
+    MatrixRowSet(const Matrix& m)
+      : m_(m)
+    {}
+
+    //! @brief Get the row iterator at the first row.
+    const_iterator begin() const
+    {
+      return m_.begin();
+    }
+    //! @brief Get the row iterator at the end of all rows.
+    const_iterator end() const
+    {
+      return m_.end();
+    }
+  private:
+    const Matrix& m_;
+  };
+
+  /**
+   * @brief Provides access to an iterator over an arbitrary subset
+   * of matrix rows.
+   *
+   * @tparam M The type of the matrix.
+   * @tparam S the type of the set of valid row indices.
+   */
+  template<class M, class S>
+  class MatrixRowSubset
+  {
+  public:
+    /** @brief the type of the matrix class. */
+    typedef M Matrix;
+    /** @brief the type of the set of valid row indices. */
+    typedef S RowIndexSet;
+
+    /**
+     * @brief Construct an row set over all matrix rows.
+     * @param m The matrix for which we manage the rows.
+     * @param s The set of row indices we manage.
+     */
+    MatrixRowSubset(const Matrix& m, const RowIndexSet& s)
+      : m_(m), s_(s)
+    {}
+
+    const Matrix& matrix() const
+    {
+      return m_;
+    }
+
+    const RowIndexSet& rowIndexSet() const
+    {
+      return s_;
+    }
+
+    //! @brief The matrix row iterator type.
+    class const_iterator
+      : public ForwardIteratorFacade<const_iterator, const typename Matrix::row_type>
+    {
+    public:
+      const_iterator(typename Matrix::const_iterator firstRow,
+                     typename RowIndexSet::const_iterator pos)
+        : firstRow_(firstRow), pos_(pos)
+      {}
+
+
+      const typename Matrix::row_type& dereference() const
+      {
+        return *(firstRow_+ *pos_);
+      }
+      bool equals(const const_iterator& o) const
+      {
+        return pos_==o.pos_;
+      }
+      void increment()
+      {
+        ++pos_;
+      }
+      typename RowIndexSet::value_type index() const
+      {
+        return *pos_;
+      }
+
+    private:
+      //! @brief Iterator pointing to the first row of the matrix.
+      typename Matrix::const_iterator firstRow_;
+      //! @brief Iterator pointing to the current row index.
+      typename RowIndexSet::const_iterator pos_;
+    };
+
+    //! @brief Get the row iterator at the first row.
+    const_iterator begin() const
+    {
+      return const_iterator(m_.begin(), s_.begin());
+    }
+    //! @brief Get the row iterator at the end of all rows.
+    const_iterator end() const
+    {
+      return const_iterator(m_.begin(), s_.end());
+    }
+
+  private:
+    //! @brief The matrix for which we manage the row subset.
+    const Matrix& m_;
+    //! @brief The set of row indices we manage.
+    const RowIndexSet& s_;
+  };
+
+  /**
+   * @brief Utility class for converting an ISTL Matrix into a column-compressed matrix
+   * @tparam M the matrix type
+   */
+  template<class M>
+  struct ColCompMatrix
+  {};
+
+  /**
+   * @brief Inititializer for the ColCompMatrix
+   * as needed by OverlappingSchwarz
+   * @tparam M the matrix type
+   */
+  template<class M>
+  struct ColCompMatrixInitializer
+  {};
+
+  template<class M, class X, class TM, class TD, class T1>
+  class SeqOverlappingSchwarz;
+
+  template<class T>
+  struct SeqOverlappingSchwarzAssembler;
+
+  /**
+   * @brief Converter for BCRSMatrix to column-compressed Matrix.
+   * specialization for BCRSMatrix
+   */
+  template<class B, class TA, int n, int m>
+  class ColCompMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >
+  {
+    template<class M, class X, class TM, class TD, class T1>
+    friend class SeqOverlappingSchwarz;
+    friend struct ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<B,n,m>,TA> >;
+
+  public:
+    /** @brief The type of the matrix to convert. */
+    typedef BCRSMatrix<FieldMatrix<B,n,m>,TA> Matrix;
+    friend struct SeqOverlappingSchwarzAssembler<ColCompMatrix<Matrix> >;
+
+    typedef typename Matrix::size_type size_type;
+
+    /**
+     * @brief Constructor that initializes the data.
+     * @param mat The matrix to convert.
+     */
+    explicit ColCompMatrix(const Matrix& mat);
+
+    ColCompMatrix();
+
+    /** @brief Destructor */
+    virtual ~ColCompMatrix();
+
+    /**
+     * @brief Get the number of rows.
+     * @return  The number of rows.
+     */
+    size_type N() const
+    {
+      return N_;
+    }
+
+    size_type nnz() const
+    {
+      return Nnz_/n/m;
+    }
+
+    /**
+     * @brief Get the number of columns.
+     * @return  The number of columns.
+     */
+    size_type M() const
+    {
+      return M_;
+    }
+
+    B* getValues() const
+    {
+      return values;
+    }
+
+    int* getRowIndex() const
+    {
+      return rowindex;
+    }
+
+    int* getColStart() const
+    {
+      return colstart;
+    }
+
+    ColCompMatrix& operator=(const Matrix& mat);
+    ColCompMatrix& operator=(const ColCompMatrix& mat);
+
+    /**
+     * @brief Initialize data from a given set of matrix rows and columns
+     * @tparam The type of the row index set.
+     * @param mat the matrix with the values
+     * @param mrs The set of row (and column) indices to represent
+     */
+    virtual void setMatrix(const Matrix& mat, const std::set<std::size_t>& mrs);
+    /** @brief free allocated space. */
+    virtual void free();
+
+    /** @brief Initialize data from given matrix. */
+    virtual void setMatrix(const Matrix& mat);
+
+  public:
+    int N_, M_, Nnz_;
+    B* values;
+    int* rowindex;
+    int* colstart;
+  };
+
+  template<class T, class A, int n, int m>
+  class ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >
+  {
+    template<class I, class S, class D>
+    friend class OverlappingSchwarzInitializer;
+  public:
+    typedef Dune::BCRSMatrix<FieldMatrix<T,n,m>,A> Matrix;
+    typedef Dune::ColCompMatrix<Matrix> ColCompMatrix;
+    typedef typename Matrix::row_type::const_iterator CIter;
+    typedef typename Matrix::size_type size_type;
+
+    ColCompMatrixInitializer(ColCompMatrix& lum);
+
+    ColCompMatrixInitializer();
+
+    virtual ~ColCompMatrixInitializer();
+
+    template<typename Iter>
+    void addRowNnz(const Iter& row) const;
+
+    template<typename Iter, typename Set>
+    void addRowNnz(const Iter& row, const Set& s) const;
+
+    void allocate();
+
+    template<typename Iter>
+    void countEntries(const Iter& row, const CIter& col) const;
+
+    void countEntries(size_type colidx) const;
+
+    void calcColstart() const;
+
+    template<typename Iter>
+    void copyValue(const Iter& row, const CIter& col) const;
+
+    void copyValue(const CIter& col, size_type rowindex, size_type colidx) const;
+
+    virtual void createMatrix() const;
+
+  protected:
+
+    void allocateMatrixStorage() const;
+
+    void allocateMarker();
+
+    ColCompMatrix* mat;
+    size_type cols;
+    mutable size_type *marker;
+  };
+
+  template<class T, class A, int n, int m>
+  ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::ColCompMatrixInitializer(ColCompMatrix& mat_)
+    : mat(&mat_), cols(mat_.N()), marker(0)
+  {
+    mat->Nnz_=0;
+  }
+
+  template<class T, class A, int n, int m>
+  ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::ColCompMatrixInitializer()
+    : mat(0), cols(0), marker(0)
+  {}
+
+  template<class T, class A, int n, int m>
+  ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::~ColCompMatrixInitializer()
+  {
+    if(marker)
+      delete[] marker;
+  }
+
+  template<class T, class A, int n, int m>
+  template<typename Iter>
+  void ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::addRowNnz(const Iter& row) const
+  {
+    mat->Nnz_+=row->getsize();
+  }
+
+  template<class T, class A, int n, int m>
+  template<typename Iter, typename Map>
+  void ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::addRowNnz(const Iter& row,
+                                                                            const Map& indices) const
+  {
+    typedef typename  Iter::value_type::const_iterator RIter;
+    typedef typename Map::const_iterator MIter;
+    MIter siter =indices.begin();
+    for(RIter entry=row->begin(); entry!=row->end(); ++entry)
+    {
+      for(; siter!=indices.end() && *siter<entry.index(); ++siter) ;
+      if(siter==indices.end())
+        break;
+      if(*siter==entry.index())
+        // index is in subdomain
+        ++mat->Nnz_;
+    }
+  }
+
+  template<class T, class A, int n, int m>
+  void ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::allocate()
+  {
+    allocateMatrixStorage();
+    allocateMarker();
+  }
+
+  template<class T, class A, int n, int m>
+  void ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::allocateMatrixStorage() const
+  {
+    mat->Nnz_*=n*m;
+    // initialize data
+    mat->values=new T[mat->Nnz_];
+    mat->rowindex=new int[mat->Nnz_];
+    mat->colstart=new int[cols+1];
+  }
+
+  template<class T, class A, int n, int m>
+  void ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::allocateMarker()
+  {
+    marker = new typename Matrix::size_type[cols];
+
+    for(size_type i=0; i < cols; ++i)
+      marker[i]=0;
+  }
+
+  template<class T, class A, int n, int m>
+  template<typename Iter>
+  void ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::countEntries(const Iter& row, const CIter& col) const
+  {
+    countEntries(col.index());
+  }
+
+  template<class T, class A, int n, int m>
+  void ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::countEntries(size_type colindex) const
+  {
+    for(size_type i=0; i < m; ++i)
+    {
+      assert(colindex*m+i<cols);
+      marker[colindex*m+i]+=n;
+    }
+  }
+
+  template<class T, class A, int n, int m>
+  void ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::calcColstart() const
+  {
+    mat->colstart[0]=0;
+    for(size_type i=0; i < cols; ++i) {
+      assert(i<cols);
+      mat->colstart[i+1]=mat->colstart[i]+marker[i];
+      marker[i]=mat->colstart[i];
+    }
+  }
+
+  template<class T, class A, int n, int m>
+  template<typename Iter>
+  void ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::copyValue(const Iter& row, const CIter& col) const
+  {
+    copyValue(col, row.index(), col.index());
+  }
+
+  template<class T, class A, int n, int m>
+  void ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::copyValue(const CIter& col, size_type rowindex, size_type colindex) const
+  {
+    for(size_type i=0; i<n; i++) {
+      for(size_type j=0; j<m; j++) {
+        assert(colindex*m+j<cols-1 || (int)marker[colindex*m+j]<mat->colstart[colindex*m+j+1]);
+        assert((int)marker[colindex*m+j]<mat->Nnz_);
+        mat->rowindex[marker[colindex*m+j]]=rowindex*n+i;
+        mat->values[marker[colindex*m+j]]=(*col)[i][j];
+        ++marker[colindex*m+j]; // index for next entry in column
+      }
+    }
+  }
+
+  template<class T, class A, int n, int m>
+  void ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::createMatrix() const
+  {
+    delete[] marker;
+    marker=0;
+  }
+
+  template<class F, class MRS>
+  void copyToColCompMatrix(F& initializer, const MRS& mrs)
+  {
+    typedef typename MRS::const_iterator Iter;
+    typedef typename  std::iterator_traits<Iter>::value_type::const_iterator CIter;
+    for(Iter row=mrs.begin(); row!= mrs.end(); ++row)
+      initializer.addRowNnz(row);
+
+    initializer.allocate();
+
+    for(Iter row=mrs.begin(); row!= mrs.end(); ++row) {
+
+      for(CIter col=row->begin(); col != row->end(); ++col)
+        initializer.countEntries(row, col);
+    }
+
+    initializer.calcColstart();
+
+    for(Iter row=mrs.begin(); row!= mrs.end(); ++row) {
+      for(CIter col=row->begin(); col != row->end(); ++col) {
+        initializer.copyValue(row, col);
+      }
+
+    }
+    initializer.createMatrix();
+  }
+
+  template<class F, class M,class S>
+  void copyToColCompMatrix(F& initializer, const MatrixRowSubset<M,S>& mrs)
+  {
+    typedef MatrixRowSubset<M,S> MRS;
+    typedef typename MRS::RowIndexSet SIS;
+    typedef typename SIS::const_iterator SIter;
+    typedef typename MRS::const_iterator Iter;
+    typedef typename std::iterator_traits<Iter>::value_type row_type;
+    typedef typename row_type::const_iterator CIter;
+
+    // Calculate upper Bound for nonzeros
+    for(Iter row=mrs.begin(); row!= mrs.end(); ++row)
+      initializer.addRowNnz(row, mrs.rowIndexSet());
+
+    initializer.allocate();
+
+    typedef typename MRS::Matrix::size_type size_type;
+
+    // A vector containing the corresponding indices in
+    // the to create submatrix.
+    // If an entry is the maximum of size_type then this index will not appear in
+    // the submatrix.
+    std::vector<size_type> subMatrixIndex(mrs.matrix().N(),
+                                          std::numeric_limits<size_type>::max());
+    size_type s=0;
+    for(SIter index = mrs.rowIndexSet().begin(); index!=mrs.rowIndexSet().end(); ++index)
+      subMatrixIndex[*index]=s++;
+
+    for(Iter row=mrs.begin(); row!= mrs.end(); ++row)
+      for(CIter col=row->begin(); col != row->end(); ++col) {
+        if(subMatrixIndex[col.index()]!=std::numeric_limits<size_type>::max())
+          // This column is in our subset (use submatrix column index)
+          initializer.countEntries(subMatrixIndex[col.index()]);
+      }
+
+    initializer.calcColstart();
+
+    for(Iter row=mrs.begin(); row!= mrs.end(); ++row)
+      for(CIter col=row->begin(); col != row->end(); ++col) {
+        if(subMatrixIndex[col.index()]!=std::numeric_limits<size_type>::max())
+          // This value is in our submatrix -> copy (use submatrix indices
+          initializer.copyValue(col, subMatrixIndex[row.index()], subMatrixIndex[col.index()]);
+      }
+    initializer.createMatrix();
+  }
+
+#ifndef DOXYGEN
+
+  template<class B, class TA, int n, int m>
+  ColCompMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >::ColCompMatrix()
+    : N_(0), M_(0), Nnz_(0), values(0), rowindex(0), colstart(0)
+  {}
+
+  template<class B, class TA, int n, int m>
+  ColCompMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >
+  ::ColCompMatrix(const Matrix& mat)
+    : N_(n*mat.N()), M_(m*mat.M()), Nnz_(n*m*mat.nonzeroes())
+  {}
+
+  template<class B, class TA, int n, int m>
+  ColCompMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >&
+  ColCompMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >::operator=(const Matrix& mat)
+  {
+    if(N_+M_+Nnz_!=0)
+      free();
+    setMatrix(mat);
+    return *this;
+  }
+
+  template<class B, class TA, int n, int m>
+  ColCompMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >&
+  ColCompMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >::operator=(const ColCompMatrix& mat)
+  {
+    if(N_+M_+Nnz_!=0)
+      free();
+    N_=mat.N_;
+    M_=mat.M_;
+    Nnz_= mat.Nnz_;
+    if(M_>0) {
+      colstart=new int[M_+1];
+      for(int i=0; i<=M_; ++i)
+        colstart[i]=mat.colstart[i];
+    }
+
+    if(Nnz_>0) {
+      values = new B[Nnz_];
+      rowindex = new int[Nnz_];
+
+      for(int i=0; i<Nnz_; ++i)
+        values[i]=mat.values[i];
+
+      for(int i=0; i<Nnz_; ++i)
+        rowindex[i]=mat.rowindex[i];
+    }
+    return *this;
+  }
+
+  template<class B, class TA, int n, int m>
+  void ColCompMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >
+  ::setMatrix(const Matrix& mat)
+  {
+    N_=n*mat.N();
+    M_=m*mat.M();
+    ColCompMatrixInitializer<Matrix> initializer(*this);
+
+    copyToColCompMatrix(initializer, MatrixRowSet<Matrix>(mat));
+  }
+
+  template<class B, class TA, int n, int m>
+  void ColCompMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >
+  ::setMatrix(const Matrix& mat, const std::set<std::size_t>& mrs)
+  {
+    if(N_+M_+Nnz_!=0)
+      free();
+    N_=mrs.size()*n;
+    M_=mrs.size()*m;
+    ColCompMatrixInitializer<Matrix> initializer(*this);
+
+    copyToColCompMatrix(initializer, MatrixRowSubset<Matrix,std::set<std::size_t> >(mat,mrs));
+  }
+
+  template<class B, class TA, int n, int m>
+  ColCompMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >::~ColCompMatrix()
+  {
+    if(N_+M_+Nnz_!=0)
+      free();
+  }
+
+  template<class B, class TA, int n, int m>
+  void ColCompMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >::free()
+  {
+    delete[] values;
+    delete[] rowindex;
+    delete[] colstart;
+    N_ = 0;
+    M_ = 0;
+    Nnz_ = 0;
+  }
+
+#endif // DOXYGEN
+
+}
+#endif
diff --git a/dune/istl/overlappingschwarz.hh b/dune/istl/overlappingschwarz.hh
index 25dd795c..4af4e300 100644
--- a/dune/istl/overlappingschwarz.hh
+++ b/dune/istl/overlappingschwarz.hh
@@ -203,13 +203,24 @@ namespace Dune
     DynamicMatrix<K> A;
   };
 
-  template<typename T>
-  struct OverlappingAssigner
+  template<typename T, bool tag>
+  class OverlappingAssignerHelper
   {};
 
+  template<typename T>
+  struct OverlappingAssigner : public OverlappingAssignerHelper<T,Dune::StoresColumnCompressed<T>::value >
+  {
+    public:
+    OverlappingAssigner(std::size_t maxlength, const typename T::matrix_type& mat,
+                        const typename T::range_type& b, typename T::range_type& x)
+      : OverlappingAssignerHelper<T,Dune::StoresColumnCompressed<T>::value >
+      (maxlength,mat,b,x)
+      {}
+  };
+
   // specialization for DynamicMatrix
   template<class K, int n, class Al, class X, class Y>
-  class OverlappingAssigner< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y > >
+  class OverlappingAssignerHelper< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y >,false>
   {
   public:
     typedef BCRSMatrix< FieldMatrix<K,n,n>, Al> matrix_type;
@@ -225,7 +236,7 @@ namespace Dune
      * @param b_ the global right hand side.
      * @param x_ the global left hand side.
      */
-    OverlappingAssigner(std::size_t maxlength, const BCRSMatrix<FieldMatrix<K,n,n>, Al>& mat_, const X& b_, Y& x_);
+    OverlappingAssignerHelper(std::size_t maxlength, const BCRSMatrix<FieldMatrix<K,n,n>, Al>& mat_, const X& b_, Y& x_);
 
     /**
      * @brief Deallocates memory of the local vector.
@@ -298,11 +309,11 @@ namespace Dune
   };
 
 #if HAVE_SUPERLU
-  template<typename T, typename A, int n, int m>
-  struct OverlappingAssigner<SuperLU<BCRSMatrix<FieldMatrix<T,n,m>,A> > >
+  template<template<class> class S, int n, int m, typename T, typename A>
+  struct OverlappingAssignerHelper<S<BCRSMatrix<FieldMatrix<T,n,m>, A> >, true>
   {
-    typedef BCRSMatrix<FieldMatrix<T,n,m>,A> matrix_type;
-    typedef typename SuperLU<BCRSMatrix<FieldMatrix<T,n,m>,A> >::range_type range_type;
+    typedef BCRSMatrix<FieldMatrix<T,n,m>, A> matrix_type;
+    typedef typename S<BCRSMatrix<FieldMatrix<T,n,m>, A> >::range_type range_type;
     typedef typename range_type::field_type field_type;
     typedef typename range_type::block_type block_type;
 
@@ -315,7 +326,7 @@ namespace Dune
      * @param b the global right hand side.
      * @param x the global left hand side.
      */
-    OverlappingAssigner(std::size_t maxlength, const BCRSMatrix<FieldMatrix<T,n,m>,A>& mat,
+    OverlappingAssignerHelper(std::size_t maxlength, const matrix_type& mat,
                         const range_type& b, range_type& x);
     /**
      * @brief Deallocates memory of the local vector.
@@ -467,7 +478,7 @@ namespace Dune
 
   // specialization for ILU0
   template<class M, class X, class Y>
-  class OverlappingAssigner<ILU0SubdomainSolver<M,X,Y> >
+  class OverlappingAssignerHelper<ILU0SubdomainSolver<M,X,Y>, false>
     : public OverlappingAssignerILUBase<M,X,Y>
   {
   public:
@@ -478,7 +489,7 @@ namespace Dune
      * @param b the global right hand side.
      * @param x the global left hand side.
      */
-    OverlappingAssigner(std::size_t maxlength, const M& mat,
+    OverlappingAssignerHelper(std::size_t maxlength, const M& mat,
                         const Y& b, X& x)
       : OverlappingAssignerILUBase<M,X,Y>(maxlength, mat,b,x)
     {}
@@ -486,7 +497,7 @@ namespace Dune
 
   // specialization for ILUN
   template<class M, class X, class Y>
-  class OverlappingAssigner<ILUNSubdomainSolver<M,X,Y> >
+  class OverlappingAssignerHelper<ILUNSubdomainSolver<M,X,Y>,false>
     : public OverlappingAssignerILUBase<M,X,Y>
   {
   public:
@@ -497,7 +508,7 @@ namespace Dune
      * @param b the global right hand side.
      * @param x the global left hand side.
      */
-    OverlappingAssigner(std::size_t maxlength, const M& mat,
+    OverlappingAssignerHelper(std::size_t maxlength, const M& mat,
                         const Y& b, X& x)
       : OverlappingAssignerILUBase<M,X,Y>(maxlength, mat,b,x)
     {}
@@ -673,13 +684,18 @@ namespace Dune
     }
   };
 
+  template<class T, bool tag>
+  struct SeqOverlappingSchwarzAssemblerHelper
+  {};
+
   template<class T>
   struct SeqOverlappingSchwarzAssembler
+    : public SeqOverlappingSchwarzAssemblerHelper<T,Dune::StoresColumnCompressed<T>::value>
   {};
 
 
   template<class K, int n, class Al, class X, class Y>
-  struct SeqOverlappingSchwarzAssembler< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y > >
+  struct SeqOverlappingSchwarzAssemblerHelper< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y >,false>
   {
     typedef BCRSMatrix< FieldMatrix<K,n,n>, Al> matrix_type;
     template<class RowToDomain, class Solvers, class SubDomains>
@@ -688,17 +704,15 @@ namespace Dune
                                              bool onTheFly);
   };
 
-#if HAVE_SUPERLU
-  template<class T>
-  struct SeqOverlappingSchwarzAssembler<SuperLU<T> >
+  template<template<class> class S, typename T, typename A, int m, int n>
+  struct SeqOverlappingSchwarzAssemblerHelper<S<BCRSMatrix<FieldMatrix<T,m,n>,A> >,true>
   {
-    typedef T matrix_type;
+    typedef BCRSMatrix<FieldMatrix<T,m,n>,A> matrix_type;
     template<class RowToDomain, class Solvers, class SubDomains>
     static std::size_t assembleLocalProblems(const RowToDomain& rowToDomain, const matrix_type& mat,
                                              Solvers& solvers, const SubDomains& domains,
                                              bool onTheFly);
   };
-#endif
 
   template<class M,class X, class Y>
   struct SeqOverlappingSchwarzAssemblerILUBase
@@ -711,12 +725,12 @@ namespace Dune
   };
 
   template<class M,class X, class Y>
-  struct SeqOverlappingSchwarzAssembler<ILU0SubdomainSolver<M,X,Y> >
+  struct SeqOverlappingSchwarzAssemblerHelper<ILU0SubdomainSolver<M,X,Y>,false>
     : public SeqOverlappingSchwarzAssemblerILUBase<M,X,Y>
   {};
 
   template<class M,class X, class Y>
-  struct SeqOverlappingSchwarzAssembler<ILUNSubdomainSolver<M,X,Y> >
+  struct SeqOverlappingSchwarzAssemblerHelper<ILUNSubdomainSolver<M,X,Y>,false>
     : public SeqOverlappingSchwarzAssemblerILUBase<M,X,Y>
   {};
 
@@ -1098,7 +1112,7 @@ namespace Dune
   template<class K, int n, class Al, class X, class Y>
   template<class RowToDomain, class Solvers, class SubDomains>
   std::size_t
-  SeqOverlappingSchwarzAssembler< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y > >::
+  SeqOverlappingSchwarzAssemblerHelper< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y >,false>::
   assembleLocalProblems(const RowToDomain& rowToDomain,
                         const matrix_type& mat,
                         Solvers& solvers,
@@ -1118,15 +1132,16 @@ namespace Dune
   }
 
 #if HAVE_SUPERLU
-  template<class T>
+  template<template<class> class S, typename T, typename A, int m, int n>
   template<class RowToDomain, class Solvers, class SubDomains>
-  std::size_t SeqOverlappingSchwarzAssembler<SuperLU<T> >::assembleLocalProblems(const RowToDomain& rowToDomain,
+  std::size_t SeqOverlappingSchwarzAssemblerHelper<S<BCRSMatrix<FieldMatrix<T,m,n>,A> >,true>::assembleLocalProblems(const RowToDomain& rowToDomain,
                                                                                  const matrix_type& mat,
                                                                                  Solvers& solvers,
                                                                                  const SubDomains& subDomains,
                                                                                  bool onTheFly)
   {
-    typedef typename std::vector<SuperMatrixInitializer<matrix_type> >::iterator InitializerIterator;
+    typedef typename S<BCRSMatrix<FieldMatrix<T,m,n>,A> >::MatrixInitializer MatrixInitializer;
+    typedef typename std::vector<MatrixInitializer>::iterator InitializerIterator;
     typedef typename SubDomains::const_iterator DomainIterator;
     typedef typename Solvers::iterator SolverIterator;
     std::size_t maxlength = 0;
@@ -1140,7 +1155,7 @@ namespace Dune
       DomainIterator domain=subDomains.begin();
 
       // Create the initializers list.
-      std::vector<SuperMatrixInitializer<matrix_type> > initializers(subDomains.size());
+      std::vector<MatrixInitializer> initializers(subDomains.size());
 
       SolverIterator solver=solvers.begin();
       for(InitializerIterator initializer=initializers.begin(); initializer!=initializers.end();
@@ -1148,27 +1163,21 @@ namespace Dune
         solver->mat.N_=SeqOverlappingSchwarzDomainSize<matrix_type>::size(*domain);
         solver->mat.M_=SeqOverlappingSchwarzDomainSize<matrix_type>::size(*domain);
         //solver->setVerbosity(true);
-        *initializer=SuperMatrixInitializer<matrix_type>(solver->mat);
+        *initializer=MatrixInitializer(solver->mat);
       }
 
       // Set up the supermatrices according to the subdomains
-      typedef OverlappingSchwarzInitializer<std::vector<SuperMatrixInitializer<matrix_type> >,
+      typedef OverlappingSchwarzInitializer<std::vector<MatrixInitializer>,
           RowToDomain, SubDomains> Initializer;
 
       Initializer initializer(initializers, rowToDomain, subDomains);
-      copyToSuperMatrix(initializer, mat);
-      if(solvers.size()==1)
-        assert(solvers[0].mat==mat);
-
-      /*    for(SolverIterator solver=solvers.begin(); solver!=solvers.end(); ++solver)
-            dPrint_CompCol_Matrix("superlu", &static_cast<SuperMatrix&>(solver->mat)); */
+      copyToColCompMatrix(initializer, mat);
 
       // Calculate the LU decompositions
-      std::for_each(solvers.begin(), solvers.end(), std::mem_fun_ref(&SuperLU<matrix_type>::decompose));
+      std::for_each(solvers.begin(), solvers.end(), std::mem_fun_ref(&S<BCRSMatrix<FieldMatrix<T,m,n>,A> >::decompose));
       for(SolverIterator solver=solvers.begin(); solver!=solvers.end(); ++solver) {
         assert(solver->mat.N()==solver->mat.M());
         maxlength=std::max(maxlength, solver->mat.N());
-        //writeCompColMatrixToMatlab(static_cast<SuperLUMatrix<M>&>(solver->mat), std::cout);
       }
     }
     return maxlength;
@@ -1263,8 +1272,8 @@ namespace Dune
   }
 
   template<class K, int n, class Al, class X, class Y>
-  OverlappingAssigner< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y > >
-  ::OverlappingAssigner(std::size_t maxlength, const BCRSMatrix<FieldMatrix<K,n,n>, Al>& mat_,
+  OverlappingAssignerHelper< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y >,false>
+  ::OverlappingAssignerHelper(std::size_t maxlength, const BCRSMatrix<FieldMatrix<K,n,n>, Al>& mat_,
                         const X& b_, Y& x_) :
     mat(&mat_),
     rhs_( new DynamicVector<field_type>(maxlength, 42) ),
@@ -1277,7 +1286,7 @@ namespace Dune
 
   template<class K, int n, class Al, class X, class Y>
   void
-  OverlappingAssigner< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y > >
+  OverlappingAssignerHelper< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y >,false>
   ::deallocate()
   {
     delete rhs_;
@@ -1286,7 +1295,7 @@ namespace Dune
 
   template<class K, int n, class Al, class X, class Y>
   void
-  OverlappingAssigner< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y > >
+  OverlappingAssignerHelper< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y >,false>
   ::resetIndexForNextDomain()
   {
     i=0;
@@ -1294,7 +1303,7 @@ namespace Dune
 
   template<class K, int n, class Al, class X, class Y>
   DynamicVector<K> &
-  OverlappingAssigner< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y > >
+  OverlappingAssignerHelper< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y >,false>
   ::lhs()
   {
     return *lhs_;
@@ -1302,7 +1311,7 @@ namespace Dune
 
   template<class K, int n, class Al, class X, class Y>
   DynamicVector<K> &
-  OverlappingAssigner< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y > >
+  OverlappingAssignerHelper< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y >,false>
   ::rhs()
   {
     return *rhs_;
@@ -1310,7 +1319,7 @@ namespace Dune
 
   template<class K, int n, class Al, class X, class Y>
   void
-  OverlappingAssigner< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y > >
+  OverlappingAssignerHelper< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y >,false>
   ::relaxResult(field_type relax)
   {
     lhs() *= relax;
@@ -1318,7 +1327,7 @@ namespace Dune
 
   template<class K, int n, class Al, class X, class Y>
   void
-  OverlappingAssigner< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y > >
+  OverlappingAssignerHelper< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y >,false>
   ::operator()(const size_type& domainIndex)
   {
     lhs() = 0.0;
@@ -1363,7 +1372,7 @@ namespace Dune
 
   template<class K, int n, class Al, class X, class Y>
   void
-  OverlappingAssigner< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y > >
+  OverlappingAssignerHelper< DynamicMatrixSubdomainSolver< BCRSMatrix< FieldMatrix<K,n,n>, Al>, X, Y >,false>
   ::assignResult(block_type& res)
   {
     // assign the result of the local solve to the global vector
@@ -1375,9 +1384,9 @@ namespace Dune
 
 #if HAVE_SUPERLU
 
-  template<typename T, typename A, int n, int m>
-  OverlappingAssigner<SuperLU<BCRSMatrix<FieldMatrix<T,n,m>,A> > >
-  ::OverlappingAssigner(std::size_t maxlength,
+  template<template<class> class S, int n, int m, typename T, typename A>
+  OverlappingAssignerHelper<S<BCRSMatrix<FieldMatrix<T,n,m>,A> >,true>
+  ::OverlappingAssignerHelper(std::size_t maxlength,
                         const BCRSMatrix<FieldMatrix<T,n,m>,A>& mat_,
                         const range_type& b_,
                         range_type& x_)
@@ -1390,15 +1399,15 @@ namespace Dune
 
   }
 
-  template<typename T, typename A, int n, int m>
-  void OverlappingAssigner<SuperLU<BCRSMatrix<FieldMatrix<T,n,m>,A> > >::deallocate()
+  template<template<class> class S, int n, int m, typename T, typename A>
+  void OverlappingAssignerHelper<S<BCRSMatrix<FieldMatrix<T,n,m>,A> >,true>::deallocate()
   {
     delete[] rhs_;
     delete[] lhs_;
   }
 
-  template<typename T, typename A, int n, int m>
-  void OverlappingAssigner<SuperLU<BCRSMatrix<FieldMatrix<T,n,m>,A> > >::operator()(const size_type& domainIndex)
+  template<template<class> class S, int n, int m, typename T, typename A>
+  void OverlappingAssignerHelper<S<BCRSMatrix<FieldMatrix<T,n,m>,A> >,true>::operator()(const size_type& domainIndex)
   {
     //assign right hand side of current domainindex block
     // rhs is an array of doubles!
@@ -1425,8 +1434,9 @@ namespace Dune
     }
 
   }
-  template<typename T, typename A, int n, int m>
-  void OverlappingAssigner<SuperLU<BCRSMatrix<FieldMatrix<T,n,m>,A> > >::relaxResult(field_type relax)
+
+  template<template<class> class S, int n, int m, typename T, typename A>
+  void OverlappingAssignerHelper<S<BCRSMatrix<FieldMatrix<T,n,m>,A> >,true>::relaxResult(field_type relax)
   {
     for(size_type j=i+n; i<j; ++i) {
       assert(i<maxlength_);
@@ -1435,8 +1445,8 @@ namespace Dune
     i-=n;
   }
 
-  template<typename T, typename A, int n, int m>
-  void OverlappingAssigner<SuperLU<BCRSMatrix<FieldMatrix<T,n,m>,A> > >::assignResult(block_type& res)
+  template<template<class> class S, int n, int m, typename T, typename A>
+  void OverlappingAssignerHelper<S<BCRSMatrix<FieldMatrix<T,n,m>,A> >,true>::assignResult(block_type& res)
   {
     // assign the result of the local solve to the global vector
     for(size_type j=0; j<n; ++j, ++i) {
@@ -1445,22 +1455,22 @@ namespace Dune
     }
   }
 
-  template<typename T, typename A, int n, int m>
-  void OverlappingAssigner<SuperLU<BCRSMatrix<FieldMatrix<T,n,m>,A> > >::resetIndexForNextDomain()
+  template<template<class> class S, int n, int m, typename T, typename A>
+  void OverlappingAssignerHelper<S<BCRSMatrix<FieldMatrix<T,n,m>,A> >,true>::resetIndexForNextDomain()
   {
     i=0;
   }
 
-  template<typename T, typename A, int n, int m>
-  typename OverlappingAssigner<SuperLU<BCRSMatrix<FieldMatrix<T,n,m>,A> > >::field_type*
-  OverlappingAssigner<SuperLU<BCRSMatrix<FieldMatrix<T,n,m>,A> > >::lhs()
+  template<template<class> class S, int n, int m, typename T, typename A>
+  typename OverlappingAssignerHelper<S<BCRSMatrix<FieldMatrix<T,n,m>,A> >,true>::field_type*
+  OverlappingAssignerHelper<S<BCRSMatrix<FieldMatrix<T,n,m>,A> >,true>::lhs()
   {
     return lhs_;
   }
 
-  template<typename T, typename A, int n, int m>
-  typename OverlappingAssigner<SuperLU<BCRSMatrix<FieldMatrix<T,n,m>,A> > >::field_type*
-  OverlappingAssigner<SuperLU<BCRSMatrix<FieldMatrix<T,n,m>,A> > >::rhs()
+  template<template<class> class S, int n, int m, typename T, typename A>
+  typename OverlappingAssignerHelper<S<BCRSMatrix<FieldMatrix<T,n,m>,A> >,true>::field_type*
+  OverlappingAssignerHelper<S<BCRSMatrix<FieldMatrix<T,n,m>,A> >,true>::rhs()
   {
     return rhs_;
   }
diff --git a/dune/istl/solvertype.hh b/dune/istl/solvertype.hh
index 0a3f2409..12f174f0 100644
--- a/dune/istl/solvertype.hh
+++ b/dune/istl/solvertype.hh
@@ -22,5 +22,17 @@ namespace Dune
       value =false
     };
   };
+
+  template<typename Solver>
+  struct StoresColumnCompressed
+  {
+    enum
+    {
+      /**
+       * @brief whether the solver internally uses column compressed storage
+       */
+      value = false
+    };
+  };
 } // end namespace Dune
 #endif
diff --git a/dune/istl/superlu.hh b/dune/istl/superlu.hh
index 4bc725ca..efecbcf4 100644
--- a/dune/istl/superlu.hh
+++ b/dune/istl/superlu.hh
@@ -83,8 +83,8 @@ namespace Dune
   template<class M, class T, class TM, class TD, class TA>
   class SeqOverlappingSchwarz;
 
-  template<class T>
-  struct SeqOverlappingSchwarzAssembler;
+  template<class T, bool tag>
+  struct SeqOverlappingSchwarzAssemblerHelper;
 
   template<class T>
   struct SuperLUSolveChooser
@@ -285,10 +285,13 @@ namespace Dune
               typename A::template rebind<FieldVector<T,n> >::other> >
   {
   public:
-    /* @brief The matrix type. */
+    /** @brief The matrix type. */
     typedef Dune::BCRSMatrix<FieldMatrix<T,n,m>,A> Matrix;
-    /* @brief The corresponding SuperLU Matrix type.*/
+    typedef Dune::BCRSMatrix<FieldMatrix<T,n,m>,A> matrix_type;
+    /** @brief The corresponding SuperLU Matrix type.*/
     typedef Dune::SuperLUMatrix<Matrix> SuperLUMatrix;
+    /** @brief Type of an associated initializer class. */
+    typedef SuperMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> > MatrixInitializer;
     /** @brief The type of the domain of the solver. */
     typedef Dune::BlockVector<
         FieldVector<T,m>,
@@ -363,7 +366,7 @@ namespace Dune
     friend class std::mem_fun_ref_t<void,SuperLU>;
     template<class M,class X, class TM, class TD, class T1>
     friend class SeqOverlappingSchwarz;
-    friend struct SeqOverlappingSchwarzAssembler<SuperLU<Matrix> >;
+    friend struct SeqOverlappingSchwarzAssemblerHelper<SuperLU<Matrix>,true>;
 
     /** @brief computes the LU Decomposition */
     void decompose();
@@ -708,6 +711,12 @@ namespace Dune
   {
     enum { value=true};
   };
+
+  template<typename T, typename A, int n, int m>
+  struct StoresColumnCompressed<SuperLU<BCRSMatrix<FieldMatrix<T,n,m>,A> > >
+  {
+    enum { value = true };
+  };
 }
 
 #endif // HAVE_SUPERLU
diff --git a/dune/istl/supermatrix.hh b/dune/istl/supermatrix.hh
index 600bd083..fbe43757 100644
--- a/dune/istl/supermatrix.hh
+++ b/dune/istl/supermatrix.hh
@@ -52,6 +52,8 @@
 #include <dune/common/typetraits.hh>
 #include <limits>
 
+#include"colcompmatrix.hh"
+
 namespace Dune
 {
 
@@ -155,131 +157,6 @@ namespace Dune
   };
 #endif
 
-  /**
-   * @brief Provides access to an iterator over all matrix rows.
-   *
-   * @tparam M The type of the matrix.
-   */
-  template<class M>
-  class MatrixRowSet
-  {
-  public:
-    // @brief The type of the matrix.
-    typedef M Matrix;
-    // @brief The matrix row iterator type.
-    typedef typename Matrix::ConstRowIterator const_iterator;
-
-    /**
-     * @brief Construct an row set over all matrix rows.
-     * @param m The matrix for which we manage the rows.
-     */
-    MatrixRowSet(const Matrix& m)
-      : m_(m)
-    {}
-
-    // @brief Get the row iterator at the first row.
-    const_iterator begin() const
-    {
-      return m_.begin();
-    }
-    //@brief Get the row iterator at the end of all rows.
-    const_iterator end() const
-    {
-      return m_.end();
-    }
-  private:
-    const Matrix& m_;
-  };
-
-  /**
-   * @brief Provides access to an iterator over an arbitrary subset
-   * of matrix rows.
-   *
-   * @tparam M The type of the matrix.
-   * @tparam S the type of the set of valid row indices.
-   */
-  template<class M, class S>
-  class MatrixRowSubset
-  {
-  public:
-    /* @brief the type of the matrix class. */
-    typedef M Matrix;
-    /* @brief the type of the set of valid row indices. */
-    typedef S RowIndexSet;
-
-    /**
-     * @brief Construct an row set over all matrix rows.
-     * @param m The matrix for which we manage the rows.
-     + @param s The set of row indices we manage.
-     */
-    MatrixRowSubset(const Matrix& m, const RowIndexSet& s)
-      : m_(m), s_(s)
-    {}
-
-    const Matrix& matrix() const
-    {
-      return m_;
-    }
-
-    const RowIndexSet& rowIndexSet() const
-    {
-      return s_;
-    }
-
-    // @brief The matrix row iterator type.
-    class const_iterator
-      : public ForwardIteratorFacade<const_iterator, const typename Matrix::row_type>
-    {
-    public:
-      const_iterator(typename Matrix::const_iterator firstRow,
-                     typename RowIndexSet::const_iterator pos)
-        : firstRow_(firstRow), pos_(pos)
-      {}
-
-
-      const typename Matrix::row_type& dereference() const
-      {
-        return *(firstRow_+ *pos_);
-      }
-      bool equals(const const_iterator& o) const
-      {
-        return pos_==o.pos_;
-      }
-      void increment()
-      {
-        ++pos_;
-      }
-      typename RowIndexSet::value_type index() const
-      {
-        return *pos_;
-      }
-
-    private:
-      // @brief Iterator pointing to the first row of the matrix.
-      typename Matrix::const_iterator firstRow_;
-      // @brief Iterator pointing to the current row index.
-      typename RowIndexSet::const_iterator pos_;
-    };
-
-    // @brief Get the row iterator at the first row.
-    const_iterator begin() const
-    {
-      return const_iterator(m_.begin(), s_.begin());
-    }
-    //@brief Get the row iterator at the end of all rows.
-    const_iterator end() const
-    {
-      return const_iterator(m_.begin(), s_.end());
-    }
-
-  private:
-    // @brief The matrix for which we manage the row subset.
-    const Matrix& m_;
-    // @brief The set of row indices we manage.
-    const RowIndexSet& s_;
-
-  };
-
   template<class T>
   struct BaseGetSuperLUType
   {
@@ -337,13 +214,6 @@ namespace Dune
   struct SuperMatrixInitializer
   {};
 
-  template<class M, class X, class TM, class TD, class T1>
-  class SeqOverlappingSchwarz;
-
-
-  template<class T>
-  struct SeqOverlappingSchwarzAssembler;
-
   template<class T>
   class SuperLU;
 
@@ -352,11 +222,11 @@ namespace Dune
    */
   template<class B, class TA, int n, int m>
   class SuperLUMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >
+    : public ColCompMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >
   {
     template<class M, class X, class TM, class TD, class T1>
     friend class SeqOverlappingSchwarz;
     friend struct SuperMatrixInitializer<BCRSMatrix<FieldMatrix<B,n,m>,TA> >;
-
   public:
     /** @brief The type of the matrix to convert. */
     typedef BCRSMatrix<FieldMatrix<B,n,m>,TA> Matrix;
@@ -369,12 +239,18 @@ namespace Dune
      * @brief Constructor that initializes the data.
      * @param mat The matrix to convert.
      */
-    SuperLUMatrix(const Matrix& mat);
+    explicit SuperLUMatrix(const Matrix& mat) : ColCompMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >(mat)
+    {}
 
-    SuperLUMatrix();
+    SuperLUMatrix() : ColCompMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >()
+    {}
 
     /** @brief Destructor */
-    ~SuperLUMatrix();
+    virtual ~SuperLUMatrix()
+    {
+      if (this->N_+this->M_*this->Nnz_ != 0)
+        free();
+    }
 
     /** @brief Cast to a SuperLU Matrix */
     operator SuperMatrix&()
@@ -387,491 +263,92 @@ namespace Dune
     {
       return A;
     }
-    bool operator==(const Matrix& mat) const;
 
-    /**
-     * @brief Get the number of rows.
-     * @return  The number of rows.
-     */
-    size_type N() const
+    SuperLUMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >& operator=(const BCRSMatrix<FieldMatrix<B,n,m>,TA>& mat)
     {
-      return N_;
+      this->ColCompMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >::operator=(mat);
+      SuperMatrixCreateSparseChooser<B>
+           ::create(&A, this->N_, this->M_, this->colstart[this->N_],
+             this->values,this->rowindex, this->colstart, SLU_NC,
+             static_cast<Dtype_t>(GetSuperLUType<B>::type), SLU_GE);
+      return *this;
     }
 
-    size_type nnz() const
+    SuperLUMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >& operator=(const SuperLUMatrix <BCRSMatrix<FieldMatrix<B,n,m>,TA> >& mat)
     {
-      return Nnz_/n/m;
+      this->ColCompMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >::operator=(mat);
+      SuperMatrixCreateSparseChooser<B>
+           ::create(&A, this->N_, this->M_, this->colstart[this->N_],
+             this->values,this->rowindex, this->colstart, SLU_NC,
+             static_cast<Dtype_t>(GetSuperLUType<B>::type), SLU_GE);
+      return *this;
     }
 
     /**
-     * @brief Get the number of columns.
-     * @return  The number of columns.
+     * @brief Initialize data from a given set of matrix rows and columns
+     * @tparam The type of the row index set.
+     * @param mat the matrix with the values
+     * @param mrs The set of row (and column) indices to represent
      */
-    size_type M() const
+    virtual void setMatrix(const Matrix& mat, const std::set<std::size_t>& mrs)
     {
-      return M_;
+      if(this->N_+this->M_+this->Nnz_!=0)
+        free();
+      this->N_=mrs.size()*n;
+      this->M_=mrs.size()*m;
+      SuperMatrixInitializer<Matrix> initializer(*this);
+
+      copyToColCompMatrix(initializer, MatrixRowSubset<Matrix,std::set<std::size_t> >(mat,mrs));
     }
 
-    SuperLUMatrix& operator=(const Matrix& mat);
+    /** @brief Initialize data from given matrix. */
+    virtual void setMatrix(const Matrix& mat)
+    {
+      this->N_=n*mat.N();
+      this->M_=m*mat.M();
+      SuperMatrixInitializer<Matrix> initializer(*this);
 
-    SuperLUMatrix& operator=(const SuperLUMatrix& mat);
+      copyToColCompMatrix(initializer, MatrixRowSet<Matrix>(mat));
+    }
 
-    /**
-     * @brief Initialize data from a given set of matrix rows and columns
-     * @tparam The type of the row index set.
-     * @param mat the matrix with the values
-     * @param mrs The set of row (and column) indices to represent
-     */
-    template<class S>
-    void setMatrix(const Matrix& mat, const S& mrs);
     /** @brief free allocated space. */
-    void free();
+    virtual void free()
+    {
+      ColCompMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >::free();
+      SUPERLU_FREE(A.Store);
+    }
   private:
-    /** @brief Initialize data from given matrix. */
-    void setMatrix(const Matrix& mat);
-
-    int N_, M_, Nnz_;
-    B* values;
-    int* rowindex;
-    int* colstart;
     SuperMatrix A;
   };
 
-  template<class T, class A, int n, int m>
-  void writeCompColMatrixToMatlab(const SuperLUMatrix<BCRSMatrix<FieldMatrix<T,n,m>,A> >& mat,
-                                  std::ostream& os)
-  {
-    const SuperMatrix a=static_cast<const SuperMatrix&>(mat);
-    NCformat *astore = (NCformat *) a.Store;
-    double* dp = (double*)astore->nzval;
-
-    // remember old flags
-    std::ios_base::fmtflags oldflags = os.flags();
-    // set the output format
-    //os.setf(std::ios_base::scientific, std::ios_base::floatfield);
-    int oldprec = os.precision();
-    //os.precision(10);
-    //dPrint_CompCol_Matrix("A", const_cast<SuperMatrix*>(&a));
-
-    os <<"[";
-    for(int row=0; row<a.nrow; ++row) {
-      for(int col= 0; col < a.ncol; ++col) {
-        // linear search for col
-        int i;
-        for(i=astore->colptr[col]; i < astore->colptr[col+1]; ++i)
-          if(astore->rowind[i]==row) {
-            os<<dp[i]<<" ";
-            break;
-          }
-        if(i==astore->colptr[col+1])
-          // entry not present
-          os<<0<<" ";
-      }
-      if(row==a.nrow-1)
-        os<<"]";
-      os<<std::endl;
-    }
-    // reset the output format
-    os.flags(oldflags);
-    os.precision(oldprec);
-  }
-
-
   template<class T, class A, int n, int m>
   class SuperMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >
+    : public ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >
   {
     template<class I, class S, class D>
     friend class OverlappingSchwarzInitializer;
   public:
-    typedef Dune::BCRSMatrix<FieldMatrix<T,n,m>,A> Matrix;
+    typedef BCRSMatrix<FieldMatrix<T,n,m>,A> Matrix;
     typedef Dune::SuperLUMatrix<Matrix> SuperLUMatrix;
-    typedef typename Matrix::row_type::const_iterator CIter;
-    typedef typename Matrix::size_type size_type;
-
-    SuperMatrixInitializer(SuperLUMatrix& lum);
-
-    SuperMatrixInitializer();
-
-    ~SuperMatrixInitializer();
-
-    template<typename Iter>
-    void addRowNnz(const Iter& row) const;
-
-    template<typename Iter, typename Set>
-    void addRowNnz(const Iter& row, const Set& s) const;
-
-    void allocate();
-
-    template<typename Iter>
-    void countEntries(const Iter& row, const CIter& col) const;
-
-    void countEntries(size_type colidx) const;
-
-    void calcColstart() const;
-
-    template<typename Iter>
-    void copyValue(const Iter& row, const CIter& col) const;
-
-    void copyValue(const CIter& col, size_type rowindex, size_type colidx) const;
-
-    void createMatrix() const;
-
-  private:
-
-    void allocateMatrixStorage() const;
-
-    void allocateMarker();
-
-    SuperLUMatrix* mat;
-    size_type cols;
-    mutable size_type *marker;
-  };
-
-  template<class T, class A, int n, int m>
-  SuperMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::SuperMatrixInitializer(SuperLUMatrix& mat_)
-    : mat(&mat_), cols(mat_.N()), marker(0)
-  {
-    mat->Nnz_=0;
-  }
 
-  template<class T, class A, int n, int m>
-  SuperMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::SuperMatrixInitializer()
-    : mat(0), cols(0), marker(0)
-  {}
-
-  template<class T, class A, int n, int m>
-  SuperMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::~SuperMatrixInitializer()
-  {
-    if(marker)
-      delete[] marker;
-  }
+    SuperMatrixInitializer(SuperLUMatrix& lum) : ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >(lum)
+      ,slumat(&lum)
+    {}
 
-  template<class T, class A, int n, int m>
-  template<typename Iter>
-  void SuperMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::addRowNnz(const Iter& row) const
-  {
-    mat->Nnz_+=row->getsize();
-  }
+    SuperMatrixInitializer() : ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >()
+    {}
 
-  template<class T, class A, int n, int m>
-  template<typename Iter, typename Map>
-  void SuperMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::addRowNnz(const Iter& row,
-                                                                            const Map& indices) const
-  {
-    typedef typename  Iter::value_type::const_iterator RIter;
-    typedef typename Map::const_iterator MIter;
-    MIter siter =indices.begin();
-    for(RIter entry=row->begin(); entry!=row->end(); ++entry)
+    virtual void createMatrix() const
     {
-      for(; siter!=indices.end() && *siter<entry.index(); ++siter) ;
-      if(siter==indices.end())
-        break;
-      if(*siter==entry.index())
-        // index is in subdomain
-        ++mat->Nnz_;
-    }
-  }
-
-  template<class T, class A, int n, int m>
-  void SuperMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::allocate()
-  {
-    allocateMatrixStorage();
-    allocateMarker();
-  }
-
-  template<class T, class A, int n, int m>
-  void SuperMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::allocateMatrixStorage() const
-  {
-    mat->Nnz_*=n*m;
-    // initialize data
-    mat->values=new T[mat->Nnz_];
-    mat->rowindex=new int[mat->Nnz_];
-    mat->colstart=new int[cols+1];
-  }
-
-  template<class T, class A, int n, int m>
-  void SuperMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::allocateMarker()
-  {
-    marker = new typename Matrix::size_type[cols];
-
-    for(size_type i=0; i < cols; ++i)
-      marker[i]=0;
-  }
-
-  template<class T, class A, int n, int m>
-  template<typename Iter>
-  void SuperMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::countEntries(const Iter& row, const CIter& col) const
-  {
-    countEntries(col.index());
-
-  }
-
-  template<class T, class A, int n, int m>
-  void SuperMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::countEntries(size_type colindex) const
-  {
-    for(size_type i=0; i < m; ++i) {
-      assert(colindex*m+i<cols);
-      marker[colindex*m+i]+=n;
-    }
-
-  }
-
-  template<class T, class A, int n, int m>
-  void SuperMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::calcColstart() const
-  {
-    mat->colstart[0]=0;
-    for(size_type i=0; i < cols; ++i) {
-      assert(i<cols);
-      mat->colstart[i+1]=mat->colstart[i]+marker[i];
-      marker[i]=mat->colstart[i];
-    }
-  }
-
-  template<class T, class A, int n, int m>
-  template<typename Iter>
-  void SuperMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::copyValue(const Iter& row, const CIter& col) const
-  {
-    copyValue(col, row.index(), col.index());
-  }
-
-  template<class T, class A, int n, int m>
-  void SuperMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::copyValue(const CIter& col, size_type rowindex, size_type colindex) const
-  {
-    for(size_type i=0; i<n; i++) {
-      for(size_type j=0; j<m; j++) {
-        assert(colindex*m+j<cols-1 || (int)marker[colindex*m+j]<mat->colstart[colindex*m+j+1]);
-        assert((int)marker[colindex*m+j]<mat->Nnz_);
-        mat->rowindex[marker[colindex*m+j]]=rowindex*n+i;
-        mat->values[marker[colindex*m+j]]=(*col)[i][j];
-        ++marker[colindex*m+j]; // index for next entry in column
-      }
-    }
-  }
-
-  template<class T, class A, int n, int m>
-  void SuperMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::createMatrix() const
-  {
-    delete[] marker;
-    marker=0;
-    SuperMatrixCreateSparseChooser<T>
-    ::create(&mat->A, mat->N_, mat->M_, mat->colstart[cols],
-             mat->values, mat->rowindex, mat->colstart, SLU_NC,
+      ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> >::createMatrix();
+      SuperMatrixCreateSparseChooser<T>
+           ::create(&slumat->A, slumat->N_, slumat->M_, slumat->colstart[this->cols],
+             slumat->values,slumat->rowindex, slumat->colstart, SLU_NC,
              static_cast<Dtype_t>(GetSuperLUType<T>::type), SLU_GE);
-  }
-
-  template<class F, class MRS>
-  void copyToSuperMatrix(F& initializer, const MRS& mrs)
-  {
-    typedef typename MRS::const_iterator Iter;
-    typedef typename  std::iterator_traits<Iter>::value_type::const_iterator CIter;
-    for(Iter row=mrs.begin(); row!= mrs.end(); ++row)
-      initializer.addRowNnz(row);
-
-    initializer.allocate();
-
-    for(Iter row=mrs.begin(); row!= mrs.end(); ++row) {
-
-      for(CIter col=row->begin(); col != row->end(); ++col)
-        initializer.countEntries(row, col);
-    }
-
-    initializer.calcColstart();
-
-    for(Iter row=mrs.begin(); row!= mrs.end(); ++row) {
-      for(CIter col=row->begin(); col != row->end(); ++col) {
-        initializer.copyValue(row, col);
-      }
-
-    }
-    initializer.createMatrix();
-  }
-
-  template<class F, class M,class S>
-  void copyToSuperMatrix(F& initializer, const MatrixRowSubset<M,S>& mrs)
-  {
-    typedef MatrixRowSubset<M,S> MRS;
-    typedef typename MRS::RowIndexSet SIS;
-    typedef typename SIS::const_iterator SIter;
-    typedef typename MRS::const_iterator Iter;
-    typedef typename std::iterator_traits<Iter>::value_type row_type;
-    typedef typename row_type::const_iterator CIter;
-
-    // Calculate upper Bound for nonzeros
-    for(Iter row=mrs.begin(); row!= mrs.end(); ++row)
-      initializer.addRowNnz(row, mrs.rowIndexSet());
-
-    initializer.allocate();
-
-    typedef typename MRS::Matrix::size_type size_type;
-
-    // A vector containing the corresponding indices in
-    // the to create submatrix.
-    // If an entry is the maximum of size_type then this index will not appear in
-    // the submatrix.
-    std::vector<size_type> subMatrixIndex(mrs.matrix().N(),
-                                          std::numeric_limits<size_type>::max());
-    size_type s=0;
-    for(SIter index = mrs.rowIndexSet().begin(); index!=mrs.rowIndexSet().end(); ++index)
-      subMatrixIndex[*index]=s++;
-
-    for(Iter row=mrs.begin(); row!= mrs.end(); ++row)
-      for(CIter col=row->begin(); col != row->end(); ++col) {
-        if(subMatrixIndex[col.index()]!=std::numeric_limits<size_type>::max())
-          // This column is in our subset (use submatrix column index)
-          initializer.countEntries(subMatrixIndex[col.index()]);
-      }
-
-    initializer.calcColstart();
-
-    for(Iter row=mrs.begin(); row!= mrs.end(); ++row)
-      for(CIter col=row->begin(); col != row->end(); ++col) {
-        if(subMatrixIndex[col.index()]!=std::numeric_limits<size_type>::max())
-          // This value is in our submatrix -> copy (use submatrix indices
-          initializer.copyValue(col, subMatrixIndex[row.index()], subMatrixIndex[col.index()]);
-      }
-    initializer.createMatrix();
-  }
-
-#ifndef DOXYGEN
-
-  template<class B, class TA, int n, int m>
-  bool SuperLUMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >::operator==(const BCRSMatrix<FieldMatrix<B,n,m>,TA>& mat) const
-  {
-    const NCformat* S=static_cast<const NCformat *>(A.Store);
-    for(size_type col=0; col < M(); ++col) {
-      for(int j=S->colptr[col]; j < S->colptr[col+1]; ++j) {
-        int row=S->rowind[j];
-        if((mat[row/n][col/m])[row%n][col%m]!=reinterpret_cast<B*>(S->nzval)[j]) {
-          std::cerr<<" bcrs["<<row/n<<"]["<<col/m<<"]["<<row%n<<"]["<<row%m
-                   <<"]="<<(mat[row/n][col/m])[row%n][col%m]<<" super["<<row<<"]["<<col<<"]="<<reinterpret_cast<B*>(S->nzval)[j];
-
-          return false;
-        }
-      }
-    }
-
-    return true;
-  }
-
-#endif // DOYXGEN
-
-  template<class B, class TA, int n, int m>
-  bool operator==(SuperLUMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >& sla, BCRSMatrix<FieldMatrix<B,n,m>,TA>& a)
-  {
-    return a==sla;
-  }
-
-#ifndef DOXYGEN
-
-  template<class B, class TA, int n, int m>
-  SuperLUMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >::SuperLUMatrix()
-    : N_(0), M_(0), Nnz_(0), values(0), rowindex(0), colstart(0)
-  {}
-
-  template<class B, class TA, int n, int m>
-  SuperLUMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >
-  ::SuperLUMatrix(const Matrix& mat)
-    : N_(n*mat.N()), M_(m*mat.M()), Nnz_(n*m*mat.Nnz())
-  {}
-
-  template<class B, class TA, int n, int m>
-  SuperLUMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >&
-  SuperLUMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >::operator=(const Matrix& mat)
-  {
-    if(N_+M_+Nnz_!=0)
-      free();
-    setMatrix(mat);
-    return *this;
-  }
-
-  template<class B, class TA, int n, int m>
-  SuperLUMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >&
-  SuperLUMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >::operator=(const SuperLUMatrix& mat)
-  {
-    if(N_+M_+Nnz_!=0)
-      free();
-    N_=mat.N_;
-    M_=mat.M_;
-    Nnz_= mat.Nnz_;
-    if(M_>0) {
-      colstart=new int[M_+1];
-      for(int i=0; i<=M_; ++i)
-        colstart[i]=mat.colstart[i];
     }
-
-    if(Nnz_>0) {
-      values = new B[Nnz_];
-      rowindex = new int[Nnz_];
-
-      for(int i=0; i<Nnz_; ++i)
-        values[i]=mat.values[i];
-
-      for(int i=0; i<Nnz_; ++i)
-        rowindex[i]=mat.rowindex[i];
-    }
-    if(M_+Nnz_>0)
-      dCreate_CompCol_Matrix(&A, N_, M_, Nnz_,
-                             values, rowindex, colstart, SLU_NC, static_cast<Dtype_t>(GetSuperLUType<B>::type), SLU_GE);
-    return *this;
-  }
-
-  template<class B, class TA, int n, int m>
-  void SuperLUMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >
-  ::setMatrix(const Matrix& mat)
-  {
-    N_=n*mat.N();
-    M_=m*mat.M();
-    SuperMatrixInitializer<Matrix> initializer(*this);
-
-    copyToSuperMatrix(initializer, MatrixRowSet<Matrix>(mat));
-
-#ifdef DUNE_ISTL_WITH_CHECKING
-    char name[] = {'A',0};
-    if(N_<0)
-      SuperMatrixPrinter<B>::print(name,&A);
-    assert(*this==mat);
-#endif
-  }
-
-  template<class B, class TA, int n, int m>
-  template<class S>
-  void SuperLUMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >
-  ::setMatrix(const Matrix& mat, const S& mrs)
-  {
-    if(N_+M_+Nnz_!=0)
-      free();
-    N_=mrs.size()*n;
-    M_=mrs.size()*m;
-    SuperMatrixInitializer<Matrix> initializer(*this);
-
-    copyToSuperMatrix(initializer, MatrixRowSubset<Matrix,S>(mat,mrs));
-
-#ifdef DUNE_ISTL_WITH_CHECKING
-    char name[] = {'A',0};
-    if(N_<0)
-      SuperMatrixPrinter<B>::print(name,&A);
-#endif
-  }
-
-  template<class B, class TA, int n, int m>
-  SuperLUMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >::~SuperLUMatrix()
-  {
-    if(N_+M_+Nnz_!=0)
-      free();
-  }
-
-  template<class B, class TA, int n, int m>
-  void SuperLUMatrix<BCRSMatrix<FieldMatrix<B,n,m>,TA> >::free()
-  {
-    delete[] values;
-    delete[] rowindex;
-    delete[] colstart;
-    SUPERLU_FREE(A.Store);
-    N_=M_=Nnz_=0;
-  }
-
-#endif // DOXYGEN
-
+    private:
+    SuperLUMatrix* slumat;
+  };
 }
 #endif
 #endif
diff --git a/dune/istl/test/CMakeLists.txt b/dune/istl/test/CMakeLists.txt
index 60b3dfed..9c9ad552 100644
--- a/dune/istl/test/CMakeLists.txt
+++ b/dune/istl/test/CMakeLists.txt
@@ -21,14 +21,22 @@ endif(HAVE_PARDISO)
 if(SUPERLU_FOUND)
   set(SUPERLUTESTS
   complexrhstest superlutest superluztest superluctest superlustest
-    overlappingschwarztest)
+  )
 endif(SUPERLU_FOUND)
 
+if(UMFPACK_FOUND)
+  set(UMFPACKTESTS umfpacktest)
+endif()
+
+if ((SUPERLU_FOUND) OR (UMFPACK_FOUND))
+  set(OVLPSCHWARZTESTS overlappingschwarztest)
+endif()
+
 if(HAVE_MPI)
   set(MPITESTS vectorcommtest matrixmarkettest matrixredisttest)
 endif(HAVE_MPI)
 
-set(ALLTESTS ${MPITESTS} ${NORMALTEST} ${PARDISOTEST} ${SUPERLUTESTS})
+set(ALLTESTS ${MPITESTS} ${NORMALTEST} ${PARDISOTEST} ${SUPERLUTESTS} ${UMFPACKTESTS} ${OVLPSCHWARZTESTS})
 
 
 # We do not want want to build the tests during make all,
@@ -78,11 +86,25 @@ if(SUPERLU_FOUND)
   add_executable(superluztest "superlutest.cc")
   set_property(TARGET superluztest APPEND PROPERTY COMPILE_DEFINITIONS "SUPERLU_NTYPE=3")
 
-  add_executable(overlappingschwarztest "overlappingschwarztest.cc")
-
   add_dune_superlu_flags("${SUPERLUTESTS}")
 endif(SUPERLU_FOUND)
 
+if(UMFPACK_FOUND)
+  add_executable(umfpacktest umfpacktest.cc)
+  add_dune_umfpack_flags(umfpacktest)
+  set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "umfpack_decomp")
+endif()
+
+if ((SUPERLU_FOUND) OR (UMFPACK_FOUND))
+  add_executable(overlappingschwarztest "overlappingschwarztest.cc")
+  if (SUPERLU_FOUND)
+    add_dune_superlu_flags(overlappingschwarztest)
+  endif()
+  if (UMFPACK_FOUND)
+    add_dune_umfpack_flags(overlappingschwarztest)
+  endif()
+endif()
+
 if(HAVE_MPI)
   add_executable(matrixredisttest "matrixredisttest.cc")
   add_executable(vectorcommtest "vectorcommtest.cc")
diff --git a/dune/istl/test/Makefile.am b/dune/istl/test/Makefile.am
index c0de4632..bd531d34 100644
--- a/dune/istl/test/Makefile.am
+++ b/dune/istl/test/Makefile.am
@@ -9,13 +9,17 @@ if MPI
 endif
 
 if SUPERLU
-  SUPERLUTESTS =  superlutest superluztest superluctest superlustest overlappingschwarztest
+  SUPERLUTESTS =  superlutest superluztest superluctest superlustest
 endif
 
 if PARDISO
   PARDISOTEST = test_pardiso
 endif
 
+if UMFPACK
+  UMFPACKTEST = umfpacktest
+endif
+
 # which tests where program to build and run are equal
 NORMALTESTS = basearraytest \
               bcrsbuildtest \
@@ -29,18 +33,24 @@ NORMALTESTS = basearraytest \
               matrixutilstest \
               mmtest \
               mv \
+              overlappingschwarztest \
               scaledidmatrixtest \
               seqmatrixmarkettest \
               vbvectortest
 
 # list of tests to run (indicestest is special case)
-TESTS = $(NORMALTESTS) $(MPITESTS) $(SUPERLUTESTS) $(PARDISOTEST) $(PARMETISTESTS)
+TESTS = $(NORMALTESTS) $(MPITESTS) $(SUPERLUTESTS) $(PARDISOTEST) $(PARMETISTESTS) $(UMFPACKTEST)
 
 # programs just to build when "make check" is used
-check_PROGRAMS = $(NORMALTESTS) $(MPITESTS) $(SUPERLUTESTS) $(PARDISOTEST) $(PARMETISTESTS)
+check_PROGRAMS = $(NORMALTESTS) $(MPITESTS) $(SUPERLUTESTS) $(PARDISOTEST) $(PARMETISTESTS) $(UMFPACKTEST)
 
 # define the programs
 
+overlappingschwarztest_SOURCES = overlappingschwarztest.cc
+overlappingschwarztest_LDADD =
+overlappingschwarztest_LDFLAGS= $(AM_LDFLAGS)
+overlappingschwarztest_CPPFLAGS= $(AM_CPPFLAGS)
+
 if SUPERLU
   superlutest_SOURCES = superlutest.cc
   superlutest_LDADD= $(SUPERLU_LIBS)
@@ -64,13 +74,21 @@ if SUPERLU
   superluztest_LDFLAGS= $(AM_LDFLAGS) $(SUPERLU_LDFLAGS)
   superluztest_CPPFLAGS=$(AM_CPPFLAGS) $(SUPERLU_CPPFLAGS) -DSUPERLU_NTYPE=3
 
-  overlappingschwarztest_SOURCES = overlappingschwarztest.cc
-  overlappingschwarztest_LDADD= $(SUPERLU_LIBS)
-  overlappingschwarztest_LDFLAGS= $(AM_LDFLAGS) $(SUPERLU_LDFLAGS)
-  overlappingschwarztest_CPPFLAGS=$(AM_CPPFLAGS) $(SUPERLU_CPPFLAGS)
+  overlappingschwarztest_LDADD += $(SUPERLU_LIBS)
+  overlappingschwarztest_LDFLAGS +=  $(SUPERLU_LDFLAGS)
+  overlappingschwarztest_CPPFLAGS += $(SUPERLU_CPPFLAGS)
 endif
 
+if UMFPACK
+  umfpacktest_SOURCES= umfpacktest.cc
+  umfpacktest_LDADD= $(UMFPACK_LIBS)
+  umfpacktest_LDFLAGS= $(AM_LDFLAGS) $(UMFPACK_LDFLAGS)
+  umfpacktest_CPPFLAGS= $(AM_CPPFLAGS) $(UMFPACK_CPPFLAGS)
 
+  overlappingschwarztest_LDADD += $(UMFPACK_LIBS)
+  overlappingschwarztest_LDFLAGS += $(UMFPACK_LDFLAGS)
+  overlappingschwarztest_CPPFLAGS += $(UMFPACK_CPPFLAGS)
+endif
 
 if PARDISO
   test_pardiso_SOURCES = test_pardiso.cc
@@ -145,3 +163,5 @@ endif
 include $(top_srcdir)/am/global-rules
 
 EXTRA_DIST = CMakeLists.txt
+
+CLEANFILES = umfpack_decomp
\ No newline at end of file
diff --git a/dune/istl/test/overlappingschwarztest.cc b/dune/istl/test/overlappingschwarztest.cc
index 23559426..226e00a3 100644
--- a/dune/istl/test/overlappingschwarztest.cc
+++ b/dune/istl/test/overlappingschwarztest.cc
@@ -11,6 +11,8 @@
 #include <dune/common/sllist.hh>
 #include <dune/istl/overlappingschwarz.hh>
 #include <dune/istl/solvers.hh>
+#include<dune/istl/superlu.hh>
+#include<dune/istl/umfpack.hh>
 
 #include <iterator>
 
@@ -137,11 +139,20 @@ int main(int argc, char** argv)
   //  b=0;
   //  x=100;
   //  setBoundary(x,b,N);
-  //Dune::SeqOverlappingSchwarz<BCRSMat,BVector,Dune::AdditiveSchwarzMode,
-  //    Dune::SuperLU<BCRSMat> > prec0(mat, domains, 1);
-  Dune::SeqOverlappingSchwarz<BCRSMat,BVector> prec0(mat, domains, 1);
+#if HAVE_UMFPACK
+  std::cout << "Do testing with UMFPack" << std::endl;
+  Dune::SeqOverlappingSchwarz<BCRSMat,BVector,Dune::AdditiveSchwarzMode,
+      Dune::UMFPack<BCRSMat> > prec0(mat, domains, 1);
   Dune::LoopSolver<BVector> solver0(fop, prec0, 1e-2,100,2);
   solver0.apply(x,b, res);
+#endif
+#if HAVE_SUPERLU
+  std::cout << "Do testing with SuperLU" << std::endl;
+  Dune::SeqOverlappingSchwarz<BCRSMat,BVector,Dune::AdditiveSchwarzMode,
+      Dune::SuperLU<BCRSMat> > slu_prec0(mat, domains, 1);
+  Dune::LoopSolver<BVector> slu_solver(fop, slu_prec0, 1e-2,100,2);
+  slu_solver.apply(x,b, res);
+#endif
   return 0;
 
   std::cout<<"Additive Schwarz not on the fly (domains vector)"<<std::endl;
diff --git a/dune/istl/test/umfpacktest.cc b/dune/istl/test/umfpacktest.cc
new file mode 100644
index 00000000..ce45368e
--- /dev/null
+++ b/dune/istl/test/umfpacktest.cc
@@ -0,0 +1,80 @@
+#include "config.h"
+
+#include <complex>
+#include<iostream>
+
+#include <dune/common/fmatrix.hh>
+#include <dune/common/fvector.hh>
+#include <dune/common/timer.hh>
+#include <dune/istl/bvector.hh>
+#include <dune/istl/colcompmatrix.hh>
+#include <dune/istl/io.hh>
+#include <dune/istl/operators.hh>
+#include <dune/istl/umfpack.hh>
+
+#include "laplacian.hh"
+
+
+int main(int argc, char** argv)
+{
+  try
+  {
+    typedef int I;
+    typedef double FIELD_TYPE;
+    //typedef std::complex<double> FIELD_TYPE;
+
+    const int BS=1;
+    std::size_t N=100;
+
+    if(argc>1)
+      N = atoi(argv[1]);
+    std::cout<<"testing for N="<<N<<" BS="<<1<<std::endl;
+
+    typedef Dune::FieldMatrix<FIELD_TYPE,BS,BS> MatrixBlock;
+    typedef Dune::BCRSMatrix<MatrixBlock> BCRSMat;
+    typedef Dune::FieldVector<FIELD_TYPE,BS> VectorBlock;
+    typedef Dune::BlockVector<VectorBlock> Vector;
+    typedef Dune::MatrixAdapter<BCRSMat,Vector,Vector> Operator;
+
+    BCRSMat mat;
+    Operator fop(mat);
+    Vector b(N*N), x(N*N);
+
+    setupLaplacian(mat,N);
+    b=1;
+    x=0;
+
+    Dune::Timer watch;
+
+    watch.reset();
+
+    Dune::UMFPack<BCRSMat> solver(mat,1);
+
+    Dune::InverseOperatorResult res;
+
+    Dune::UMFPack<BCRSMat> solver1;
+
+    std::set<std::size_t> mrs;
+    for(std::size_t s=0; s < N/2; ++s)
+      mrs.insert(s);
+
+    solver1.setSubMatrix(mat,mrs);
+    solver1.setVerbosity(true);
+
+    solver.apply(x,b, res);
+    solver.free();
+
+    solver1.apply(x,b, res);
+    solver1.apply(reinterpret_cast<FIELD_TYPE*>(&x[0]), reinterpret_cast<FIELD_TYPE*>(&b[0]));
+
+    Dune::UMFPack<BCRSMat> save_solver(mat,"umfpack_decomp",0);
+    Dune::UMFPack<BCRSMat> load_solver(mat,"umfpack_decomp",0);
+    return 0;
+  }
+  catch(Dune::Exception &e)
+  {
+    std::cerr << "Dune reported error: " << e << std::endl;
+  }
+  catch (...)
+  {}
+}
diff --git a/dune/istl/umfpack.hh b/dune/istl/umfpack.hh
new file mode 100644
index 00000000..6aa92c08
--- /dev/null
+++ b/dune/istl/umfpack.hh
@@ -0,0 +1,460 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_UMFPACK_HH
+#define DUNE_UMFPACK_HH
+
+#if HAVE_UMFPACK
+
+#include<complex>
+#include<type_traits>
+
+#include<umfpack.h>
+
+#include<dune/common/exceptions.hh>
+#include<dune/common/fmatrix.hh>
+#include<dune/common/fvector.hh>
+#include<dune/istl/bcrsmatrix.hh>
+#include<dune/istl/solvers.hh>
+#include<dune/istl/solvertype.hh>
+
+#include"colcompmatrix.hh"
+
+
+namespace Dune {
+  /**
+   * @addtogroup ISTL
+   *
+   * @{
+   */
+  /**
+   * @file
+   * @author Dominic Kempf
+   * @brief Classes for using UMFPack with ISTL matrices.
+   */
+
+  // FORWARD DECLARATIONS
+  template<class M, class T, class TM, class TD, class TA>
+  class SeqOverlappingSchwarz;
+
+  template<class T, bool tag>
+  struct SeqOverlappingSchwarzAssemblerHelper;
+
+  /** @brief use the UMFPack Package to directly solve linear systems
+   * @tparam Matrix the matrix type defining the system
+   * Details on UMFPack are to be found on
+   * http://www.cise.ufl.edu/research/sparse/umfpack/
+   */
+  template<class Matrix>
+  class UMFPack
+  {};
+
+  // wrapper class for C-Function Calls in the backend. Choose the right function namespace
+  // depending on the template parameter used.
+  template<typename T>
+  struct UMFPackMethodChooser
+  {};
+
+  template<>
+  struct UMFPackMethodChooser<double>
+  {
+    template<typename... A>
+    static void defaults(A... args)
+    {
+      umfpack_di_defaults(args...);
+    }
+    template<typename... A>
+    static void free_numeric(A... args)
+    {
+      umfpack_di_free_numeric(args...);
+    }
+    template<typename... A>
+    static void free_symbolic(A... args)
+    {
+      umfpack_di_free_symbolic(args...);
+    }
+    template<typename... A>
+    static int load_numeric(A... args)
+    {
+      return umfpack_di_load_numeric(args...);
+    }
+    template<typename... A>
+    static void numeric(A... args)
+    {
+      umfpack_di_numeric(args...);
+    }
+    template<typename... A>
+    static void report_info(A... args)
+    {
+      umfpack_di_report_info(args...);
+    }
+    template<typename... A>
+    static void report_status(A... args)
+    {
+      umfpack_di_report_status(args...);
+    }
+    template<typename... A>
+    static int save_numeric(A... args)
+    {
+      return umfpack_di_save_numeric(args...);
+    }
+    template<typename... A>
+    static void solve(A... args)
+    {
+      umfpack_di_solve(args...);
+    }
+    template<typename... A>
+    static void symbolic(A... args)
+    {
+      umfpack_di_symbolic(args...);
+    }
+  };
+
+  template<>
+  struct UMFPackMethodChooser<std::complex<double> >
+  {
+    template<typename... A>
+    static void defaults(A... args)
+    {
+      umfpack_zi_defaults(args...);
+    }
+    template<typename... A>
+    static void free_numeric(A... args)
+    {
+      umfpack_zi_free_numeric(args...);
+    }
+    template<typename... A>
+    static void free_symbolic(A... args)
+    {
+      umfpack_zi_free_symbolic(args...);
+    }
+    template<typename... A>
+    static int load_numeric(A... args)
+    {
+      return umfpack_zi_load_numeric(args...);
+    }
+    template<typename... A>
+    static void numeric(const int* cs, const int* ri, const double* val, A... args)
+    {
+      umfpack_zi_numeric(cs,ri,val,NULL,args...);
+    }
+    template<typename... A>
+    static void report_info(A... args)
+    {
+      umfpack_zi_report_info(args...);
+    }
+    template<typename... A>
+    static void report_status(A... args)
+    {
+      umfpack_zi_report_status(args...);
+    }
+    template<typename... A>
+    static int save_numeric(A... args)
+    {
+      return umfpack_zi_save_numeric(args...);
+    }
+    template<typename... A>
+    static void solve(int m, const int* cs, const int* ri, std::complex<double>* val, double* x, const double* b,A... args)
+    {
+      const double* cval = reinterpret_cast<const double*>(val);
+      umfpack_zi_solve(m,cs,ri,cval,NULL,x,NULL,b,NULL,args...);
+    }
+    template<typename... A>
+    static void symbolic(int m, int n, const int* cs, const int* ri, const double* val, A... args)
+    {
+      umfpack_zi_symbolic(m,n,cs,ri,val,NULL,args...);
+    }
+  };
+
+  /** @brief use the UMFPack Package to directly solve linear systems
+   *
+   * Specialization for the Dune::BCRSMatrix. UMFPack will always go double
+   * precision and supports complex numbers
+   * too (use std::complex<double> for that).
+   */
+  template<typename T, typename A, int n, int m>
+  class UMFPack<BCRSMatrix<FieldMatrix<T,n,m>,A > >
+      : public InverseOperator<
+          BlockVector<FieldVector<T,m>,
+              typename A::template rebind<FieldVector<T,m> >::other>,
+          BlockVector<FieldVector<T,n>,
+              typename A::template rebind<FieldVector<T,n> >::other> >
+  {
+    public:
+    /** @brief The matrix type. */
+    typedef Dune::BCRSMatrix<FieldMatrix<T,n,m>,A> Matrix;
+    typedef Dune::BCRSMatrix<FieldMatrix<T,n,m>,A> matrix_type;
+    /** @brief The corresponding SuperLU Matrix type.*/
+    typedef Dune::ColCompMatrix<Matrix> UMFPackMatrix;
+    /** @brief Type of an associated initializer class. */
+    typedef ColCompMatrixInitializer<BCRSMatrix<FieldMatrix<T,n,m>,A> > MatrixInitializer;
+    /** @brief The type of the domain of the solver. */
+    typedef Dune::BlockVector<
+        FieldVector<T,m>,
+        typename A::template rebind<FieldVector<T,m> >::other> domain_type;
+    /** @brief The type of the range of the solver. */
+    typedef Dune::BlockVector<
+        FieldVector<T,n>,
+        typename A::template rebind<FieldVector<T,n> >::other> range_type;
+
+    /** @brief construct a solver object from a BCRSMatrix
+     *  @param mat_ the matrix to solve for
+     *  @param verbose [0..2] set the verbosity level, defaults to 0
+     */
+    UMFPack(const Matrix& mat_, int verbose=0)
+    {
+      //check whether T is a supported type
+      static_assert((std::is_same<T,double>::value) || (std::is_same<T,std::complex<double> >::value),
+                    "Unsupported Type in UMFPack (only double and std::complex<double> supported)");
+      Caller::defaults(UMF_Control);
+      setVerbosity(verbose);
+      setMatrix(mat_);
+    }
+
+    /** @brief default constructor
+     */
+    UMFPack() : verbose(0)
+    {
+      //check whether T is a supported type
+      static_assert((std::is_same<T,double>::value) || (std::is_same<T,std::complex<double> >::value),
+                    "Unsupported Type in UMFPack (only double and std::complex<double> supported)");
+      Caller::defaults(UMF_Control);
+    }
+
+    /** @brief try loading a decomposition from file and do a decomposition if unsuccesful
+     * @param mat_ the matrix to decompose when no decoposition file found
+     * @param file the decomposition file
+     * @param verbose the verbosity level
+     * use saveDecomposition(char* file) for manually storing a decomposition. This constructor
+     * will decompose mat_ and store the result to file if no file wasnt found in the first place.
+     * Thus, if you always use this you will only compute the decomposition once (and when you manually
+     * deleted the decomposition file).
+     */
+    UMFPack(const Matrix& mat_, const char* file, int verbose=0)
+    {
+      //check whether T is a supported type
+      static_assert((std::is_same<T,double>::value) || (std::is_same<T,std::complex<double> >::value),
+                    "Unsupported Type in UMFPack (only double and std::complex<double> supported)");
+      Caller::defaults(UMF_Control);
+      setVerbosity(verbose);
+      int errcode = Caller::load_numeric(&UMF_Numeric, const_cast<char*>(file));
+      if ((errcode == UMFPACK_ERROR_out_of_memory) || (errcode == UMFPACK_ERROR_file_IO))
+      {
+        setMatrix(mat_);
+        saveDecomposition(file);
+      }
+      else
+        std::cout << "UMFPack decomposition succesfully loaded from " << file << std::endl;
+    }
+
+    /** @brief try loading a decomposition from file
+     * @param file the decomposition file
+     * @param verbose the verbosity level
+     * throws an exception when not being able to load the file. Doesnt need knowledge of the
+     * actual matrix!
+     */
+    UMFPack(const char* file, int verbose=0)
+    {
+      //check whether T is a supported type
+      static_assert((std::is_same<T,double>::value) || (std::is_same<T,std::complex<double> >::value),
+                    "Unsupported Type in UMFPack (only double and std::complex<double> supported)");
+      Caller::defaults(UMF_Control);
+      int errcode = Caller::load_numeric(&UMF_Numeric, const_cast<char*>(file));
+      if (errcode == UMFPACK_ERROR_out_of_memory)
+        DUNE_THROW(Dune::Exception, "ran out of memory while loading UMFPack decomposition");
+      if (errcode == UMFPACK_ERROR_file_IO)
+        DUNE_THROW(Dune::Exception, "IO error while loading UMFPack decomposition");
+      std::cout << "UMFPack decomposition succesfully loaded from " << file << std::endl;
+      setVerbosity(verbose);
+    }
+
+    ~UMFPack()
+    {
+      if (mat.N() + mat.M() > 0)
+      free();
+    }
+
+    /**
+     *  \copydoc InverseOperator::apply(X&, Y&, InverserOperatorResult&)
+     */
+    void apply(domain_type& x, range_type& b, InverseOperatorResult& res)
+    {
+      Caller::solve(UMFPACK_A,
+                    mat.getColStart(),
+                    mat.getRowIndex(),
+                    mat.getValues(),
+                    reinterpret_cast<double*>(&x[0]),
+                    reinterpret_cast<double*>(&b[0]),
+                    UMF_Numeric,
+                    UMF_Control,
+                    UMF_Info);
+
+      //this is a direct solver
+      res.iterations = 1;
+      res.converged = true;
+
+      printOnApply();
+    }
+
+    /**
+     *  \copydoc InverseOperator::apply(X&,Y&,double,InverseOperatorResult&)
+     */
+    void apply (domain_type& x, range_type& b, double reduction, InverseOperatorResult& res)
+    {
+      apply(x,b,res);
+    }
+
+    /**
+     * @brief additional apply method with c-arrays in analogy to superlu
+     * @param x solution array
+     * @param b rhs array
+     */
+    void apply(T* x, T* b)
+    {
+      Caller::solve(UMFPACK_A,
+                    mat.getColStart(),
+                    mat.getRowIndex(),
+                    mat.getValues(),
+                    x,
+                    b,
+                    UMF_Numeric,
+                    UMF_Control,
+                    UMF_Info);
+      printOnApply();
+    }
+
+    /** @brief saves a decomposition to a file
+     * @param file the filename to save to
+     */
+    void saveDecomposition(const char* file)
+    {
+      int errcode = Caller::save_numeric(UMF_Numeric, const_cast<char*>(file));
+      if (errcode != UMFPACK_OK)
+        DUNE_THROW(Dune::Exception,"IO ERROR while trying to save UMFPack decomposition");
+    }
+
+    /** @brief Initialize data from given matrix. */
+    void setMatrix(const Matrix& _mat)
+    {
+      if (mat.N() + mat.M() > 0)
+        free();
+      mat = _mat;
+      decompose();
+    }
+
+    template<class S>
+    void setSubMatrix(const Matrix& _mat, const S& rowIndexSet)
+    {
+      if (mat.N() + mat.M() > 0)
+        free();
+      mat.setMatrix(_mat,rowIndexSet);
+      decompose();
+    }
+
+    /** @brief sets the verbosity level for the UMFPack solver
+     * @param v verbosity level
+     * The following levels are implemented:
+     * 0 - only error messages
+     * 1 - a bit of statistics on decomposition and solution
+     * 2 - lots of statistics on decomposition and solution
+     */
+    void setVerbosity(int v)
+    {
+      verbose = v;
+      // set the verbosity level in UMFPack
+      if (verbose == 0)
+        UMF_Control[UMFPACK_PRL] = 1;
+      if (verbose == 1)
+        UMF_Control[UMFPACK_PRL] = 2;
+      if (verbose == 2)
+        UMF_Control[UMFPACK_PRL] = 4;
+    }
+
+    /**
+     * @brief free allocated space.
+     * @warning later calling apply will result in an error.
+     */
+    void free()
+    {
+      Caller::free_symbolic(&UMF_Symbolic);
+      Caller::free_numeric(&UMF_Numeric);
+      mat.free();
+    }
+
+    private:
+    typedef typename Dune::UMFPackMethodChooser<T> Caller;
+
+    template<class M,class X, class TM, class TD, class T1>
+    friend class SeqOverlappingSchwarz;
+    friend struct SeqOverlappingSchwarzAssemblerHelper<UMFPack<Matrix>,true>;
+
+    /** @brief computes the LU Decomposition */
+    void decompose()
+    {
+      Caller::symbolic(static_cast<int>(mat.N()),
+                       static_cast<int>(mat.N()),
+                       mat.getColStart(),
+                       mat.getRowIndex(),
+                       reinterpret_cast<double*>(mat.getValues()),
+                       &UMF_Symbolic,
+                       UMF_Control,
+                       UMF_Info);
+      Caller::numeric(mat.getColStart(),
+                      mat.getRowIndex(),
+                      reinterpret_cast<double*>(mat.getValues()),
+                      UMF_Symbolic,
+                      &UMF_Numeric,
+                      UMF_Control,
+                      UMF_Info);
+      Caller::report_status(UMF_Control,UMF_Info[UMFPACK_STATUS]);
+      if (verbose == 1)
+      {
+        std::cout << "[UMFPack Decomposition]" << std::endl;
+        std::cout << "Wallclock Time taken: " << UMF_Info[UMFPACK_NUMERIC_WALLTIME] << " (CPU Time: " << UMF_Info[UMFPACK_NUMERIC_TIME] << ")" << std::endl;
+        std::cout << "Flops taken: " << UMF_Info[UMFPACK_FLOPS] << std::endl;
+        std::cout << "Peak Memory Usage: " << UMF_Info[UMFPACK_PEAK_MEMORY]*UMF_Info[UMFPACK_SIZE_OF_UNIT] << " bytes" << std::endl;
+        std::cout << "Condition number estimate: " << 1./UMF_Info[UMFPACK_RCOND] << std::endl;
+        std::cout << "Numbers of non-zeroes in decomposition: L: " << UMF_Info[UMFPACK_LNZ] << " U: " << UMF_Info[UMFPACK_UNZ] << std::endl;
+      }
+      if (verbose == 2)
+      {
+        Caller::report_info(UMF_Control,UMF_Info);
+      }
+    }
+
+    void printOnApply()
+    {
+      Caller::report_status(UMF_Control,UMF_Info[UMFPACK_STATUS]);
+      if (verbose > 0)
+      {
+        std::cout << "[UMFPack Solve]" << std::endl;
+        std::cout << "Wallclock Time: " << UMF_Info[UMFPACK_SOLVE_WALLTIME] << " (CPU Time: " << UMF_Info[UMFPACK_SOLVE_TIME] << ")" << std::endl;
+        std::cout << "Flops Taken: " << UMF_Info[UMFPACK_SOLVE_FLOPS] << std::endl;
+        std::cout << "Iterative Refinement steps taken: " << UMF_Info[UMFPACK_IR_TAKEN] << std::endl;
+        std::cout << "Error Estimate: " << UMF_Info[UMFPACK_OMEGA1] << " resp. " << UMF_Info[UMFPACK_OMEGA2] << std::endl;
+      }
+    }
+
+    UMFPackMatrix mat;
+    int verbose;
+    void *UMF_Symbolic;
+    void *UMF_Numeric;
+    double UMF_Info[UMFPACK_INFO];
+    double UMF_Control[UMFPACK_CONTROL];
+  };
+
+  template<typename T, typename A, int n, int m>
+  struct IsDirectSolver<UMFPack<BCRSMatrix<FieldMatrix<T,n,m>,A> > >
+  {
+    enum { value=true};
+  };
+
+  template<typename T, typename A, int n, int m>
+  struct StoresColumnCompressed<UMFPack<BCRSMatrix<FieldMatrix<T,n,m>,A> > >
+  {
+    enum { value = true };
+  };
+}
+
+#endif //HAVE_UMFPACK
+
+#endif //DUNE_UMFPACK_HH
diff --git a/m4/dune_istl.m4 b/m4/dune_istl.m4
index 1c03bc23..fcc3bed0 100644
--- a/m4/dune_istl.m4
+++ b/m4/dune_istl.m4
@@ -3,6 +3,7 @@ AC_DEFUN([DUNE_ISTL_CHECKS],
   AC_REQUIRE([DUNE_PATH_PARMETIS])
   AC_REQUIRE([DUNE_PATH_SUPERLU])
   AC_REQUIRE([DUNE_PATH_SUPERLU_DIST])
+  AC_REQUIRE([DUNE_PATH_UMFPACK])
   AC_REQUIRE([DUNE_PARDISO])
   AC_REQUIRE([__AC_FC_NAME_MANGLING])
   AC_REQUIRE([AC_PROG_F77])
diff --git a/m4/umfpack.m4 b/m4/umfpack.m4
new file mode 100644
index 00000000..189b04a7
--- /dev/null
+++ b/m4/umfpack.m4
@@ -0,0 +1,141 @@
+# courtesy of the dune-fem maintainers
+
+AC_DEFUN([DUNE_PATH_UMFPACK],[
+  AC_REQUIRE([AC_PROG_CC])
+
+  AC_ARG_WITH(umfpack,
+    AC_HELP_STRING([--with-umfpack=PATH],[directory with UMFPACK inside]))
+  AC_ARG_WITH(umfpack-includedir,
+    AC_HELP_STRING([--with-umfpack-includedir=PATH],[directory with UMFPACK headers inside]))
+  AC_ARG_WITH(umfpack-libdir,
+    AC_HELP_STRING([--with-umfpack-libdir=PATH],[directory with UMFPACK libraries inside]))
+
+# store old values
+ac_save_LDFLAGS="$LDFLAGS"
+ac_save_CPPFLAGS="$CPPFLAGS"
+ac_save_LIBS="$LIBS"
+
+UMFPACKYES=0
+## do nothing if no --with-umfpack was supplied
+if test x$with_umfpack != x && test x$with_umfpack != xno ; then
+  UMFPACKYES=1
+fi
+if test x$with_umfpack_includedir != x && test x$with_umfpack_includedir != xno ; then
+  UMFPACKYES=1
+fi
+if test x$with_umfpack_libdir != x && test x$with_umfpack_libdir != xno ; then
+  UMFPACKYES=1
+fi
+
+if test x$UMFPACKYES = x1 ; then
+
+  # is --with-umfpack=bla used?
+  if test "x$with_umfpack" != x ; then
+    UMFPACKROOT=`cd $with_umfpack && pwd`
+	  if ! test -d $UMFPACKROOT;  then
+      AC_MSG_WARN([UMFPACK directory $with_umfpack does not exist])
+  	fi
+
+    if test "x$UMFPACKROOT" = x; then
+      # use some default value...
+      UMFPACKROOT="/usr/local/umfpack"
+    fi
+
+    UMFAMD_LIB_PATH="$UMFPACKROOT/AMD/Lib"
+    UMFPACK_LIB_PATH="$UMFPACKROOT/UMFPACK/Lib"
+    UMFPACK_INCLUDE_PATH="$UMFPACKROOT/UMFPACK/Include"
+  else 
+    if test "x$with_umfpack_includedir" != x ; then 
+      UMFPACK_INCLUDE_PATH=`cd $with_umfpack_includedir && pwd`
+      if ! test -d $UMFPACK_INCLUDE_PATH;  then
+        AC_MSG_WARN([UMFPACK directory $with_umfpack_includedir does not exist])
+      fi
+    fi
+    if test "x$with_umfpack_libdir" != x ; then 
+      UMFPACK_LIB_PATH=`cd $with_umfpack_libdir && pwd`
+      if ! test -d $UMFPACK_LIB_PATH;  then
+        AC_MSG_WARN([UMFPACK directory $with_umfpack_libdir does not exist])
+      fi
+    fi
+    UMFAMD_LIB_PATH=$UMFPACK_LIB_PATH
+  fi
+
+  # set variables so that tests can use them
+  REM_CPPFLAGS=$CPPFLAGS
+
+  LDFLAGS="$LDFLAGS -L$UMFPACK_LIB_PATH -L$UMFAMD_LIB_PATH"
+  UMFPACK_INC_FLAG="-I$UMFPACK_INCLUDE_PATH -I$UMFPACKROOT/UFconfig -I$UMFPACKROOT/AMD/Include -I$UMFPACKROOT/SuiteSparse_config -DENABLE_UMFPACK=1"
+  CPPFLAGS="$CPPFLAGS $UMFPACK_INC_FLAG $MPI_CPPFLAGS"
+
+  # check for header
+  AC_LANG_PUSH([C])
+  AC_CHECK_HEADERS([umfpack.h], 
+     [UMFPACK_CPPFLAGS="$UMFPACK_INC_FLAG"
+    HAVE_UMFPACK="1"],
+    AC_MSG_WARN([umfpack.h not found in $UMFPACK_INCLUDE_PATH]))
+   
+  CPPFLAGS="$REM_CPPFLAGS"
+  REM_CPPFLAGS=
+
+  REM_LDFLAGS=$LDFLAGS
+
+  # if header is found...
+  if test x$HAVE_UMFPACK = x1 ; then
+    AC_CHECK_LIB(umfpack,[main],
+    [UMFPACK_LIBS="-lumfpack"
+           UMFPACK_LDFLAGS="-L$UMFPACK_LIB_PATH"],
+	  [HAVE_UMFPACK="0"
+	  AC_MSG_WARN(libumfpack not found!)])
+  fi
+
+  # if lib is found...
+  if test x$HAVE_UMFPACK = x1 ; then
+    AC_CHECK_LIB(amd,[main],
+    [UMFPACK_LIBS="$UMFPACK_LIBS -lamd"
+           UMFPACK_LDFLAGS="$UMFPACK_LDFLAGS -L$UMFAMD_LIB_PATH"
+           LIBS="$LIBS $UMFPACK_LIBS"],
+	  [HAVE_UMFPACK="0"
+	  AC_MSG_WARN(libamd not found!)])
+  fi
+
+  LDFLAGS=$REM_LDFLAGS
+  AC_LANG_POP
+
+## end of umfpack check (--without wasn't set)
+fi
+
+# survived all tests?
+if test x$HAVE_UMFPACK = x1 ; then
+  AC_SUBST(UMFPACK_LIBS, $UMFPACK_LIBS)
+  AC_SUBST(UMFPACK_LDFLAGS, $UMFPACK_LDFLAGS)
+  AC_SUBST(UMFPACK_CPPFLAGS, $UMFPACK_CPPFLAGS)
+  AC_DEFINE(HAVE_UMFPACK, ENABLE_UMFPACK,
+    [This is only true if umfpack-library was found by configure 
+     _and_ if the application uses the UMFPACK_CPPFLAGS])
+
+  # add to global list
+  DUNE_ADD_ALL_PKG([UMFPACK], [\${UMFPACK_CPPFLAGS}],
+                   [\${UMFPACK_LDFLAGS}], [\${UMFPACK_LIBS}])
+
+  # set variable for summary
+  with_umfpack="yes"
+
+else
+  AC_SUBST(UMFPACK_LIBS, "")
+  AC_SUBST(UMFPACK_LDFLAGS, "")
+  AC_SUBST(UMFPACK_CPPFLAGS, "")
+
+  # set variable for summary
+  with_umfpack="no"
+fi
+  
+# also tell automake
+AM_CONDITIONAL(UMFPACK, test x$HAVE_UMFPACK = x1)
+
+# reset old values
+LIBS="$ac_save_LIBS"
+CPPFLAGS="$ac_save_CPPFLAGS"
+LDFLAGS="$ac_save_LDFLAGS"
+
+  DUNE_ADD_SUMMARY_ENTRY([UMFPACK],[$with_umfpack])
+])
-- 
GitLab