diff --git a/dune/istl/.gitignore b/dune/istl/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..677993cc450715e48c301f6ac4c0e12a78a6120e
--- /dev/null
+++ b/dune/istl/.gitignore
@@ -0,0 +1,4 @@
+Makefile
+Makefile.in
+semantic.cache
+istl.tar.gz
\ No newline at end of file
diff --git a/dune/istl/matrixredistribute.hh b/dune/istl/matrixredistribute.hh
index bf74857246d8f8c7e783bc2759e30fcf5e817b51..8845d1f1dc69664a8b0a596351f1c2057210d52b 100644
--- a/dune/istl/matrixredistribute.hh
+++ b/dune/istl/matrixredistribute.hh
@@ -34,11 +34,27 @@ namespace Dune
     void setNoRows(std::size_t size)
     {}
 
+    void setNoCopyRows(std::size_t size)
+    {}
+
+    void setNoBackwardsCopyRows(std::size_t size)
+    {}
+
     std::size_t getRowSize(std::size_t index) const
     {
       return -1;
     }
 
+    std::size_t getCopyRowSize(std::size_t index) const
+    {
+      return -1;
+    }
+
+    std::size_t getBackwardsCopyRowSize(std::size_t index) const
+    {
+      return -1;
+    }
+
   };
 
 #if HAVE_MPI
@@ -143,14 +159,46 @@ namespace Dune
     {
       return rowSize[index];
     }
+
+    std::size_t& getCopyRowSize(std::size_t index)
+    {
+      return copyrowSize[index];
+    }
+
+    std::size_t getCopyRowSize(std::size_t index) const
+    {
+      return copyrowSize[index];
+    }
+
+    std::size_t& getBackwardsCopyRowSize(std::size_t index)
+    {
+      return backwardscopyrowSize[index];
+    }
+
+    std::size_t getBackwardsCopyRowSize(std::size_t index) const
+    {
+      return backwardscopyrowSize[index];
+    }
+
     void setNoRows(std::size_t rows)
     {
       rowSize.resize(rows, 0);
     }
 
+    void setNoCopyRows(std::size_t rows)
+    {
+      copyrowSize.resize(rows, 0);
+    }
+
+    void setNoBackwardsCopyRows(std::size_t rows)
+    {
+      backwardscopyrowSize.resize(rows, 0);
+    }
+
   private:
     std::vector<std::size_t> rowSize;
-
+    std::vector<std::size_t> copyrowSize;
+    std::vector<std::size_t> backwardscopyrowSize;
     RedistributeInterface interface;
     bool setup_;
   };
@@ -161,7 +209,7 @@ namespace Dune
    *
    * @tparam M The type of the matrix that the row size
    * is communicated of.
-   * @tparam I The type of the index set.
+   * @tparam RI The type of class for redistribution information
    */
   template<class M, class RI>
   struct CommMatrixRowSize
@@ -173,7 +221,7 @@ namespace Dune
     /**
      * @brief Constructor.
      * @param m_ The matrix whose sparsity pattern is communicated.
-     * @param[out] rowsize_ The vector containing the row sizes
+     * @param[out] rowsize_ RedistributeInformation object
      */
     CommMatrixRowSize(const M& m_, RI& rowsize_)
       : matrix(m_), rowsize(rowsize_)
@@ -192,7 +240,7 @@ namespace Dune
    * is communicated of.
    * @tparam I The type of the index set.
    */
-  template<class M, class I, class RI>
+  template<class M, class I>
   struct CommMatrixSparsityPattern
   {
     typedef typename M::size_type size_type;
@@ -215,12 +263,12 @@ namespace Dune
      * @param rowsize_ The row size for the redistributed owner rows.
      */
     CommMatrixSparsityPattern(const M& m_, const Dune::GlobalLookupIndexSet<I>& idxset_, const I& aggidxset_,
-                              const RI& rowsize_)
+                              const std::vector<typename M::size_type>& rowsize_)
       : matrix(m_), idxset(idxset_), aggidxset(aggidxset_), sparsity(aggidxset_.size()), rowsize(&rowsize_)
     {}
 
     /**
-     * @brief Creates ans stores the sparsity pattern of the redistributed matrix.
+     * @brief Creates and stores the sparsity pattern of the redistributed matrix.
      *
      * After the pattern is communicated this function can be used.
      * @param m The matrix to build.
@@ -282,18 +330,40 @@ namespace Dune
       }
     }
 
+    /**
+     * @brief Completes the sparsity pattern of the redistributed matrix with data
+     * from copy rows for the novlp case.
+     *
+     * After the pattern communication this function can be used.
+     * @param add_sparsity Sparsity pattern from the copy rows.
+     */
+    void completeSparsityPattern(std::vector<std::set<size_type> > add_sparsity)
+    {
+      typedef typename std::vector<std::set<size_type> >::const_iterator Iter;
+      for (unsigned int i = 0; i != sparsity.size(); ++i) {
+        if (add_sparsity[i].size() != 0) {
+          typedef std::set<size_type> Set;
+          Set tmp_set;
+          std::insert_iterator<Set> tmp_insert (tmp_set, tmp_set.begin());
+          std::set_union(add_sparsity[i].begin(), add_sparsity[i].end(),
+                         sparsity[i].begin(), sparsity[i].end(), tmp_insert);
+          sparsity[i].swap(tmp_set);
+        }
+      }
+    }
+
     const M& matrix;
     typedef Dune::GlobalLookupIndexSet<I> LookupIndexSet;
     const Dune::GlobalLookupIndexSet<I>& idxset;
     const I& aggidxset;
     std::vector<std::set<size_type> > sparsity;
-    const RI* rowsize;
+    const std::vector<size_type>* rowsize;
   };
 
-  template<class M, class I, class RI>
-  struct CommPolicy<CommMatrixSparsityPattern<M,I,RI> >
+  template<class M, class I>
+  struct CommPolicy<CommMatrixSparsityPattern<M,I> >
   {
-    typedef CommMatrixSparsityPattern<M,I,RI> Type;
+    typedef CommMatrixSparsityPattern<M,I> Type;
 
     /**
      *  @brief The indexed type we send.
@@ -310,8 +380,8 @@ namespace Dune
         return t.matrix[i].size();
       else
       {
-        assert(t.rowsize->getRowSize(i)>0);
-        return t.rowsize->getRowSize(i);
+        assert((*t.rowsize)[i]>0);
+        return (*t.rowsize)[i];
       }
     }
   };
@@ -322,7 +392,7 @@ namespace Dune
    * @tparam M The type of the matrix.
    * @tparam I The type of the ParallelIndexSet.
    */
-  template<class M, class I, class RI>
+  template<class M, class I>
   struct CommMatrixRow
   {
     /**
@@ -341,7 +411,7 @@ namespace Dune
      * @brief Constructor.
      */
     CommMatrixRow(M& m_, const Dune::GlobalLookupIndexSet<I>& idxset_, const I& aggidxset_,
-                  RI& rowsize_)
+                  std::vector<size_t>& rowsize_)
       : matrix(m_), idxset(idxset_), aggidxset(aggidxset_), rowsize(&rowsize_)
     {}
     /**
@@ -378,13 +448,13 @@ namespace Dune
     /** @brief Index set for the redistributed matrix. */
     const I& aggidxset;
     /** @brief row size information for the receiving side. */
-    RI* rowsize; // row sizes differ from sender side in ovelap!
+    std::vector<size_t>* rowsize; // row sizes differ from sender side in overlap!
   };
 
-  template<class M, class I, class RI>
-  struct CommPolicy<CommMatrixRow<M,I,RI> >
+  template<class M, class I>
+  struct CommPolicy<CommMatrixRow<M,I> >
   {
-    typedef CommMatrixRow<M,I,RI> Type;
+    typedef CommMatrixRow<M,I> Type;
 
     /**
      *  @brief The indexed type we send.
@@ -401,8 +471,8 @@ namespace Dune
         return t.matrix[i].size();
       else
       {
-        assert(t.rowsize->getRowSize(i)>0);
-        return t.rowsize->getRowSize(i);
+        assert((*t.rowsize)[i]>0);
+        return (*t.rowsize)[i];
       }
     }
   };
@@ -423,38 +493,76 @@ namespace Dune
     }
 
   };
+
   template<class M, class I, class RI>
+  struct MatrixCopyRowSizeGatherScatter
+  {
+    typedef CommMatrixRowSize<M,RI> Container;
+
+    static const typename M::size_type gather(const Container& cont, std::size_t i)
+    {
+      return cont.matrix[i].size();
+    }
+    static void scatter(Container& cont, const typename M::size_type& rowsize, std::size_t i)
+    {
+      assert(rowsize);
+      if (rowsize > cont.rowsize.getCopyRowSize(i))
+        cont.rowsize.getCopyRowSize(i)=rowsize;
+    }
+
+  };
+
+  template<class M, class I>
   struct MatrixSparsityPatternGatherScatter
   {
     typedef typename I::GlobalIndex GlobalIndex;
-    typedef CommMatrixSparsityPattern<M,I,RI> Container;
+    typedef CommMatrixSparsityPattern<M,I> Container;
     typedef typename M::ConstColIterator ColIter;
 
     static ColIter col;
+    static GlobalIndex numlimits;
 
     static const GlobalIndex& gather(const Container& cont, std::size_t i, std::size_t j)
     {
       if(j==0)
         col=cont.matrix[i].begin();
-      else
+      else if (col!=cont.matrix[i].end())
         ++col;
-      assert(col!=cont.matrix[i].end());
-      const typename I::IndexPair* index=cont.idxset.pair(col.index());
-      assert(index);
-      return index->global();
+
+      //copy communication: different row sizes for copy rows with the same global index
+      //are possible. If all values of current  matrix row are sent, send
+      //std::numeric_limits<GlobalIndex>::max()
+      //and receiver will ignore it.
+      if (col==cont.matrix[i].end()) {
+        numlimits = std::numeric_limits<GlobalIndex>::max();
+        return numlimits;
+      }
+      else {
+        const typename I::IndexPair* index=cont.idxset.pair(col.index());
+        assert(index);
+        // Only send index if col is no ghost
+        if ( index->local().attribute() != 2)
+          return index->global();
+        else {
+          numlimits = std::numeric_limits<GlobalIndex>::max();
+          return numlimits;
+        }
+      }
     }
     static void scatter(Container& cont, const GlobalIndex& gi, std::size_t i, std::size_t j)
     {
       try{
-        const typename I::IndexPair& ip=cont.aggidxset.at(gi);
-        assert(ip.global()==gi);
-        std::size_t col = ip.local();
-        cont.sparsity[i].insert(col);
-
-        typedef typename Dune::OwnerOverlapCopyCommunication<int>::OwnerSet OwnerSet;
-        if(!OwnerSet::contains(ip.local().attribute()))
-          // preserve symmetry for overlap
-          cont.sparsity[col].insert(i);
+        if (gi != std::numeric_limits<GlobalIndex>::max()) {
+          const typename I::IndexPair& ip=cont.aggidxset.at(gi);
+          assert(ip.global()==gi);
+          std::size_t col = ip.local();
+          cont.sparsity[i].insert(col);
+
+          typedef typename Dune::OwnerOverlapCopyCommunication<int>::OwnerSet OwnerSet;
+          if(!OwnerSet::contains(ip.local().attribute()))
+            // preserve symmetry for overlap
+            cont.sparsity[col].insert(i);
+        }
       }
       catch(Dune::RangeError er) {
         // Entry not present in the new index set. Ignore!
@@ -478,37 +586,61 @@ namespace Dune
     }
 
   };
-  template<class M, class I, class RI>
-  typename MatrixSparsityPatternGatherScatter<M,I,RI>::ColIter MatrixSparsityPatternGatherScatter<M,I,RI>::col;
+  template<class M, class I>
+  typename MatrixSparsityPatternGatherScatter<M,I>::ColIter MatrixSparsityPatternGatherScatter<M,I>::col;
 
-  template<class M, class I, class RI>
+  template<class M, class I>
+  typename MatrixSparsityPatternGatherScatter<M,I>::GlobalIndex MatrixSparsityPatternGatherScatter<M,I>::numlimits;
+
+
+  template<class M, class I>
   struct MatrixRowGatherScatter
   {
     typedef typename I::GlobalIndex GlobalIndex;
-    typedef CommMatrixRow<M,I,RI> Container;
+    typedef CommMatrixRow<M,I> Container;
     typedef typename M::ConstColIterator ColIter;
     typedef typename std::pair<GlobalIndex,typename M::block_type> Data;
     static ColIter col;
     static Data datastore;
+    static GlobalIndex numlimits;
 
     static const Data& gather(const Container& cont, std::size_t i, std::size_t j)
     {
       if(j==0)
         col=cont.matrix[i].begin();
-      else
+      else if (col!=cont.matrix[i].end())
         ++col;
-      // convert local column index to global index
-      const typename I::IndexPair* index=cont.idxset.pair(col.index());
-      assert(index);
-      // Store the data to prevent reference to temporary
-      datastore = std::make_pair(index->global(),*col);
-      return datastore;
+      // copy communication: different row sizes for copy rows with the same global index
+      // are possible. If all values of current  matrix row are sent, send
+      // std::numeric_limits<GlobalIndex>::max()
+      // and receiver will ignore it.
+      if (col==cont.matrix[i].end()) {
+        numlimits = std::numeric_limits<GlobalIndex>::max();
+        datastore = std::make_pair(numlimits,*col);
+        return datastore;
+      }
+      else {
+        // convert local column index to global index
+        const typename I::IndexPair* index=cont.idxset.pair(col.index());
+        assert(index);
+        // Store the data to prevent reference to temporary
+        // Only send index if col is no ghost
+        if ( index->local().attribute() != 2)
+          datastore = std::make_pair(index->global(),*col);
+        else {
+          numlimits = std::numeric_limits<GlobalIndex>::max();
+          datastore = std::make_pair(numlimits,*col);
+        }
+        return datastore;
+      }
     }
     static void scatter(Container& cont, const Data& data, std::size_t i, std::size_t j)
     {
       try{
-        typename M::size_type column=cont.aggidxset.at(data.first).local();
-        cont.matrix[i][column]=data.second;
+        if (data.first != std::numeric_limits<GlobalIndex>::max()) {
+          typename M::size_type column=cont.aggidxset.at(data.first).local();
+          cont.matrix[i][column]=data.second;
+        }
       }
       catch(Dune::RangeError er) {
         // This an overlap row and might therefore lack some entries!
@@ -517,33 +649,83 @@ namespace Dune
     }
   };
 
-  template<class M, class I, class RI>
-  typename MatrixRowGatherScatter<M,I,RI>::ColIter MatrixRowGatherScatter<M,I,RI>::col;
-
-  template<class M, class I, class RI>
-  typename MatrixRowGatherScatter<M,I,RI>::Data MatrixRowGatherScatter<M,I,RI>::datastore;
+  template<class M, class I>
+  typename MatrixRowGatherScatter<M,I>::ColIter MatrixRowGatherScatter<M,I>::col;
 
+  template<class M, class I>
+  typename MatrixRowGatherScatter<M,I>::Data MatrixRowGatherScatter<M,I>::datastore;
 
+  template<class M, class I>
+  typename MatrixRowGatherScatter<M,I>::GlobalIndex MatrixRowGatherScatter<M,I>::numlimits;
 
   template<typename M, typename C>
   void redistributeSparsityPattern(M& origMatrix, M& newMatrix, C& origComm, C& newComm,
                                    RedistributeInformation<C>& ri)
   {
+    typename C::CopySet copyflags;
+    typename C::OwnerSet ownerflags;
     typedef typename C::ParallelIndexSet IndexSet;
     typedef RedistributeInformation<C> RI;
+    std::vector<typename M::size_type> rowsize(newComm.indexSet().size(), 0);
+    std::vector<typename M::size_type> copyrowsize(newComm.indexSet().size(), 0);
+    std::vector<typename M::size_type> backwardscopyrowsize(origComm.indexSet().size(), 0);
+
+    // get owner rowsizes
     CommMatrixRowSize<M,RI> commRowSize(origMatrix, ri);
     ri.template redistribute<MatrixRowSizeGatherScatter<M,IndexSet,RI> >(commRowSize,commRowSize);
 
     origComm.buildGlobalLookup();
 
-    CommMatrixSparsityPattern<M,IndexSet,RedistributeInformation<C> >
+    for (std::size_t i=0; i < newComm.indexSet().size(); i++) {
+      rowsize[i] = ri.getRowSize(i);
+    }
+    // get sparsity pattern from owner rows
+    CommMatrixSparsityPattern<M,IndexSet>
     origsp(origMatrix, origComm.globalLookup(), newComm.indexSet());
-    CommMatrixSparsityPattern<M,IndexSet,RedistributeInformation<C> >
-    newsp(origMatrix, origComm.globalLookup(), newComm.indexSet(), ri);
+    CommMatrixSparsityPattern<M,IndexSet>
+    newsp(origMatrix, origComm.globalLookup(), newComm.indexSet(), rowsize);
+
+    ri.template redistribute<MatrixSparsityPatternGatherScatter<M,IndexSet> >(origsp,newsp);
+
+    // build copy to owner interface to get missing matrix values for novlp case
+    if (origComm.getSolverCategory() == static_cast<int>(SolverCategory::nonoverlapping)) {
+      RemoteIndices<IndexSet> *ris = new RemoteIndices<IndexSet>(origComm.indexSet(),
+                                                                 newComm.indexSet(),
+                                                                 origComm.communicator());
+      ris->template rebuild<true>();
 
-    ri.template redistribute<MatrixSparsityPatternGatherScatter<M,IndexSet,RI> >(origsp,newsp);
+      ri.getInterface().free();
+      ri.getInterface().build(*ris,copyflags,ownerflags);
 
-    newsp.storeSparsityPattern(newMatrix);
+      // get copy rowsizes
+      CommMatrixRowSize<M,RI> commRowSize_copy(origMatrix, ri);
+      ri.template redistribute<MatrixCopyRowSizeGatherScatter<M,IndexSet,RI> >(commRowSize_copy,
+                                                                               commRowSize_copy);
+
+      for (std::size_t i=0; i < newComm.indexSet().size(); i++) {
+        copyrowsize[i] = ri.getCopyRowSize(i);
+      }
+      //get copy rowsizes for sender
+      ri.redistributeBackward(backwardscopyrowsize,copyrowsize);
+      for (std::size_t i=0; i < origComm.indexSet().size(); i++) {
+        ri.getBackwardsCopyRowSize(i) = backwardscopyrowsize[i];
+      }
+
+      // get sparsity pattern from copy rows
+      CommMatrixSparsityPattern<M,IndexSet> origsp_copy(origMatrix,
+                                                        origComm.globalLookup(),
+                                                        newComm.indexSet(),
+                                                        backwardscopyrowsize);
+      CommMatrixSparsityPattern<M,IndexSet> newsp_copy(origMatrix, origComm.globalLookup(),
+                                                       newComm.indexSet(), copyrowsize);
+      ri.template redistribute<MatrixSparsityPatternGatherScatter<M,IndexSet> >(origsp_copy,
+                                                                                newsp_copy);
+
+      newsp.completeSparsityPattern(newsp_copy.sparsity);
+      newsp.storeSparsityPattern(newMatrix);
+    }
+    else
+      newsp.storeSparsityPattern(newMatrix);
 
 #ifdef DUNE_ISTL_WITH_CHECKING
     // Check for symmetry
@@ -576,13 +758,46 @@ namespace Dune
   {
     typedef typename C::ParallelIndexSet IndexSet;
     typedef RedistributeInformation<C> RI;
-    CommMatrixRow<M,IndexSet,RedistributeInformation<C> >
-    origrow(origMatrix, origComm.globalLookup(), newComm.indexSet());
-    CommMatrixRow<M,IndexSet,RedistributeInformation<C> >
-    newrow(newMatrix, origComm.globalLookup(), newComm.indexSet(),ri);
+    typename C::OwnerSet ownerflags;
+    std::vector<typename M::size_type> rowsize(newComm.indexSet().size(), 0);
+    std::vector<typename M::size_type> copyrowsize(newComm.indexSet().size(), 0);
+    std::vector<typename M::size_type> backwardscopyrowsize(origComm.indexSet().size(), 0);
+
+    for (std::size_t i=0; i < newComm.indexSet().size(); i++) {
+      rowsize[i] = ri.getRowSize(i);
+      if (origComm.getSolverCategory() == static_cast<int>(SolverCategory::nonoverlapping)) {
+        copyrowsize[i] = ri.getCopyRowSize(i);
+      }
+    }
 
-    ri.template redistribute<MatrixRowGatherScatter<M,IndexSet,RI> >(origrow,newrow);
-    newrow.setOverlapRowsToDirichlet();
+    for (std::size_t i=0; i < origComm.indexSet().size(); i++)
+      if (origComm.getSolverCategory() == static_cast<int>(SolverCategory::nonoverlapping))
+        backwardscopyrowsize[i] = ri.getBackwardsCopyRowSize(i);
+
+
+    if (origComm.getSolverCategory() == static_cast<int>(SolverCategory::nonoverlapping)) {
+      // fill sparsity pattern from copy rows
+      CommMatrixRow<M,IndexSet> origrow_copy(origMatrix, origComm.globalLookup(),
+                                             newComm.indexSet(), backwardscopyrowsize);
+      CommMatrixRow<M,IndexSet> newrow_copy(newMatrix, origComm.globalLookup(),
+                                            newComm.indexSet(),copyrowsize);
+      ri.template redistribute<MatrixRowGatherScatter<M,IndexSet> >(origrow_copy,
+                                                                    newrow_copy);
+      ri.getInterface().free();
+      RemoteIndices<IndexSet> *ris = new RemoteIndices<IndexSet>(origComm.indexSet(),
+                                                                 newComm.indexSet(),
+                                                                 origComm.communicator());
+      ris->template rebuild<true>();
+      ri.getInterface().build(*ris,ownerflags,ownerflags);
+    }
+
+    CommMatrixRow<M,IndexSet>
+    origrow(origMatrix, origComm.globalLookup(), newComm.indexSet());
+    CommMatrixRow<M,IndexSet>
+    newrow(newMatrix, origComm.globalLookup(), newComm.indexSet(),rowsize);
+    ri.template redistribute<MatrixRowGatherScatter<M,IndexSet> >(origrow,newrow);
+    if (origComm.getSolverCategory() != static_cast<int>(SolverCategory::nonoverlapping))
+      newrow.setOverlapRowsToDirichlet();
     if(newMatrix.N()>0&&newMatrix.N()<20)
       printmatrix(std::cout, newMatrix, "redist", "row");
   }
@@ -608,6 +823,8 @@ namespace Dune
                           RedistributeInformation<C>& ri)
   {
     ri.setNoRows(newComm.indexSet().size());
+    ri.setNoCopyRows(newComm.indexSet().size());
+    ri.setNoBackwardsCopyRows(origComm.indexSet().size());
     redistributeSparsityPattern(origMatrix, newMatrix, origComm, newComm, ri);
     redistributeMatrixEntries(origMatrix, newMatrix, origComm, newComm, ri);
   }
diff --git a/dune/istl/novlpschwarz.hh b/dune/istl/novlpschwarz.hh
index 10c33464bdefb22f7d1f30da027d9d05a3541065..30adf7e10dd4d3bd2c523d65a96adbcd998c9389 100644
--- a/dune/istl/novlpschwarz.hh
+++ b/dune/istl/novlpschwarz.hh
@@ -101,14 +101,19 @@ namespace Dune {
     {
       y = 0;
       novlp_op_apply(x,y,1);
-      communication.addOwnerCopyToAll(y,y);
+      communication.addOwnerCopyToOwnerCopy(y,y);
     }
 
     //! apply operator to x, scale and add:  \f$ y = y + \alpha A(x) \f$
     virtual void applyscaleadd (field_type alpha, const X& x, Y& y) const
     {
+      // only apply communication to alpha*A*x to make it consistent,
+      // y already has to be consistent.
+      Y y1(y);
+      y = 0;
       novlp_op_apply(x,y,alpha);
-      communication.addOwnerCopyToAll(y,y);
+      communication.addOwnerCopyToOwnerCopy(y,y);
+      y += y1;
     }
 
     //! get matrix via *
@@ -123,22 +128,23 @@ namespace Dune {
       const PIS& pis=communication.indexSet();
       const RI& ri = communication.remoteIndices();
 
-      // set up mask vector
-      if (mask.size()!=static_cast<typename std::vector<double>::size_type>(x.size())) {
-        mask.resize(x.size());
-        for (typename std::vector<double>::size_type i=0; i<mask.size(); i++)
-          mask[i] = 1;
-        for (typename PIS::const_iterator i=pis.begin(); i!=pis.end(); ++i)
-          if (i->local().attribute()!=OwnerOverlapCopyAttributeSet::owner)
-            mask[i->local().local()] = 0;
-          else if (i->local().attribute()==OwnerOverlapCopyAttributeSet::overlap)
-            mask[i->local().local()] = 2;
-      }
-
       // at the beginning make a multimap "bordercontribution".
       // process has i and j as border dofs but is not the owner
       // => only contribute to Ax if i,j is in bordercontribution
       if (buildcomm == true) {
+
+        // set up mask vector
+        if (mask.size()!=static_cast<typename std::vector<double>::size_type>(x.size())) {
+          mask.resize(x.size());
+          for (typename std::vector<double>::size_type i=0; i<mask.size(); i++)
+            mask[i] = 1;
+          for (typename PIS::const_iterator i=pis.begin(); i!=pis.end(); ++i)
+            if (i->local().attribute()==OwnerOverlapCopyAttributeSet::copy)
+              mask[i->local().local()] = 0;
+            else if (i->local().attribute()==OwnerOverlapCopyAttributeSet::overlap)
+              mask[i->local().local()] = 2;
+        }
+
         for (MM::iterator iter = bordercontribution.begin();
              iter != bordercontribution.end(); ++iter)
           bordercontribution.erase(iter);
@@ -170,15 +176,19 @@ namespace Dune {
                           && rindex->localIndexPair().local().local()==j.index()) {
                         if (rindex->attribute() == OwnerOverlapCopyAttributeSet::owner
                             || remote->first == iowner
-                            || remote->first << communication.communicator().rank())
+                            || remote->first < communication.communicator().rank()) {
                           flag = false;
+                          continue;
+                        }
                       }
+                    if (flag == false)
+                      continue;
                   }
-                //don´t contribute to Ax if
-                //1. the owner of j has i as interior/border dof
-                //2. iowner has j as interior/border dof
-                //3. there is another process with smaller rank that has i and j
-                //   as interor/border dofs
+                // don´t contribute to Ax if
+                // 1. the owner of j has i as interior/border dof
+                // 2. iowner has j as interior/border dof
+                // 3. there is another process with smaller rank that has i and j
+                // as interor/border dofs
                 // if the owner of j does not have i as interior/border dof,
                 // it will not be taken into account
                 if (flag==true)
@@ -195,9 +205,9 @@ namespace Dune {
         if (mask[i.index()] == 0) {
           //dof doesn't belong to process but is border (not ghost)
           for (ColIterator j = _A_[i.index()].begin(); j != _A_[i.index()].end(); ++j) {
-            if (mask[j.index()]==1) //j is owner => then sum entries
+            if (mask[j.index()] == 1) //j is owner => then sum entries
               (*j).usmv(alpha,x[j.index()],y[i.index()]);
-            else if (mask[j.index()]==0) {
+            else if (mask[j.index()] == 0) {
               std::pair<MM::iterator, MM::iterator> itp =
                 bordercontribution.equal_range(i.index());
               for (MM::iterator it = itp.first; it != itp.second; ++it)
@@ -208,7 +218,8 @@ namespace Dune {
         }
         else if (mask[i.index()] == 1) {
           for (ColIterator j = _A_[i.index()].begin(); j != _A_[i.index()].end(); ++j)
-            (*j).usmv(alpha,x[j.index()],y[i.index()]);
+            if (mask[j.index()] != 2)
+              (*j).usmv(alpha,x[j.index()],y[i.index()]);
         }
       }
     }
@@ -363,8 +374,11 @@ namespace Dune {
      */
     virtual void apply (domain_type& v, const range_type& d)
     {
+      // block preconditioner equivalent to WrappedPreconditioner from
+      // pdelab/backend/ovlpistsolverbackend.hh,
+      // but not to BlockPreconditioner from schwarz.hh
       preconditioner.apply(v,d);
-      communication.addOwnerCopyToAll(v,v);
+      communication.addOwnerCopyToOwnerCopy(v,v);
     }
 
     /*!
diff --git a/dune/istl/owneroverlapcopy.hh b/dune/istl/owneroverlapcopy.hh
index ed592986ed3356839d333363599ad2d4f185fb9c..7fe5aaf2b6751699539f3d26132072668e0829b5 100644
--- a/dune/istl/owneroverlapcopy.hh
+++ b/dune/istl/owneroverlapcopy.hh
@@ -186,6 +186,7 @@ namespace Dune {
     typedef Dune::BufferedCommunicator BC;
     typedef Dune::Interface IF;
     typedef EnumItem<AttributeSet,OwnerOverlapCopyAttributeSet::owner> OwnerSet;
+    typedef EnumItem<AttributeSet,OwnerOverlapCopyAttributeSet::copy> CopySet;
     typedef Combine<EnumItem<AttributeSet,OwnerOverlapCopyAttributeSet::owner>,EnumItem<AttributeSet,OwnerOverlapCopyAttributeSet::overlap>,AttributeSet> OwnerOverlapSet;
     typedef Dune::AllSet<AttributeSet> AllSet;
   protected:
@@ -257,11 +258,35 @@ namespace Dune {
       OwnerCopyToAllInterfaceBuilt = true;
     }
 
+    void buildOwnerCopyToOwnerCopyInterface () const
+    {
+      if (OwnerCopyToOwnerCopyInterfaceBuilt)
+        OwnerCopyToOwnerCopyInterface.free();
+      typedef Combine<EnumItem<AttributeSet,OwnerOverlapCopyAttributeSet::owner>,EnumItem<AttributeSet,OwnerOverlapCopyAttributeSet::copy>,AttributeSet> OwnerCopySet;
+      OwnerCopySet sourceFlags;
+      OwnerCopySet destFlags;
+      OwnerCopyToOwnerCopyInterface.build(ri,sourceFlags,destFlags);
+      OwnerCopyToOwnerCopyInterfaceBuilt = true;
+    }
 
   public:
-    //      enum{
-    //  category = SolverCategory::overlapping
-    //    };
+
+    /**
+     * @brief Set right Solver Category (default is overlapping).
+     */
+
+    void setSolverCategory (int set) {
+      category = set;
+    }
+
+    /**
+     * @brief Set Solver Category.
+     * @return The Solver Category.
+     */
+
+    const int getSolverCategory () const {
+      return category;
+    }
 
     const CollectiveCommunication<MPI_Comm>& communicator() const
     {
@@ -320,6 +345,24 @@ namespace Dune {
       communicator.free();
     }
 
+    /**
+     * @brief Communicate values from owner and copy data points to owner and copy data points and add them to those values.
+     *
+     * @brief source The data to send from.
+     * @brief dest The data to add the communicated values to.
+     */
+    template<class T>
+    void addOwnerCopyToOwnerCopy (const T& source, T& dest) const
+    {
+      if (!OwnerCopyToOwnerCopyInterfaceBuilt)
+        buildOwnerCopyToOwnerCopyInterface ();
+      BC communicator;
+      communicator.template build<T>(OwnerCopyToOwnerCopyInterface);
+      communicator.template forward<AddGatherScatter<T> >(source,dest);
+      communicator.free();
+    }
+
+
     /**
      * @brief Compute a global dot product of two vectors.
      *
@@ -421,6 +464,7 @@ namespace Dune {
     {
       return ri;
     }
+
     void buildGlobalLookup()
     {
       if(globalLookup_) {
@@ -471,7 +515,6 @@ namespace Dune {
           x[i->local().local()] = 0;
     }
 
-
     /**
      * @brief Construct the communication without any indices.
      *
@@ -479,9 +522,11 @@ namespace Dune {
      * later on.
      * @param comm_ The MPI Communicator to use, e. g. MPI_COMM_WORLD
      */
-    OwnerOverlapCopyCommunication (MPI_Comm comm_)
+    OwnerOverlapCopyCommunication (MPI_Comm comm_, int cat = Dune::SolverCategory::overlapping)
       : cc(comm_), pis(), ri(pis,pis,comm_),
-        OwnerToAllInterfaceBuilt(false), OwnerOverlapToAllInterfaceBuilt(false), OwnerCopyToAllInterfaceBuilt(false), globalLookup_(0)
+        OwnerToAllInterfaceBuilt(false), OwnerOverlapToAllInterfaceBuilt(false),
+        OwnerCopyToAllInterfaceBuilt(false), OwnerCopyToOwnerCopyInterfaceBuilt(false),
+        globalLookup_(0), category(cat)
     {}
 
     /**
@@ -490,9 +535,11 @@ namespace Dune {
      * The local index set and the remote indices have to be set up
      * later on.
      */
-    OwnerOverlapCopyCommunication ()
+    OwnerOverlapCopyCommunication (int cat = Dune::SolverCategory::overlapping)
       : cc(MPI_COMM_WORLD), pis(), ri(pis,pis,MPI_COMM_WORLD),
-        OwnerToAllInterfaceBuilt(false), OwnerOverlapToAllInterfaceBuilt(false), OwnerCopyToAllInterfaceBuilt(false),globalLookup_(0)
+        OwnerToAllInterfaceBuilt(false), OwnerOverlapToAllInterfaceBuilt(false),
+        OwnerCopyToAllInterfaceBuilt(false), OwnerCopyToOwnerCopyInterfaceBuilt(false),
+        globalLookup_(0), category(cat)
     {}
 
     /**
@@ -500,8 +547,10 @@ namespace Dune {
      * @param indexinfo The set of IndexTripels describing the local and remote indices.
      * @param comm_ The communicator to use in the communication.
      */
-    OwnerOverlapCopyCommunication (const IndexInfoFromGrid<GlobalIdType,LocalIdType>& indexinfo, MPI_Comm comm_)
-      : cc(comm_),OwnerToAllInterfaceBuilt(false),OwnerOverlapToAllInterfaceBuilt(false), OwnerCopyToAllInterfaceBuilt(false), globalLookup_(0)
+    OwnerOverlapCopyCommunication (const IndexInfoFromGrid<GlobalIdType, LocalIdType>& indexinfo, MPI_Comm comm_, int cat = Dune::SolverCategory::overlapping)
+      : cc(comm_), OwnerToAllInterfaceBuilt(false), OwnerOverlapToAllInterfaceBuilt(false),
+        OwnerCopyToAllInterfaceBuilt(false), OwnerCopyToOwnerCopyInterfaceBuilt(false),
+        globalLookup_(0), category(cat)
     {
       // set up an ISTL index set
       pis.beginResize();
@@ -564,6 +613,7 @@ namespace Dune {
       if (OwnerToAllInterfaceBuilt) OwnerToAllInterface.free();
       if (OwnerOverlapToAllInterfaceBuilt) OwnerOverlapToAllInterface.free();
       if (OwnerCopyToAllInterfaceBuilt) OwnerCopyToAllInterface.free();
+      if (OwnerCopyToOwnerCopyInterfaceBuilt) OwnerCopyToOwnerCopyInterface.free();
       if (globalLookup_) delete globalLookup_;
     }
 
@@ -579,9 +629,12 @@ namespace Dune {
     mutable bool OwnerOverlapToAllInterfaceBuilt;
     mutable IF OwnerCopyToAllInterface;
     mutable bool OwnerCopyToAllInterfaceBuilt;
+    mutable IF OwnerCopyToOwnerCopyInterface;
+    mutable bool OwnerCopyToOwnerCopyInterfaceBuilt;
     mutable std::vector<double> mask;
     int oldseqNo;
     GlobalLookupIndexSet* globalLookup_;
+    int category;
   };
 
 #endif
diff --git a/dune/istl/paamg/amg.hh b/dune/istl/paamg/amg.hh
index 4ec9851f0ecdea471db033df88926675ddf091b0..1adac01e31b341a2c3e6f0465cfdcab070583f24 100644
--- a/dune/istl/paamg/amg.hh
+++ b/dune/istl/paamg/amg.hh
@@ -360,15 +360,15 @@ namespace Dune
                && matrices_->parallelInformation().coarsest().getRedistributed().communicator().size()==1
                && matrices_->parallelInformation().coarsest().getRedistributed().communicator().size()>0)) { // redistribute and 1 proc
           std::cout<<"Using superlu"<<std::endl;
-          if(matrices_->parallelInformation().coarsest().isRedistributed()) {
-            if(matrices_->matrices().coarsest().getRedistributed().getmat().N()>0) {
+          if(matrices_->parallelInformation().coarsest().isRedistributed())
+          {
+            if(matrices_->matrices().coarsest().getRedistributed().getmat().N()>0)
               // We are still participating on this level
               solver_  = new SuperLU<typename M::matrix_type>(matrices_->matrices().coarsest().getRedistributed().getmat());
-            }else
+            else
               solver_ = 0;
-          }else{
+          }else
             solver_  = new SuperLU<typename M::matrix_type>(matrices_->matrices().coarsest()->getmat());
-          }
         }else
 #endif
         {
diff --git a/dune/istl/paamg/construction.hh b/dune/istl/paamg/construction.hh
index 51e05e34648f55907fcff0e096c14f52e2afed7f..0ce21c6160ba937651a983f6329e8c6089e27a18 100644
--- a/dune/istl/paamg/construction.hh
+++ b/dune/istl/paamg/construction.hh
@@ -102,6 +102,27 @@ namespace Dune
       C* comm_;
     };
 
+#if HAVE_MPI
+    struct OwnerOverlapCopyCommunicationArgs
+    {
+      OwnerOverlapCopyCommunicationArgs(MPI_Comm comm, int cat)
+        : comm_(comm), cat_(cat)
+      {}
+
+      MPI_Comm comm_;
+      int cat_;
+    };
+#endif
+
+    struct SequentialCommunicationArgs
+    {
+      SequentialCommunicationArgs(CollectiveCommunication<void*> comm, int cat)
+        : comm_(comm)
+      {}
+
+      CollectiveCommunication<void*> comm_;
+    };
+
   } // end Amg namspace
 
   // foward declaration
@@ -178,11 +199,10 @@ namespace Dune
     class ConstructionTraits<SequentialInformation>
     {
     public:
-      typedef const CollectiveCommunication<void*> Arguments;
-
+      typedef const SequentialCommunicationArgs Arguments;
       static inline SequentialInformation* construct(Arguments& args)
       {
-        return new SequentialInformation(args);
+        return new SequentialInformation(args.comm_);
       }
 
       static inline void deconstruct(SequentialInformation* si)
@@ -198,11 +218,11 @@ namespace Dune
     class ConstructionTraits<OwnerOverlapCopyCommunication<T1,T2> >
     {
     public:
-      typedef const MPI_Comm Arguments;
+      typedef const OwnerOverlapCopyCommunicationArgs Arguments;
 
       static inline OwnerOverlapCopyCommunication<T1,T2>* construct(Arguments& args)
       {
-        return new OwnerOverlapCopyCommunication<T1,T2>(args);
+        return new OwnerOverlapCopyCommunication<T1,T2>(args.comm_, args.cat_);
       }
 
       static inline void deconstruct(OwnerOverlapCopyCommunication<T1,T2>* com)
diff --git a/dune/istl/paamg/galerkin.hh b/dune/istl/paamg/galerkin.hh
index bc26260a6869c3a6c8633fed6fe8e8edfc24316d..c9fb8956f3ea091bcad550491670203ffa409c46 100644
--- a/dune/istl/paamg/galerkin.hh
+++ b/dune/istl/paamg/galerkin.hh
@@ -638,7 +638,6 @@ namespace Dune
       for(RowIterator row = fine.begin(); row != endRow; ++row)
         if(aggregates[row.index()] != AggregatesMap<V>::ISOLATED) {
           assert(aggregates[row.index()]!=AggregatesMap<V>::UNAGGREGATED);
-          //typedef typename RowIterator::Iterator ColIterator;
           typedef typename M::ConstColIterator ColIterator;
           ColIterator endCol = row->end();
 
@@ -649,8 +648,20 @@ namespace Dune
             }
         }
 
+      // get the right diagonal matrix values on copy lines from owner processes
+      typedef typename M::block_type BlockType;
+      std::vector<BlockType> rowsize(coarse.N(),BlockType(0));
+      for (RowIterator row = coarse.begin(); row != coarse.end(); ++row)
+        rowsize[row.index()]=coarse[row.index()][row.index()];
+      pinfo.copyOwnerToAll(rowsize,rowsize);
+      for (RowIterator row = coarse.begin(); row != coarse.end(); ++row)
+        coarse[row.index()][row.index()] = rowsize[row.index()];
+
+      // don't set dirichlet boundaries for copy lines to make novlp case work,
+      // the preconditioner yields slightly different results now.
+
       // Set the dirichlet border
-      DirichletBoundarySetter<P>::template set<M>(coarse, pinfo, copy);
+      //DirichletBoundarySetter<P>::template set<M>(coarse, pinfo, copy);
 
     }
 
diff --git a/dune/istl/paamg/hierarchy.hh b/dune/istl/paamg/hierarchy.hh
index ad0e4c46d6a08b757ca70ac17fe5e859feda6673..bc4446785fe3a5327383ec344dd20687025a7ee4 100644
--- a/dune/istl/paamg/hierarchy.hh
+++ b/dune/istl/paamg/hierarchy.hh
@@ -450,6 +450,7 @@ namespace Dune
 
     private:
       typedef typename ConstructionTraits<MatrixOperator>::Arguments MatrixArgs;
+      typedef typename ConstructionTraits<ParallelInformation>::Arguments CommunicationArgs;
       /** @brief The list of aggregates maps. */
       AggregatesMapList aggregatesMaps_;
       /** @brief The list of redistributes. */
@@ -768,13 +769,16 @@ namespace Dune
       : matrices_(const_cast<MatrixOperator&>(fineOperator)),
         parallelInformation_(const_cast<ParallelInformation&>(pinfo))
     {
-      // TODO: reestablish compile time checks.
-      /*      dune_static_assert((static_cast<int>(MatrixOperator::category) == static_cast<int>(SolverCategory::sequential) ||
-         static_cast<int>(MatrixOperator::category) == static_cast<int>(SolverCategory::overlapping)),
-                         "MatrixOperator must be of category sequential or overlapping");
-         dune_static_assert((static_cast<int>(MatrixOperator::category) == static_cast<int>(ParallelInformation::category)),
-                         "MatrixOperator and ParallelInformation must belong to the same category!");
-       */
+      dune_static_assert((static_cast<int>(MatrixOperator::category) ==
+                          static_cast<int>(SolverCategory::sequential) ||
+                          static_cast<int>(MatrixOperator::category) ==
+                          static_cast<int>(SolverCategory::overlapping) ||
+                          static_cast<int>(MatrixOperator::category) ==
+                          static_cast<int>(SolverCategory::nonoverlapping)),
+                         "MatrixOperator must be of category sequential or overlapping or nonoverlapping");
+      if (static_cast<int>(MatrixOperator::category) != pinfo.getSolverCategory())
+        DUNE_THROW(ISTLError, "MatrixOperator and ParallelInformation must belong to the same category!");
+
     }
 
     template<class M, class IS, class A>
@@ -983,7 +987,8 @@ namespace Dune
         unknowns =  noAggregates;
         dunknowns = dgnoAggregates;
 
-        parallelInformation_.addCoarser(info->communicator());
+        CommunicationArgs commargs(info->communicator(),info->getSolverCategory());
+        parallelInformation_.addCoarser(commargs);
 
         ++infoLevel; // parallel information on coarse level
 
diff --git a/dune/istl/paamg/indicescoarsener.hh b/dune/istl/paamg/indicescoarsener.hh
index 1d0440ad924abbb514ffe39650bc677a036c3db5..c96b08973e614a6199e2ffb4f08027f84731588a 100644
--- a/dune/istl/paamg/indicescoarsener.hh
+++ b/dune/istl/paamg/indicescoarsener.hh
@@ -348,9 +348,8 @@ namespace Dune
              AggregatesMap<typename Graph::VertexDescriptor>::ISOLATED)
           {
             assert(aggregates[index->localIndexPair().local()]<attributes.size());
-            assert(attributes[aggregates[index->localIndexPair().local()]] == std::numeric_limits<char>::max()
-                   || attributes[aggregates[index->localIndexPair().local()]] == index->attribute());
-            attributes[aggregates[index->localIndexPair().local()]] = index->attribute();
+            if (attributes[aggregates[index->localIndexPair().local()]] != 3)
+              attributes[aggregates[index->localIndexPair().local()]] = index->attribute();
           }
         }
 
diff --git a/dune/istl/paamg/pinfo.hh b/dune/istl/paamg/pinfo.hh
index 567dff0d57f68ec4065701276b4c53190dbf1b5b..9d0c3a3cb473b300e25f8ddc59a37d53214b9ea0 100644
--- a/dune/istl/paamg/pinfo.hh
+++ b/dune/istl/paamg/pinfo.hh
@@ -34,6 +34,10 @@ namespace Dune
         category = SolverCategory::sequential
       };
 
+      const int getSolverCategory () const {
+        return SolverCategory::sequential;
+      }
+
       MPICommunicator communicator() const
       {
         return comm_;