From 0ae6272142b092cd30691f5e3c66aaafd622510f Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Thu, 11 Jul 2013 16:33:40 +0200
Subject: [PATCH] Implement new buildmode for bcrs matrices

---
 dune/istl/bcrsmatrix.hh  | 370 +++++++++++++++++++++++++++++++++++++--
 dune/istl/matrixutils.hh |   9 +
 2 files changed, 362 insertions(+), 17 deletions(-)

diff --git a/dune/istl/bcrsmatrix.hh b/dune/istl/bcrsmatrix.hh
index 695ef937a..de2565492 100644
--- a/dune/istl/bcrsmatrix.hh
+++ b/dune/istl/bcrsmatrix.hh
@@ -11,6 +11,7 @@
 #include <algorithm>
 #include <numeric>
 #include <vector>
+#include <map>
 
 #include "istlexception.hh"
 #include "bvector.hh"
@@ -70,6 +71,27 @@ namespace Dune {
   template<typename M>
   struct MatrixDimension;
 
+  /**
+   * \brief export statistic about compression achieved in mymode mode
+   *
+   * To enable the user to tune parameters of mymode buildmode of a
+   * Dune::BCRSMatrix manually, some statistics are exported upon
+   * compression.
+   *
+   */
+  template<typename size_type>
+  struct CompressionStatistics
+  {
+    //! average number of non-zeroes per row
+    double avg;
+    //! maximum number of non-zeroes in a row_type
+    size_type maximum;
+    //! total number of elements written to the overflow area
+    size_type overflow_total;
+    //! percentage of wasted memory resulting from non-used overflow area
+    double mem_ratio;
+  };
+
   /**
      \brief A sparse block matrix with compressed row storage
 
@@ -81,6 +103,7 @@ namespace Dune {
 
      1. Row-wise scheme
      2. Random scheme
+     3. mymode scheme
 
      Error checking: no error checking is provided normally.
      Setting the compile time switch DUNE_ISTL_WITH_CHECKING
@@ -176,6 +199,72 @@ namespace Dune {
      B[3][0] = 7;
      B[3][3] = 8;
      \endcode
+
+     3. mymode scheme
+     With the above Random Scheme, the sparsity pattern often has to be determined
+     and stored before the matrix is assembled. This leads to an increase of
+     needs in memory and computation time. Often, one has good a priori
+     knowledge about the number of entries a row contains on average. mymode
+     mode tries to make use of that knowledge by allocating memory based on
+     that average. Entries in rows with more non-zeroes are written to an overflow
+     area. After all indices are added a compression procedure is used.
+     To use this mode use the following methods:
+
+     Construct the matrix via
+      - BCRSMatrix(size_type _n, size_type _m, size_type _avg, double _overflowsize, BuildMode bm)
+      - void setSize(size_type rows, size_type columns, size_type nnz=0) after setting
+        the buildmode to mymode and the compression parameter via setmymodeparameters(size_type _avg, double _overflow)
+
+     Start filling your matrix with entry(size_type row, size_type col).
+     Full access is possible also during buildstage, although not as fast
+     as after buildstage via the operator[] due to the necessity of searching
+     the overflow area for some matrix elements.
+
+     After the entry-method has been called for each matrix at least once,
+     a call to compress() reorganizes the data into one array for further use.
+     This sets the matrixs state to built. No matrix entries may be added after
+     this step. The return type Dune::CompressionStatistics allows for some parameter optimization.
+
+     Use of copy constructor, assignment operator and matrix vector arithmetics
+     are not supported until the matrix is fully built.
+
+     In the following sample code, an array with 28 entries will be reserved
+     \code
+     #include<dune/common/fmatrix.hh>
+     #include<dune/istl/bcrsmatrix.hh>
+
+     typedef Dune::BCRSMatrix<Dune::FieldMatrix<double,1,1> > M;
+     M m(10, 10, 2, 0.4, M::mymode);
+
+     //fill in some arbitrary entries, even operations on these would be possible,
+     //you get a reference to the entry! the order of these statements is irrelevant!
+     m.entry(0,0) = 0.;
+     m.entry(8,0) = 0.;
+     m.entry(1,8) = 0.; m.entry(1,0) = 0.; m.entry(1,5) = 0.;
+     m.entry(2,0) = 0.;
+     m.entry(3,5) = 0.; m.entry(3,0) = 0.;  m.entry(3,8) = 0.;
+     m.entry(4,0) = 0.;
+     m.entry(9,0) = 0.; m.entry(9,5) = 0.; m.entry(9,8) = 0.;
+     m.entry(5,0) = 0.; m.entry(5,5) = 0.; m.entry(5,8) = 0.;
+     m.entry(6,0) = 0.;
+     m.entry(7,0) = 0.; m.entry(7,5) = 0.; m.entry(7,8) = 0.;
+
+
+
+     // internally the index array now looks like this (second row are the row pointers):
+     // xxxxxxxx0x800x500x050x050x05
+     // ........|.|.|.|.|.|.|.|.|.|.
+     // and the overflow area contains (1,5,0.0), (3,8,0.0), (5,8,0.0), (7,8,0.0), (9,8,0.0)
+     // the data array has similar structure.
+
+     //finish building by compressing the array
+     Dune::CompressionStatistics<M::size_type> stats = m.compress();
+
+     // internally the index array now looks like this:
+     // 00580058005800580058xxxxxxxx
+     // ||..||..||..||..||..........
+
+     \endcode
    */
   template<class B, class A=std::allocator<B> >
   class BCRSMatrix
@@ -242,6 +331,15 @@ namespace Dune {
        * can not be defined in sequential order.
        */
       random,
+      /**
+       * @brief Build entries randomly with an educated guess on entries per row_type
+       *
+       * Allows random order generation as in random mode, but rowsizes do not need
+       * to be given first. Instead an average number of non-zeroes per row is passed
+       * to the constructor. Matrix setup is finished with compress(), full data access
+       * during buidstage is possible.
+       */
+      mymode,
       /**
        * @brief Build mode not set!
        */
@@ -465,6 +563,23 @@ namespace Dune {
       allocate(_n, _m);
     }
 
+    //! \brief construct matrix with a known average number of entries per row
+    /**
+     * Constructs a matrix in mymode buildmode.
+     *
+     * @param _n number of rows of the matrix
+     * @param _m number of columns of the matrix
+     * @param _avg expected average number of entries per row
+     * @param _overflowsize fraction of _n*_avg which is expected to be
+     *   needed for elements that exceed _avg entries per row.
+     *
+     */
+    BCRSMatrix (size_type _n, size_type _m, size_type _avg, double _overflowsize, BuildMode bm)
+      : build_mode(bm), ready(notbuilt), avg(_avg), overflowsize(_overflowsize)
+    {
+      mymode_allocate(_n,_m);
+    }
+
     /**
      * @brief copy constructor
      *
@@ -522,15 +637,40 @@ namespace Dune {
      * @param rows The number of rows the matrix should contain.
      * @param columns the number of columns the matrix should contain.
      * @param nnz The number of nonzero entries the matrix should hold (if omitted
-     * defaults to 0).
+     * defaults to 0). Must be omitted in mymode mode.
      */
     void setSize(size_type rows, size_type columns, size_type nnz=0)
     {
       // deallocate already setup memory
       deallocate();
 
-      // allocate matrix memory
-      allocate(rows, columns, nnz);
+      if (build_mode == mymode)
+      {
+        if (nnz>0)
+          DUNE_THROW(Dune::ISTLError,"number of non-zeroes may not be set in mymode mode, use setmymodeparameters() instead");
+
+        // mymode allocates differently
+        mymode_allocate(rows,columns);
+      }
+      else
+      {
+        // allocate matrix memory
+        allocate(rows, columns, nnz);
+      }
+    }
+
+    /** @brief Set parameters needed for creation in mymode.
+     *
+     * Use this method before setSize() to define storage behaviour of a matrix
+     * in mymode mode
+     * @param _avg expected average number of entries per row
+     * @param _overflowsize fraction of _n*_avg which is expected to be
+     *   needed for elements that exceed _avg entries per row.
+     */
+    void setmymodeparameters(size_type _avg, double _overflow)
+    {
+      avg = _avg;
+      overflow = _overflow;
     }
 
     /**
@@ -659,9 +799,12 @@ namespace Dune {
         {
           Mat.ready = built;
           if(Mat.nnz>0)
+          {
             // Set nnz to the exact number of nonzero blocks inserted
             // as some methods rely on it
             Mat.nnz=nnz;
+            Mat.allocationSize = nnz;
+          }
         }
         // done
         return *this;
@@ -873,6 +1016,169 @@ namespace Dune {
       ready = built;
     }
 
+    //===== mymode creation interface
+
+    //! \brief get reference to entry (row,col) of the matrix
+    /*!
+     * This method can only be used when the matrix is in mymode
+     * building mode.
+     *
+     * A reference to entry (row, col) of the matrix is returned.
+     * If this is the first call for (row, col) the matrix element
+     * is created.
+     *
+     * This method can only be used while building the matrix,
+     * after compression operator[] gives a much better performance.
+     */
+    B& entry(size_type row, size_type col)
+    {
+      if (build_mode!=mymode)
+        DUNE_THROW(ISTLError,"requires mymode build mode");
+      if (ready==built)
+        DUNE_THROW(ISTLError,"matrix already built up, use operator[] for entry access now");
+
+      if (row >= n)
+        DUNE_THROW(ISTLError,"row index exceeds matrix size");
+      if (col >= m)
+        DUNE_THROW(ISTLError,"column index exceeds matrix size");
+
+      size_type* begin = r[row].getindexptr();
+      size_type* end = begin + r[row].getsize();
+
+      size_type* pos = std::find(begin, end, col);
+
+      //treat the case that there was a match in the array
+      if (pos != end)
+        if (*pos == col)
+        {
+          std::ptrdiff_t offset = pos - r[row].getindexptr();
+          B* aptr = r[row].getptr() + offset;
+
+          return *aptr;
+        }
+
+      //determine whether overflow has to be taken into account or not
+      if (r[row].getsize() == avg)
+        return overflow[std::make_pair(row,col)];
+      else
+      {
+        //modify index array
+        *end = col;
+
+        //do simulatenous operations on data array a
+        std::ptrdiff_t offset = end - r[row].getindexptr();
+        B* apos = r[row].getptr() + offset;
+
+        //placement new to call constructor of B
+        new (apos) B;
+
+        //increase rowsize
+        r[row].setsize(r[row].getsize()+1);
+
+        //return reference to the newly created entry
+        return *apos;
+      }
+    }
+
+    //! @brief finishes the buildstage in mymode mode
+    /*! once called, matrix is going to built state and no indices
+     *  can be added to a matrix that is built in mymode mode.
+     *
+     *  performs compression of index and data arrays with linear
+     *  complexity
+     *
+     *  An object with some statistics about the compression form
+     *  future optimization is returned.
+     */
+    CompressionStatistics<size_type> compress()
+    {
+      if (build_mode!=mymode)
+        DUNE_THROW(ISTLError,"requires mymode build mode");
+      if (ready==built)
+        DUNE_THROW(ISTLError,"matrix already built up, no more need for compression");
+
+      //calculate statistics
+      CompressionStatistics<size_type> stats;
+      stats.overflow_total = overflow.size();
+
+      //get insertion iterators pointing to one before start (for later use of ++it)
+      size_type* jiit = j.get()-1;
+      B* aiit = a-1;
+
+      //get iterator to the smallest overflow element
+      typename OverflowType::iterator oit = overflow.begin();
+
+      //iterate over all rows and copy elements into their position in the compressed array
+      for (size_type i=0; i<n; i++)
+      {
+        //get old pointers into a and j and size without overflow changes
+        size_type* begin = r[i].getindexptr();
+        //B* apos = r[i].getptr();
+        size_type size = r[i].getsize();
+
+        //store a copy of index pointers on which to perform sortation
+        std::vector<size_type*> perm;
+
+        for (size_type* it = begin; it < begin + size; ++it)
+          perm.push_back(it);
+
+        //sort permutation array
+        std::sort(perm.begin(),perm.end(),PointerCompare<size_type>());
+
+        //change row window pointer to their new positions
+        r[i].setindexptr(jiit+1);
+        r[i].setptr(aiit+1);
+
+        for (typename std::vector<size_type*>::iterator it = perm.begin(); it != perm.end(); ++it)
+        {
+          //check whether there are elements in the overflow area which take precedence
+          while ((oit!=overflow.end()) && (oit->first < std::make_pair(i,**it)))
+          {
+            //copy and element from the overflow area to the insertion position in a and j
+            *(++jiit) = oit->first.second;
+            *(++aiit) = oit->second;
+            ++oit;
+            r[i].setsize(r[i].getsize()+1);
+          }
+
+          //copy element from array
+          *(++jiit) = **it;
+          B* apos = *it-j.get()+a;
+          *(++aiit) = *apos;
+        }
+
+        //copy remaining elements from the overflow area
+        while ((oit!=overflow.end()) && (oit->first.first == i))
+        {
+          //copy and element from the overflow area to the insertion position in a and j
+          *(++jiit) = oit->first.second;
+          *(++aiit) = oit->second;
+          ++oit;
+          r[i].setsize(r[i].getsize()+1);
+        }
+      }
+
+      // overflow area may be cleared
+      overflow.clear();
+
+      //determine average number of entries and memory usage
+      std::ptrdiff_t diff = (r[n-1].getindexptr() + r[n-1].getsize() - j.get());
+      nnz = diff;
+      stats.avg = (double) (nnz) / (double) n;
+      stats.mem_ratio = (double) (nnz)/(double) allocationSize;
+
+      //determine maximum number of entries in a row
+      stats.maximum = 0;
+      for (size_type i=0; i<n; i++)
+        if (r[i].getsize()>stats.maximum)
+          stats.maximum = r[i].getsize();
+
+      //matrix is now built
+      ready = built;
+
+      return stats;
+    }
+
     //===== vector space arithmetic
 
     //! vector space multiplication with scalar
@@ -1282,18 +1588,25 @@ namespace Dune {
     // size of the matrix
     size_type n;       // number of rows
     size_type m;       // number of columns
-    size_type nnz;     // number of nonzeros allocated in the a and j array below
+    size_type nnz;     // number of nonzeroes contained in the matrix
+    size_type allocationSize; //allocated size of a and j arrays, except in mymode mode: nnz==allocationsSize
     // zero means that memory is allocated separately for each row.
 
     // the rows are dynamically allocated
     row_type* r;     // [n] the individual rows having pointers into a,j arrays
 
     // dynamically allocated memory
-    B*   a;      // [nnz] non-zero entries of the matrix in row-wise ordering
+    B*   a;      // [allocationSize] non-zero entries of the matrix in row-wise ordering
     // If a single array of column indices is used, it can be shared
     // between different matrices with the same sparsity pattern
-    Dune::shared_ptr<size_type> j;  // [nnz] column indices of entries
+    Dune::shared_ptr<size_type> j;  // [allocationSize] column indices of entries
 
+    // additional data is needed in mymode buildmode
+    size_type avg;
+    double overflowsize;
+
+    typedef std::map<std::pair<size_type,size_type>, B> OverflowType;
+    OverflowType overflow;
 
     void setWindowPointers(ConstRowIterator row)
     {
@@ -1336,13 +1649,13 @@ namespace Dune {
     void deallocate(bool deallocateRows=true)
     {
 
-      if (nnz>0)
+      if (allocationSize>0)
       {
         // a,j have been allocated as one long vector
         j.reset();
-        for(B *aiter=a+(nnz-1), *aend=a-1; aiter!=aend; --aiter)
+        for(B *aiter=a+(allocationSize-1), *aend=a-1; aiter!=aend; --aiter)
           allocator_.destroy(aiter);
-        allocator_.deallocate(a,n);
+        allocator_.deallocate(a,allocationSize);
       }
       else
       {
@@ -1397,17 +1710,18 @@ namespace Dune {
      *
      * @param row The number of rows the matrix should contain.
      * @param columns the number of columns the matrix should contain.
-     * @param nnz The number of nonzero entries the matrix should hold (if omitted
+     * @param allocationSize_ The number of nonzero entries the matrix should hold (if omitted
      * defaults to 0).
      * @param allocateRow Whether we have to allocate the row pointers, too. (Defaults to
      * true)
      */
-    void allocate(size_type rows, size_type columns, size_type nnz_=0, bool allocateRows=true)
+    void allocate(size_type rows, size_type columns, size_type allocationSize_=0, bool allocateRows=true)
     {
       // Store size
       n = rows;
       m = columns;
-      nnz = nnz_;
+      nnz = allocationSize_;
+      allocationSize = allocationSize_;
 
       // allocate rows
       if(allocateRows) {
@@ -1418,17 +1732,16 @@ namespace Dune {
         }
       }
 
-
       // allocate a and j array
-      if (nnz>0) {
-        a = allocator_.allocate(nnz);
+      if (allocationSize>0) {
+        a = allocator_.allocate(allocationSize);
         // use placement new to call constructor that allocates
         // additional memory.
-        new (a) B[nnz];
+        new (a) B[allocationSize];
 
         // allocate column indices only if not yet present (enable sharing)
         if (!j.get())
-          j.reset(sizeAllocator_.allocate(nnz),Deallocator(sizeAllocator_));
+          j.reset(sizeAllocator_.allocate(allocationSize),Deallocator(sizeAllocator_));
       }else{
         a = 0;
         j.reset();
@@ -1440,6 +1753,29 @@ namespace Dune {
       ready = notbuilt;
     }
 
+    /** @brief organizes allocation mymode mode
+     * calculates correct array size to be allocated and sets the
+     * the window pointers to their correct positions for insertion.
+     * internally uses allocate() for the real allocation.
+     */
+    void mymode_allocate(size_type _n, size_type _m)
+    {
+      //calculate size of overflow region
+      size_type osize = (size_type) (_n*avg)*overflowsize;
+      allocationSize = _n*avg + osize;
+
+      allocate(_n, _m, allocationSize);
+
+      //set row pointers correctly
+      size_type* jptr = j.get() + osize;
+      B* aptr = a + osize;
+      for (size_type i=0; i<n; i++)
+      {
+        r[i].set(0,aptr,jptr);
+        jptr = jptr + avg;
+        aptr = aptr + avg;
+      }
+    }
   };
 
 
diff --git a/dune/istl/matrixutils.hh b/dune/istl/matrixutils.hh
index e64246351..c2f67e80b 100644
--- a/dune/istl/matrixutils.hh
+++ b/dune/istl/matrixutils.hh
@@ -480,5 +480,14 @@ namespace Dune
     };
   };
 
+  template<typename T>
+  struct PointerCompare
+  {
+    bool operator()(const T* l, const T* r)
+    {
+      return *l < *r;
+    }
+  };
+
 }
 #endif
-- 
GitLab