From a6431942b9fca363fc089339cf9132e00eb830e9 Mon Sep 17 00:00:00 2001
From: Robert Kloefkorn <robertk@ucar.edu>
Date: Wed, 29 May 2013 16:59:13 -0600
Subject: [PATCH] try to make codegen run with new caching mem layout

---
 dune/fem-dg/main/caching2.hh | 344 +++++++++++++++++++++++++++++++++++
 dune/fem-dg/main/codegen2.hh |  20 +-
 dune/fem-dg/main/default.hh  |  22 ++-
 3 files changed, 366 insertions(+), 20 deletions(-)
 create mode 100644 dune/fem-dg/main/caching2.hh

diff --git a/dune/fem-dg/main/caching2.hh b/dune/fem-dg/main/caching2.hh
new file mode 100644
index 00000000..696d2f8a
--- /dev/null
+++ b/dune/fem-dg/main/caching2.hh
@@ -0,0 +1,344 @@
+#ifndef DUNE_FEM_SPACE_SHAPEFUNCTIONSET_CACHING_HH
+#define DUNE_FEM_SPACE_SHAPEFUNCTIONSET_CACHING_HH
+
+// C++ includes
+#include <cstddef>
+#include <vector>
+
+// dune-common includes
+#include <dune/common/nullptr.hh>
+#include <dune/common/typetraits.hh>
+
+// dune-fem includes
+#include <dune/fem/misc/functor.hh>
+#include <dune/fem/quadrature/caching/registry.hh>
+#include <dune/fem/quadrature/cachingpointlist.hh>
+#include <dune/fem/quadrature/quadrature.hh>
+
+namespace Dune
+{
+
+  namespace Fem
+  {
+
+    // CachingShapeFunctionSet
+    // -----------------------
+
+    template< class ShapeFunctionSet >
+    class CachingShapeFunctionSet
+    : private QuadratureStorageRegistry::StorageInterface
+    {
+      typedef CachingShapeFunctionSet< ShapeFunctionSet > ThisType;
+
+    public:
+      typedef typename ShapeFunctionSet::FunctionSpaceType FunctionSpaceType;
+      
+      typedef typename ShapeFunctionSet::DomainType DomainType;
+      typedef typename ShapeFunctionSet::RangeType RangeType;
+      typedef typename ShapeFunctionSet::JacobianRangeType JacobianRangeType;
+      typedef typename ShapeFunctionSet::HessianRangeType HessianRangeType;
+
+      typedef std::vector< RangeType >         RangeVectorType ;
+      typedef std::vector< JacobianRangeType > JacobianRangeVectorType ;
+
+      typedef std::vector< RangeVectorType > ValueCacheVectorType;
+      typedef std::vector< JacobianRangeVectorType > JacobianCacheVectorType;
+
+      explicit CachingShapeFunctionSet ( const GeometryType &type,
+                                         const ShapeFunctionSet &shapeFunctionSet = ShapeFunctionSet() )
+      : type_( type ),
+        shapeFunctionSet_( shapeFunctionSet )
+      {
+        QuadratureStorageRegistry::registerStorage( *this );
+      }
+
+      ~CachingShapeFunctionSet ();
+
+      int order () const { return shapeFunctionSet_.order(); }
+
+      std::size_t size () const
+      {
+        return shapeFunctionSet_.size();
+      }
+
+      template< class Point, class Functor >
+      void evaluateEach ( const Point &x, Functor functor ) const
+      {
+        return shapeFunctionSet_.evaluateEach( x, functor );
+      }
+
+      template< class Quadrature, class Functor >
+      void evaluateEach ( const QuadraturePointWrapper< Quadrature > &x, Functor functor ) const
+      {
+        const bool cacheable = Conversion< Quadrature, CachingInterface >::exists;
+        evaluateEach( x.quadrature(), x.point(), functor, integral_constant< bool, cacheable >() );
+      }
+     
+      template< class Point, class Functor > 
+      void jacobianEach ( const Point &x, Functor functor ) const
+      {
+        return shapeFunctionSet_.jacobianEach( x, functor );
+      }
+
+      template< class Quadrature, class Functor >
+      void jacobianEach ( const QuadraturePointWrapper< Quadrature > &x, Functor functor ) const
+      {
+        const bool cacheable = Conversion< Quadrature, CachingInterface >::exists;
+        jacobianEach( x.quadrature(), x.point(), functor, integral_constant< bool, cacheable >() );
+      }
+
+      template< class Point, class Functor > 
+      void hessianEach ( const Point &x, Functor functor ) const
+      {
+        return shapeFunctionSet_.hessianEach( x, functor );
+      }
+
+      GeometryType type () const DUNE_DEPRECATED {  return type_; }
+      
+      GeometryType geometryType () const DUNE_DEPRECATED {  return type(); }
+
+      template < class QuadratureType >
+      const RangeVectorType& rangeCache( const QuadratureType& quadrature ) const
+      {
+        return ReturnCache< QuadratureType, Conversion< QuadratureType, CachingInterface >::exists > ::
+          ranges( *this, quadrature, valueCaches_, localRangeCache_ );
+      }
+
+      template < class QuadratureType >
+      const JacobianRangeVectorType& jacobianCache( const QuadratureType& quadrature ) const
+      {
+        return ReturnCache< QuadratureType, Conversion< QuadratureType, CachingInterface >::exists > ::
+          jacobians( *this, quadrature, jacobianCaches_, localJacobianCache_ );
+      }
+
+    private:
+      template< class Quad, bool cacheable >
+      struct ReturnCache
+      {
+        static const RangeVectorType&
+        ranges( const ThisType& shapeFunctionSet,
+                const Quad& quad,
+                const ValueCacheVectorType&,
+                RangeVectorType& storage )
+        {
+          abort();
+          // evaluate all basis functions and multiply with dof value 
+          const unsigned int nop  = quad.nop();
+          const unsigned int size = shapeFunctionSet.size();
+
+          // make sure cache has the appropriate size 
+          storage.resize( size * nop * 1000 );
+
+          for( unsigned int qp = 0 ; qp < nop; ++ qp )
+          {
+            const int cacheQp = quad.cachingPoint( qp );
+            AssignFunctor< RangeType* > funztor( &(storage[ cacheQp * size ]) );
+            shapeFunctionSet.evaluateEach( quad[ qp ], funztor );
+          }
+          return storage;
+        }
+
+        static const JacobianRangeVectorType&
+        jacobians( const ThisType& shapeFunctionSet,
+                   const Quad& quad,
+                   const JacobianCacheVectorType&,
+                   JacobianRangeVectorType& storage )
+        {
+          // evaluate all basis functions and multiply with dof value 
+          const unsigned int nop  = quad.nop();
+          const unsigned int size = shapeFunctionSet.size();
+
+          // make sure cache has the appropriate size 
+          storage.resize( size * nop * 1000 );
+
+          for( unsigned int qp = 0 ; qp < nop; ++ qp )
+          {
+            const int cacheQp = quad.cachingPoint( qp );
+            AssignFunctor< JacobianRangeType* > funztor( &storage[ cacheQp * size ] );
+            shapeFunctionSet.jacobianEach( quad[ qp ], funztor );
+          }
+          return storage;
+        }
+      };
+
+      template< class Quad >
+      struct ReturnCache< Quad, true >
+      {
+        static const RangeVectorType&
+        ranges( const ThisType& shapeFunctionSet,
+                const Quad& quad,
+                const ValueCacheVectorType& cache,
+                const RangeVectorType& )
+        {
+          return cache[ quad.id() ];
+        }
+
+        static const JacobianRangeVectorType&
+        jacobians( const ThisType& shapeFunctionSet,
+                   const Quad& quad,
+                   const JacobianCacheVectorType& cache,
+                   const JacobianRangeVectorType& )
+        {
+          return cache[ quad.id() ];
+        }
+      };
+
+
+      template< class Quadrature, class Functor >
+      void evaluateEach ( const Quadrature &quadrature, std::size_t pt, Functor functor,
+                          integral_constant< bool, false > ) const
+      {
+        evaluateEach( quadrature.point( pt ), functor );
+      }
+
+      template< class Quadrature, class Functor >
+      void evaluateEach ( const Quadrature &quadrature, std::size_t pt, Functor functor,
+                          integral_constant< bool, true > ) const;
+
+      template< class Quadrature, class Functor >
+      void jacobianEach ( const Quadrature &quadrature, std::size_t pt, Functor functor,
+                          integral_constant< bool, false > ) const
+      {
+        jacobianEach( quadrature.point( pt ), functor );
+      }
+
+      template< class Quadrature, class Functor >
+      void jacobianEach ( const Quadrature &quadrature, std::size_t pt, Functor functor,
+                          integral_constant< bool, true > ) const;
+
+
+      void cacheQuadrature( std::size_t id, std::size_t codim, std::size_t size );
+
+      template< class PointVector >
+      void cachePoints ( std::size_t id, const PointVector &points );
+
+      // prohibit copying and assignment
+      CachingShapeFunctionSet ( const ThisType & );
+      const ThisType &operator= ( const ThisType & );
+
+      GeometryType type_;
+      ShapeFunctionSet shapeFunctionSet_;
+      ValueCacheVectorType valueCaches_;
+      JacobianCacheVectorType jacobianCaches_;
+
+      mutable RangeVectorType          localRangeCache_ ;
+      mutable JacobianRangeVectorType  localJacobianCache_;
+
+    };
+
+
+
+    // Implementation of CachingShapeFunctionSet
+    // -----------------------------------------
+
+    template< class ShapeFunctionSet >
+    inline CachingShapeFunctionSet< ShapeFunctionSet >::~CachingShapeFunctionSet ()
+    {
+      QuadratureStorageRegistry::unregisterStorage( *this );
+      //for( typename ValueCacheVectorType::iterator it = valueCaches_.begin(); it != valueCaches_.end(); ++it )
+      //  delete *it;
+      //for( typename JacobianCacheVectorType::iterator it = jacobianCaches_.begin(); it != jacobianCaches_.end(); ++it )
+      //  delete *it;
+    }
+
+
+    template< class ShapeFunctionSet >
+    template< class Quadrature, class Functor >
+    inline void CachingShapeFunctionSet< ShapeFunctionSet >
+      ::evaluateEach ( const Quadrature &quadrature, std::size_t pt, Functor functor,
+                       integral_constant< bool, true > ) const
+    {
+      assert( (quadrature.id() < valueCaches_.size()) && valueCaches_[ quadrature.id() ].size() );
+      const RangeVectorType& cache = valueCaches_[ quadrature.id() ];
+
+      const std::size_t numShapeFunctions = size();
+      const std::size_t cpt = quadrature.cachingPoint( pt );
+      for( std::size_t i = 0; i < numShapeFunctions; ++i )
+        functor( i, cache[ cpt*numShapeFunctions + i ] );
+    }
+
+
+    template< class ShapeFunctionSet >
+    template< class Quadrature, class Functor >
+    inline void CachingShapeFunctionSet< ShapeFunctionSet >
+      ::jacobianEach ( const Quadrature &quadrature, std::size_t pt, Functor functor,
+                       integral_constant< bool, true > ) const
+    {
+      assert( (quadrature.id() < jacobianCaches_.size()) && jacobianCaches_[ quadrature.id() ].size() );
+      const JacobianRangeVectorType& cache = jacobianCaches_[ quadrature.id() ];
+
+      const std::size_t numShapeFunctions = size();
+      const std::size_t cpt = quadrature.cachingPoint( pt );
+      for( std::size_t i = 0; i < numShapeFunctions; ++i )
+        functor( i, cache[ cpt*numShapeFunctions + i ] );
+    }
+
+
+    template< class ShapeFunctionSet >
+    inline void CachingShapeFunctionSet< ShapeFunctionSet >
+      ::cacheQuadrature( std::size_t id, std::size_t codim, std::size_t size )
+    {
+      if( id >= valueCaches_.size() )
+      {
+        valueCaches_.resize( id+1 );
+        jacobianCaches_.resize( id+1 );
+      }
+      assert( valueCaches_[ id ].size() == jacobianCaches_[ id ].size() );
+
+      if( valueCaches_[ id ].size() == 0 )
+      {
+        typedef typename FunctionSpaceType::DomainFieldType ctype;
+        const int dim = FunctionSpaceType::dimDomain;
+        switch( codim )
+        {
+        case 0:
+          cachePoints( id, PointProvider< ctype, dim, 0 >::getPoints( id, type_ ) );
+          break;
+
+        case 1:
+          cachePoints( id, PointProvider< ctype, dim, 1 >::getPoints( id, type_ ) );
+          break;
+
+        default:
+          DUNE_THROW( NotImplemented, "Caching for codim > 1 not implemented." );
+        }
+      }
+    }
+
+
+    template< class ShapeFunctionSet >
+    template< class PointVector >
+    inline void CachingShapeFunctionSet< ShapeFunctionSet >
+      ::cachePoints ( std::size_t id, const PointVector &points )
+    {
+      const std::size_t numShapeFunctions = size();
+      const std::size_t numPoints = points.size();
+
+      RangeVectorType& values = valueCaches_[ id ];
+      values.resize( numShapeFunctions * numPoints );
+
+      JacobianRangeVectorType& jacobians = jacobianCaches_[ id ];
+      jacobians.resize( numShapeFunctions * numPoints );
+
+      //RangeType *values = new RangeType[ numShapeFunctions * numPoints ];
+      //JacobianRangeType *jacobians = new JacobianRangeType[ numShapeFunctions * numPoints ];
+      //if( !values || !jacobians )
+      //  DUNE_THROW( OutOfMemoryError, "Unable to allocate shape function set caches." );
+
+      std::cout << "cache point size = " << numPoints << std::endl;
+
+      for( std::size_t pt = 0; pt < numPoints; ++pt )
+      {
+        evaluateEach( points[ pt ], AssignFunctor< RangeType* >( &values[ pt*numShapeFunctions ] ) );
+        jacobianEach( points[ pt ], AssignFunctor< JacobianRangeType* >( &jacobians[ pt*numShapeFunctions ] ) );
+      }
+
+      //std::cout << "cache point size = " << numPoints << " ptr  = " << values << std::endl;
+      //std::cout << "cache point size = " << numPoints << " jptr = " << jacobians << std::endl;
+    }
+
+  } // namespace Fem
+
+} // namespace Dune
+
+#endif // #ifndef DUNE_FEM_SPACE_SHAPEFUNCTIONSET_CACHING_HH
diff --git a/dune/fem-dg/main/codegen2.hh b/dune/fem-dg/main/codegen2.hh
index bc7fe2d9..be1ff641 100644
--- a/dune/fem-dg/main/codegen2.hh
+++ b/dune/fem-dg/main/codegen2.hh
@@ -127,12 +127,7 @@ namespace Fem {
       out << std::endl;
 
       // make length simd conform 
-      out << "    field_type resultTmp[ " << numRows * dimRange << " ] = { 0";
-      for( size_t row = 1; row < numRows * dimRange; ++ row )
-      {
-        out << ", 0";
-      }
-      out << " };" << std::endl << std::endl;
+      out << "    field_type resultTmp[ " << numRows * dimRange << " ] = { 0 };" << std::endl << std::endl;
 
       for(int r=0; r<dimRange ; ++r ) 
       {
@@ -290,12 +285,8 @@ namespace Fem {
       out << "    typedef typename ScalarRangeType :: field_type field_type;" << std::endl;
       out << std::endl;
 
-      out << "    double dofResult[ " << numCols * dimRange << " ] = { 0";
+      out << "    double dofResult[ " << numCols * dimRange << " ] = { 0 };" << std::endl << std::endl;
       const size_t simdRows  = simdWidth * (numRows / simdWidth) ;
-      for( size_t col = 1; col < dimRange * numCols; ++ col )
-        out << ", 0";
-      out << " };" << std::endl;
-      out << std::endl;
 
       if( simdRows > 0 ) 
       {
@@ -483,12 +474,7 @@ namespace Fem {
       for( int d = 0; d < dim ; ++ d ) 
       {
         // make length simd conform 
-        out << "    field_type resultTmp" << d << "[ " << numRows * dimRange << " ] = { 0";
-        for( size_t row = 1; row < numRows * dimRange; ++ row )
-        {
-          out << ", 0";
-        }
-        out << " };" << std::endl;
+        out << "    field_type resultTmp" << d << "[ " << numRows * dimRange << " ] = { 0 };" << std::endl;
       }
       out << std::endl;
 
diff --git a/dune/fem-dg/main/default.hh b/dune/fem-dg/main/default.hh
index 47d70c36..73aa4fac 100644
--- a/dune/fem-dg/main/default.hh
+++ b/dune/fem-dg/main/default.hh
@@ -22,7 +22,8 @@
 #define NEWBASEFCT_CACHING
 
 #ifdef NEWBASEFCT_CACHING
-#include <dune/fem/space/shapefunctionset/caching.hh>
+//#include <dune/fem/space/shapefunctionset/caching.hh>
+#include "caching2.hh"
 #else 
 #include "caching.hh"
 #endif
@@ -116,8 +117,13 @@ namespace Dune
       enum { dimDomain = FunctionSpaceType::dimDomain };
       enum { dimRange  = FunctionSpaceType::dimRange  };
 
+#ifdef NEWBASEFCT_CACHING
+      typedef std::vector< ScalarRangeType >          RangeVectorType;
+      typedef std::vector< ScalarJacobianRangeType >  JacobianRangeVectorType;
+#else
       typedef MutableArray< MutableArray< ScalarRangeType > >         RangeVectorType;
       typedef MutableArray< MutableArray< ScalarJacobianRangeType > > JacobianRangeVectorType;
+#endif
 
       //! \brief constructor
       DefaultBasisFunctionSet ()
@@ -418,24 +424,34 @@ namespace Dune
       GeometryType geometry () const { return entity().geometry(); }
 
       template <class QuadratureType>
+        /*
 #ifdef NEWBASEFCT_CACHING
       const ScalarRangeType* rangeCache( const QuadratureType& quad ) const 
+      { 
+        return shapeFunctionSet().scalarShapeFunctionSet().impl().rangeCache( quad );
+      }
 #else
+*/
       const RangeVectorType& rangeCache( const QuadratureType& quad ) const 
-#endif
       { 
         return shapeFunctionSet().scalarShapeFunctionSet().impl().rangeCache( quad );
       }
+//#endif
 
       template <class QuadratureType>
+        /*
 #ifdef NEWBASEFCT_CACHING
       const ScalarJacobianRangeType* jacobianCache( const QuadratureType& quad ) const 
+      { 
+        return shapeFunctionSet().scalarShapeFunctionSet().impl().jacobianCache( quad );
+      }
 #else
+*/
       const JacobianRangeVectorType& jacobianCache( const QuadratureType& quad ) const 
-#endif
       { 
         return shapeFunctionSet().scalarShapeFunctionSet().impl().jacobianCache( quad );
       }
+//#endif
     private:
       const EntityType *entity_;
       ShapeFunctionSetType shapeFunctionSet_;
-- 
GitLab