diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 0740c40e51fb7240b1d22fe5e2e82a7ef2b24570..8a2ce3811d9b31a93eaeee7c72d21edea6504d5d 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -1,3 +1,8 @@
+if(dune-common_VERSION VERSION_GREATER_EQUAL 2.11.0)
+  add_executable("biharmonic" biharmonic.cc)
+  dune_target_enable_all_packages(biharmonic)
+endif()
+
 add_executable("hyperelasticity" hyperelasticity.cc)
 dune_target_enable_all_packages(hyperelasticity)
 
@@ -15,6 +20,14 @@ dune_target_enable_all_packages(poisson-pq2)
 add_executable("stokes-taylorhood" stokes-taylorhood.cc)
 dune_target_enable_all_packages("stokes-taylorhood")
 
+if(dune-common_VERSION VERSION_GREATER_EQUAL 2.11.0)
+  add_executable("poisson-hermite" poisson-hermite.cc)
+  dune_target_enable_all_packages("poisson-hermite")
+
+  add_executable("poisson-hermite-nitsche" poisson-hermite-nitsche.cc)
+  dune_target_enable_all_packages("poisson-hermite-nitsche")
+endif()
+
 add_executable("poisson-mfem" poisson-mfem.cc)
 dune_target_enable_all_packages("poisson-mfem")
 
diff --git a/examples/biharmonic.cc b/examples/biharmonic.cc
new file mode 100644
index 0000000000000000000000000000000000000000..89584d74851b7016c67d4af461d533ffe32b43ba
--- /dev/null
+++ b/examples/biharmonic.cc
@@ -0,0 +1,348 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#include <config.h>
+
+#include <vector>
+#include <cmath>
+
+#include <dune/common/bitsetvector.hh>
+#include <dune/common/version.hh>
+
+#include <dune/geometry/quadraturerules.hh>
+
+#include <dune/grid/yaspgrid.hh>
+#include <dune/grid/uggrid.hh>
+
+#include <dune/istl/matrix.hh>
+#include <dune/istl/bcrsmatrix.hh>
+#include <dune/istl/matrixindexset.hh>
+#include <dune/istl/preconditioners.hh>
+#include <dune/istl/solvers.hh>
+
+#include <dune/functions/functionspacebases/interpolate.hh>
+#include <dune/functions/functionspacebases/morleybasis.hh>
+#include <dune/functions/functionspacebases/boundarydofs.hh>
+#include <dune/functions/gridfunctions/discreteglobalbasisfunction.hh>
+#include <dune/functions/gridfunctions/gridviewfunction.hh>
+
+#if DUNE_VERSION_GT(DUNE_VTK, 2, 9)
+#include <dune/vtk/vtkwriter.hh>
+#include <dune/vtk/datacollectors/discontinuouslagrangedatacollector.hh>
+#else
+#include <dune/grid/io/file/vtk/subsamplingvtkwriter.hh>
+#endif
+
+#include <dune/fufem/functions/adolcfunction.hh>
+#include <dune/fufem/parallel/elementcoloring.hh>
+#include <dune/fufem/boundarypatch.hh>
+#include <dune/fufem/functiontools/boundarydofs.hh>
+#include <dune/fufem/forms/forms.hh>
+
+#include "utilities.hh"
+
+template<class B, class TP, std::size_t argIndex>
+class FEFunctionHessianOperator : public Dune::Fufem::Forms::FEOperatorBase<B, TP, argIndex>
+{
+  using Base = Dune::Fufem::Forms::FEOperatorBase<B, TP, argIndex>;
+  using Node = typename Base::Node;
+  using LeafLBTraits = typename Base::LeafNode::FiniteElement::Traits::LocalBasisType::Traits;
+
+public:
+
+  using Element = typename Base::Element;
+  using Range = typename LeafLBTraits::HessianType;
+
+  using Base::Base;
+
+  class LocalOperator : public Base::LocalOperator
+  {
+    using Base::LocalOperator::quadratureRuleKey_;
+    using Base::LocalOperator::leafNode;
+    using Base::LocalOperator::leafNodeCache;
+    using Base::LocalOperator::LocalOperator;
+
+  public:
+
+    using Range = typename FEFunctionHessianOperator::Range;
+
+    void bind(const Element&)
+    {
+      quadratureRuleKey_ = QuadratureRuleKey(leafNode().finiteElement()).derivative().derivative();
+    }
+
+    template<class CacheManager>
+    void bindToCaches(CacheManager& cacheManager)
+    {
+      const auto& rule = cacheManager.rule();
+      const auto& localBasis = leafNode().finiteElement().localBasis();
+      const auto& geometry = leafNode().element().geometry();
+      const auto& center = leafNode().element().geometry().center();
+      const auto& invJ = geometry.jacobianInverse(center);
+      const auto& invJT = geometry.jacobianInverseTransposed(center);
+      globalHessianCache_.resize(rule.size());
+      for(auto i : Dune::range(rule.size()))
+      {
+        globalHessianCache_[i].resize(localBasis.size());
+        localBasis.evaluateHessian(rule[i].position(), globalHessianCache_[i]);
+        for(auto k : Dune::range(localBasis.size()))
+        {
+          auto localHessian = globalHessianCache_[i][k];
+          globalHessianCache_[i][k] = invJT*localHessian*invJ;
+        }
+      }
+    }
+
+    template<class AugmentedLocalDomain>
+    auto operator()(const AugmentedLocalDomain& x) const
+    {
+      using namespace Dune::Fufem::Forms::Impl::Tensor;
+      const auto& globalHessians = globalHessianCache_[x.index()];
+      return LazyTensorBlock(
+        [&](const auto& i) {
+            return globalHessians[i];
+          },
+          Extents(leafNode().size()),
+          Offsets(leafNode().localIndex(0))
+        );
+    }
+
+  private:
+    std::vector<std::vector<Range>> globalHessianCache_;
+  };
+
+  friend LocalOperator localOperator(const FEFunctionHessianOperator& op)
+  {
+    return LocalOperator(op);
+  }
+
+};
+
+template<class B, class TP, std::size_t argIndex>
+auto hessian(const Dune::Fufem::Forms::FEFunctionOperator<B, TP, argIndex>& op)
+{
+  return FEFunctionHessianOperator<B, TP, argIndex>(std::get<0>(op.basis()), std::get<0>(op.treePath()), op.isAffine());
+}
+
+using namespace Dune;
+
+
+int main (int argc, char *argv[]) try
+{
+  std::size_t threadCount = 4;
+  if (argc>1)
+    threadCount = std::stoul(std::string(argv[1]));
+
+  std::size_t refinements = 4;
+  if (argc>2)
+    refinements = std::stoul(std::string(argv[2]));
+
+
+  // Set up MPI, if available
+  MPIHelper::instance(argc, argv);
+
+  ///////////////////////////////////
+  //   Generate the grid
+  ///////////////////////////////////
+
+  auto grid = Dune::StructuredGridFactory<Dune::UGGrid<2>>::createSimplexGrid({0,0}, {1, 1}, {2, 2});
+
+  grid->globalRefine(refinements);
+
+  auto gridView = grid->leafGridView();
+  using GridView = decltype(gridView);
+
+  /////////////////////////////////////////////////////////
+  //   Choose a finite element space
+  /////////////////////////////////////////////////////////
+
+  using namespace Functions::BasisFactory;
+
+  auto basis = makeBasis(gridView, morley());
+
+  /////////////////////////////////////////////////////////
+  //   Stiffness matrix and right hand side vector
+  /////////////////////////////////////////////////////////
+
+  using Vector = Dune::BlockVector<double>;
+  using BitVector = Dune::BlockVector<double>;
+  using Matrix = Dune::BCRSMatrix<double>;
+
+  Vector rhs;
+  Vector sol;
+  BitVector isConstrained;
+  Matrix stiffnessMatrix;
+
+  /////////////////////////////////////////////////////////
+  //  Assemble the system
+  /////////////////////////////////////////////////////////
+
+  std::cout << "Number of DOFs is " << basis.dimension() << std::endl;
+
+  auto dirichletIndicatorFunction = [](auto x) { return true; };
+
+  auto dirichletPatch = BoundaryPatch(basis.gridView());
+  dirichletPatch.insertFacesByProperty([&](auto&& intersection) { return dirichletIndicatorFunction(intersection.geometry().center()); });
+
+  auto pi = std::acos(-1.0);
+
+//  auto exactSol = [pi](const auto& x) { return std::pow(std::sin(pi*x[0]), 2.)*std::pow(std::sin(pi*x[1]), 2.); };
+//  auto rightHandSide = [exactSol,pi] (const auto& x) {
+//    return std::pow(pi, 4.)*(8.-24.*std::pow(std::sin(pi*x[0]), 2.)-24.*std::pow(std::sin(pi*x[1]), 2.) + 64*exactSol(x));
+//  };
+//  auto dirichletValuesC0 = [pi](const auto& x){
+//    return 0.0;
+//  };
+
+  auto exactSol = [&](const auto& x) { return 16*x[0]*(x[0]-1)*x[1]*(x[1]-1); };
+  auto rightHandSide = [] (const auto& x) { return 8*16;};
+  auto dirichletValuesC0 = [&](const auto& x){
+    return exactSol(x);
+  };
+
+//  auto exactSol = [&](const auto& x) { return 0; };
+//  auto rightHandSide = [] (const auto& x) { return 10;};
+//  auto dirichletValuesC0 = [pi](const auto& x){
+//    using std::sin;
+//    return sin(2*pi*x[0]);
+//  };
+
+  using SignatureTag = Dune::Functions::SignatureTag<double(Dune::FieldVector<double,2>)>;
+  auto dirichletValues = Dune::Fufem::makeAdolCFunction(SignatureTag(), dirichletValuesC0);
+
+// Disable parallel assembly for UGGrid before 2.10
+#if DUNE_VERSION_GT(DUNE_FUNCTIONS, 2, 9)
+  auto gridViewPartition = Dune::Fufem::coloredGridViewPartition(gridView);
+  auto globalAssembler = GlobalAssembler(basis, basis, 1, std::cref(gridViewPartition), threadCount);
+#else
+  auto globalAssembler = GlobalAssembler(basis, basis, 1);
+#endif
+
+  {
+    using namespace ::Dune::Fufem::Forms;
+    namespace F = ::Dune::Fufem::Forms;
+
+    auto v = testFunction(basis, NonAffineFamiliy{});
+    auto u = trialFunction(basis, NonAffineFamiliy{});
+    auto f = Coefficient(Functions::makeGridViewFunction(rightHandSide, gridView));
+
+//    auto a = integrate(dot(grad(u), grad(v)));
+    auto a = integrate(dot(hessian(u), hessian(v)));
+    auto b = integrate(f*v);
+
+    globalAssembler.assembleOperator(stiffnessMatrix, a);
+    globalAssembler.assembleFunctional(rhs, b);
+  }
+
+  // *********************************************
+  // Initialize solution vector
+  // *********************************************
+
+  Dune::Functions::istlVectorBackend(sol).resize(basis);
+  Dune::Functions::istlVectorBackend(sol) = 0;
+
+  // *********************************************
+  // Incorporate Dirichlet boundary conditions
+  // *********************************************
+
+  Dune::Functions::istlVectorBackend(isConstrained).resize(basis);
+  Dune::Functions::istlVectorBackend(isConstrained) = 0;
+
+  {
+    auto normalDerivativeOrder = [](const auto& functionalDescriptor, const auto& unitNormal) -> std::size_t
+    {
+      auto&& partialOrder = functionalDescriptor.partialDerivativeOrder();
+      for(auto i : Dune::range(unitNormal.size()))
+      {
+        if ((1-std::fabs(unitNormal[i]) < 1e-10))
+          return partialOrder[i] + functionalDescriptor.normalDerivativeOrder();
+      }
+      return functionalDescriptor.normalDerivativeOrder();
+    };
+    auto isConstrainedBackend = Dune::Functions::istlVectorBackend(isConstrained);
+    auto localView = basis.localView();
+    auto seDOFs = subEntityDOFs(basis);
+    for(auto&& intersection : dirichletPatch)
+    {
+      localView.bind(intersection.inside());
+      seDOFs.bind(localView, intersection);
+      Dune::TypeTree::forEachLeafNode(localView.tree(), [&](auto&& node, auto&& treePath) {
+        for(auto k : Dune::range(node.size()))
+        {
+          auto localIndex = node.localIndex(k);
+          if (seDOFs.contains(localIndex))
+          {
+            auto functionalDescriptor = node.finiteElement().localInterpolation().functionalDescriptor(k);
+            auto normal = intersection.centerUnitOuterNormal();
+            if (normalDerivativeOrder(functionalDescriptor, normal)<=1)
+              isConstrainedBackend[localView.index(localIndex)] = true;
+          }
+        }
+      });
+    }
+  }
+
+  interpolate(basis, sol, dirichletValues, isConstrained);
+
+  incorporateEssentialConstraints(basis, stiffnessMatrix, rhs, isConstrained, sol);
+
+
+  ////////////////////////////
+  //   Compute solution
+  ////////////////////////////
+
+  // Technicality:  turn the matrix into a linear operator
+  MatrixAdapter<Matrix,Vector,Vector> op(stiffnessMatrix);
+
+  // Sequential incomplete LU decomposition as the preconditioner
+  SeqILDL<Matrix,Vector,Vector> ildl(stiffnessMatrix,1.0);
+
+  // Preconditioned conjugate-gradient solver
+  CGSolver<Vector> cg(op,
+                          ildl, // preconditioner
+                          1e-4, // desired residual reduction factor
+                          100,   // maximum number of iterations
+                          2);   // verbosity of the solver
+
+  // Object storing some statistics about the solving process
+  InverseOperatorResult statistics;
+
+  // Solve!
+  cg.apply(sol, rhs, statistics);
+
+  ////////////////////////////////////////////////////////////////////////////
+  //  Make a discrete function from the FE basis and the coefficient vector
+  ////////////////////////////////////////////////////////////////////////////
+
+  auto solFunction = Functions::makeDiscreteGlobalBasisFunction<double>(basis, sol);
+  auto exactSolFunction = Functions::makeGridViewFunction(exactSol, gridView);
+
+  //////////////////////////////////////////////////////////////////////////////////////////////
+  //  Write result to VTK file
+  //////////////////////////////////////////////////////////////////////////////////////////////
+
+  using namespace ::Dune::Fufem::Forms;
+  auto u = bindToCoefficients(trialFunction(basis, NonAffineFamiliy{}), sol);
+
+#if DUNE_VERSION_GT(DUNE_VTK, 2, 9)
+  using Dune::Vtk::DiscontinuousLagrangeDataCollector;
+  using Dune::Vtk::UnstructuredGridWriter;
+  auto vtkWriter = UnstructuredGridWriter(DiscontinuousLagrangeDataCollector(basis.gridView(), 3));
+  vtkWriter.addPointData(u, VTK::FieldInfo("sol", VTK::FieldInfo::Type::scalar, 1));
+  vtkWriter.addPointData(jacobian(u), VTK::FieldInfo("grad_sol", VTK::FieldInfo::Type::vector, 2));
+  vtkWriter.addPointData(exactSolFunction, VTK::FieldInfo("sol_exact", VTK::FieldInfo::Type::scalar, 1));
+  vtkWriter.write("biharmonic");
+#else
+  // We need to use the SubsamplingVTKWriter, because dune-vtk's LagrangeDataCollector
+  // does not work with mixed grids in 2.9.
+  SubsamplingVTKWriter<GridView> vtkWriter(gridView, Dune::refinementLevels(2));
+  vtkWriter.addVertexData(u, VTK::FieldInfo("sol", VTK::FieldInfo::Type::scalar, 1));
+  vtkWriter.addVertexData(grad(u), VTK::FieldInfo("grad_sol", VTK::FieldInfo::Type::vector, 2));
+  vtkWriter.addVertexData(exactSolFunction, VTK::FieldInfo("sol_exact", VTK::FieldInfo::Type::scalar, 1));
+  vtkWriter.write("biharmonic");
+#endif
+
+ }
+// Error handling
+ catch (Exception& e) {
+    std::cout << e.what() << std::endl;
+ }
diff --git a/examples/poisson-hermite-nitsche.cc b/examples/poisson-hermite-nitsche.cc
new file mode 100644
index 0000000000000000000000000000000000000000..cd18d5a47b0990d825a9f7552fb3953d6095d9ad
--- /dev/null
+++ b/examples/poisson-hermite-nitsche.cc
@@ -0,0 +1,320 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#include <config.h>
+
+#include <vector>
+#include <cmath>
+
+#include <dune/common/bitsetvector.hh>
+#include <dune/common/version.hh>
+
+#include <dune/geometry/quadraturerules.hh>
+
+#include <dune/grid/yaspgrid.hh>
+#include <dune/grid/uggrid.hh>
+
+#include <dune/istl/matrix.hh>
+#include <dune/istl/bcrsmatrix.hh>
+#include <dune/istl/matrixindexset.hh>
+#include <dune/istl/preconditioners.hh>
+#include <dune/istl/solvers.hh>
+
+#include <dune/functions/functionspacebases/interpolate.hh>
+#include <dune/functions/functionspacebases/boundarydofs.hh>
+#include <dune/functions/functionspacebases/lagrangebasis.hh>
+#include <dune/functions/functionspacebases/cubichermitebasis.hh>
+#include <dune/functions/gridfunctions/discreteglobalbasisfunction.hh>
+#include <dune/functions/gridfunctions/gridviewfunction.hh>
+#include <dune/functions/gridfunctions/facenormalgridfunction.hh>
+#include <dune/functions/gridfunctions/composedgridfunction.hh>
+
+#if DUNE_VERSION_GT(DUNE_VTK, 2, 9)
+#include <dune/vtk/vtkwriter.hh>
+#include <dune/vtk/datacollectors/lagrangedatacollector.hh>
+#else
+#include <dune/grid/io/file/vtk/subsamplingvtkwriter.hh>
+#endif
+
+#include <dune/grid/io/file/vtk/subsamplingvtkwriter.hh>
+
+#include <dune/fufem/parallel/elementcoloring.hh>
+#include <dune/fufem/assemblers/dunefunctionsoperatorassembler.hh>
+#include <dune/fufem/assemblers/dunefunctionsfunctionalassembler.hh>
+#include <dune/fufem/boundarypatch.hh>
+#include <dune/fufem/functiontools/boundarydofs.hh>
+#include <dune/fufem/forms/forms.hh>
+
+#include "utilities.hh"
+
+using namespace Dune;
+
+
+// A (rather strange) grid function that maps x to a tuple
+// of element, local geometry, and local coordinate.
+// By composition, this can be can use to implements element
+// or geometry-based grid functions like, e.g. the local mesh
+// or element index.
+template<class GV>
+class ElementGeometryGridFunction
+{
+public:
+  using GridView = GV;
+  using EntitySet = Dune::Functions::GridViewEntitySet<GridView, 0>;
+  using Element = typename EntitySet::Element;
+
+  using LocalDomain = typename EntitySet::LocalCoordinate;
+  using Domain = typename EntitySet::GlobalCoordinate;
+  using Range = std::tuple<const Element&, const typename Element::Geometry&, LocalDomain>;
+
+private:
+
+  using Traits = Dune::Functions::Imp::GridFunctionTraits<Range(Domain), EntitySet, Dune::Functions::DefaultDerivativeTraits, 16>;
+
+  class LocalFunction
+  {
+    using Geometry = typename Element::Geometry;
+    static const int dimension = GV::dimension;
+  public:
+
+    void bind(const Element& element)
+    {
+      element_ = element;
+      geometry_.emplace(element_.geometry());
+    }
+
+    void unbind()
+    {
+      geometry_.reset();
+    }
+
+    /** \brief Return if the local function is bound to a grid element
+     */
+    bool bound() const
+    {
+      return static_cast<bool>(geometry_);
+    }
+
+    Range operator()(const LocalDomain& x) const
+    {
+      return {element_, *geometry_, x};
+    }
+
+    //! Return the bound element stored as copy in the \ref bind function.
+    const Element& localContext() const
+    {
+      return element_;
+    }
+
+    //! Not implemented.
+    friend typename Traits::LocalFunctionTraits::DerivativeInterface derivative(const LocalFunction& t)
+    {
+      DUNE_THROW(NotImplemented,"not implemented");
+    }
+
+  private:
+    std::optional<Geometry> geometry_;
+    Element element_;
+  };
+
+public:
+
+  ElementGeometryGridFunction(const GridView& gridView) :
+    entitySet_(gridView)
+  {}
+
+  Range operator()(const Domain& x) const
+  {
+    DUNE_THROW(NotImplemented,"not implemented");
+  }
+
+  friend typename Traits::DerivativeInterface derivative(const ElementGeometryGridFunction& t)
+  {
+    DUNE_THROW(NotImplemented,"not implemented");
+  }
+
+  friend LocalFunction localFunction(const ElementGeometryGridFunction& t)
+  {
+    return LocalFunction{};
+  }
+
+  const EntitySet& entitySet() const
+  {
+    return entitySet_;
+  }
+
+private:
+  EntitySet entitySet_;
+};
+
+
+
+template<class GridView>
+auto elementSizeGridFunction(const GridView& gridView)
+{
+  static const int dimension = GridView::dimension;
+
+  auto meshSizeOfGeometry = [](auto arg)
+  {
+    const auto& [element, geometry, x] = arg;
+    auto&& re = Dune::referenceElement(geometry);
+    return std::pow(geometry.integrationElement(re.position(0, 0)), 1./dimension);
+  };
+
+  return Dune::Functions::makeComposedGridFunction(meshSizeOfGeometry, ElementGeometryGridFunction(gridView));
+}
+
+
+
+int main (int argc, char *argv[]) try
+{
+  std::size_t threadCount = 4;
+  if (argc>1)
+    threadCount = std::stoul(std::string(argv[1]));
+
+  std::size_t refinements = 4;
+  if (argc>2)
+    refinements = std::stoul(std::string(argv[2]));
+
+
+  // Set up MPI, if available
+  MPIHelper::instance(argc, argv);
+
+  ///////////////////////////////////
+  //   Generate the grid
+  ///////////////////////////////////
+
+  auto grid = Dune::StructuredGridFactory<Dune::UGGrid<2>>::createSimplexGrid({0,0}, {1, 1}, {2, 2});
+
+  grid->globalRefine(refinements);
+
+  auto gridView = grid->leafGridView();
+  using GridView = decltype(gridView);
+
+  /////////////////////////////////////////////////////////
+  //   Choose a finite element space
+  /////////////////////////////////////////////////////////
+
+  using namespace Functions::BasisFactory;
+
+  auto feBasis = makeBasis(gridView, cubicHermite());
+//  auto feBasis = makeBasis(gridView, reducedCubicHermite());
+  std::size_t order = 3;
+
+  /////////////////////////////////////////////////////////
+  //   Stiffness matrix and right hand side vector
+  /////////////////////////////////////////////////////////
+
+  using Vector = Dune::BlockVector<double>;
+  using Matrix = Dune::BCRSMatrix<double>;
+
+  Vector rhs;
+  Matrix stiffnessMatrix;
+
+  /////////////////////////////////////////////////////////
+  //  Assemble the system
+  /////////////////////////////////////////////////////////
+
+  std::cout << "Number of DOFs is " << feBasis.dimension() << std::endl;
+
+  auto dirichletIndicatorFunction = [](auto x) { return true; };
+
+  auto boundary = BoundaryPatch(feBasis.gridView(), true);
+
+  auto rightHandSide = [] (const auto& x) { return 10;};
+  auto pi = std::acos(-1.0);
+  auto dirichletValues = [pi](const auto& x){ return std::sin(2*pi*x[0]); };
+
+
+// Disable parallel assembly for UGGrid before 2.10
+#if DUNE_VERSION_GT(DUNE_FUNCTIONS, 2, 9)
+  auto gridViewPartition = Dune::Fufem::coloredGridViewPartition(gridView);
+  auto globalAssembler = GlobalAssembler(feBasis, feBasis, 1, std::cref(gridViewPartition), threadCount);
+#else
+  auto globalAssembler = GlobalAssembler(feBasis, feBasis, 1);
+#endif
+
+  {
+    using namespace ::Dune::Fufem::Forms;
+    namespace F = ::Dune::Fufem::Forms;
+
+    auto v = testFunction(feBasis, NonAffineFamiliy{});
+    auto u = trialFunction(feBasis, NonAffineFamiliy{});
+    auto f = Coefficient(Functions::makeGridViewFunction(rightHandSide, gridView), 2);
+
+    auto nu = Coefficient(Dune::Functions::FaceNormalGridFunction(gridView));
+    auto h = Coefficient(elementSizeGridFunction(gridView));
+
+    auto u_D = Coefficient(Functions::makeGridViewFunction(dirichletValues, gridView), 2);
+
+    auto D = [&](auto&& v, auto&& w) { return dot(grad(v), w); };
+    auto gamma = 10;
+
+    auto a = integrate(dot(grad(u), grad(v)));
+    auto b = integrate(f*v);
+
+    // Incorporate Dirichlet conditions weakly using Nitsche's method (Joachim Nitsche '71)
+    auto a_h = a + integrate(gamma/h*u*v - D(u,nu)*v - u*D(v,nu), boundary);
+    auto b_h = b + integrate(gamma/h*u_D*v - u_D*D(v,nu), boundary);
+
+    globalAssembler.assembleOperator(stiffnessMatrix, a_h);
+    globalAssembler.assembleFunctional(rhs, b_h);
+  }
+
+
+  /////////////////////////////////////////////////
+  //   Choose an initial iterate
+  /////////////////////////////////////////////////
+  Vector x(feBasis.size());
+  x = 0;
+
+  ////////////////////////////
+  //   Compute solution
+  ////////////////////////////
+
+  // Technicality:  turn the matrix into a linear operator
+  MatrixAdapter<Matrix,Vector,Vector> op(stiffnessMatrix);
+
+  // Sequential incomplete LU decomposition as the preconditioner
+  SeqILDL<Matrix,Vector,Vector> ildl(stiffnessMatrix,1.0);
+
+  // Preconditioned conjugate-gradient solver
+  CGSolver<Vector> cg(op,
+                          ildl, // preconditioner
+                          1e-4, // desired residual reduction factor
+                          500,   // maximum number of iterations
+                          2);   // verbosity of the solver
+
+  // Object storing some statistics about the solving process
+  InverseOperatorResult statistics;
+
+  // Solve!
+  cg.apply(x, rhs, statistics);
+
+  ////////////////////////////////////////////////////////////////////////////
+  //  Make a discrete function from the FE basis and the coefficient vector
+  ////////////////////////////////////////////////////////////////////////////
+
+  auto xFunction = Functions::makeDiscreteGlobalBasisFunction<double>(feBasis, x);
+
+  //////////////////////////////////////////////////////////////////////////////////////////////
+  //  Write result to VTK file
+  //////////////////////////////////////////////////////////////////////////////////////////////
+#if DUNE_VERSION_GT(DUNE_VTK, 2, 9)
+  using Dune::Vtk::LagrangeDataCollector;
+  using Dune::Vtk::UnstructuredGridWriter;
+  auto vtkWriter = UnstructuredGridWriter(LagrangeDataCollector(feBasis.gridView(), order));
+  vtkWriter.addPointData(xFunction, VTK::FieldInfo("x", VTK::FieldInfo::Type::scalar, 1));
+  vtkWriter.write("poisson-hermite-nitsche");
+#else
+  // We need to use the SubsamplingVTKWriter, because dune-vtk's LagrangeDataCollector
+  // does not work with mixed grids in 2.9.
+  SubsamplingVTKWriter<GridView> vtkWriter(gridView, Dune::refinementLevels(2));
+  vtkWriter.addVertexData(xFunction, VTK::FieldInfo("x", VTK::FieldInfo::Type::scalar, 1));
+  vtkWriter.write("poisson-hermite-nitsche");
+#endif
+
+ }
+// Error handling
+ catch (Exception& e) {
+    std::cout << e.what() << std::endl;
+ }
diff --git a/examples/poisson-hermite.cc b/examples/poisson-hermite.cc
new file mode 100644
index 0000000000000000000000000000000000000000..14ddc46b004e6e9fdec4d296041492de711932a1
--- /dev/null
+++ b/examples/poisson-hermite.cc
@@ -0,0 +1,242 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#include <config.h>
+
+#include <vector>
+#include <cmath>
+
+#include <dune/common/bitsetvector.hh>
+#include <dune/common/version.hh>
+
+#include <dune/geometry/quadraturerules.hh>
+
+#include <dune/grid/yaspgrid.hh>
+#include <dune/grid/uggrid.hh>
+
+#include <dune/istl/matrix.hh>
+#include <dune/istl/bcrsmatrix.hh>
+#include <dune/istl/matrixindexset.hh>
+#include <dune/istl/preconditioners.hh>
+#include <dune/istl/solvers.hh>
+
+#include <dune/functions/functionspacebases/interpolate.hh>
+#include <dune/functions/functionspacebases/cubichermitebasis.hh>
+#include <dune/functions/functionspacebases/boundarydofs.hh>
+#include <dune/functions/gridfunctions/discreteglobalbasisfunction.hh>
+#include <dune/functions/gridfunctions/gridviewfunction.hh>
+
+#if DUNE_VERSION_GT(DUNE_VTK, 2, 9)
+#include <dune/vtk/vtkwriter.hh>
+#include <dune/vtk/datacollectors/lagrangedatacollector.hh>
+#else
+#include <dune/grid/io/file/vtk/subsamplingvtkwriter.hh>
+#endif
+
+#include <dune/fufem/functions/adolcfunction.hh>
+#include <dune/fufem/parallel/elementcoloring.hh>
+#include <dune/fufem/boundarypatch.hh>
+#include <dune/fufem/functiontools/boundarydofs.hh>
+#include <dune/fufem/forms/forms.hh>
+
+#include "utilities.hh"
+
+using namespace Dune;
+
+
+
+int main (int argc, char *argv[]) try
+{
+  std::size_t threadCount = 4;
+  if (argc>1)
+    threadCount = std::stoul(std::string(argv[1]));
+
+  std::size_t refinements = 4;
+  if (argc>2)
+    refinements = std::stoul(std::string(argv[2]));
+
+
+  // Set up MPI, if available
+  MPIHelper::instance(argc, argv);
+
+  ///////////////////////////////////
+  //   Generate the grid
+  ///////////////////////////////////
+
+  auto grid = Dune::StructuredGridFactory<Dune::UGGrid<2>>::createSimplexGrid({0,0}, {1, 1}, {2, 2});
+
+  grid->globalRefine(refinements);
+
+  auto gridView = grid->leafGridView();
+  using GridView = decltype(gridView);
+
+  /////////////////////////////////////////////////////////
+  //   Choose a finite element space
+  /////////////////////////////////////////////////////////
+
+  using namespace Functions::BasisFactory;
+
+//  auto basis = makeBasis(gridView, cubicHermite());
+  auto basis = makeBasis(gridView, reducedCubicHermite());
+
+  /////////////////////////////////////////////////////////
+  //   Stiffness matrix and right hand side vector
+  /////////////////////////////////////////////////////////
+
+  using Vector = Dune::BlockVector<double>;
+  using BitVector = Dune::BlockVector<double>;
+  using Matrix = Dune::BCRSMatrix<double>;
+
+  Vector rhs;
+  Vector sol;
+  BitVector isConstrained;
+  Matrix stiffnessMatrix;
+
+  /////////////////////////////////////////////////////////
+  //  Assemble the system
+  /////////////////////////////////////////////////////////
+
+  std::cout << "Number of DOFs is " << basis.dimension() << std::endl;
+
+  auto dirichletIndicatorFunction = [](auto x) { return true; };
+
+  auto dirichletPatch = BoundaryPatch(basis.gridView());
+  dirichletPatch.insertFacesByProperty([&](auto&& intersection) { return dirichletIndicatorFunction(intersection.geometry().center()); });
+
+  auto rightHandSide = [] (const auto& x) { return 10;};
+  auto pi = std::acos(-1.0);
+  auto dirichletValuesC0 = [pi](const auto& x){
+    using std::sin;
+    return sin(2*pi*x[0]);
+  };
+
+  using SignatureTag = Dune::Functions::SignatureTag<double(Dune::FieldVector<double,2>)>;
+  auto dirichletValues = Dune::Fufem::makeAdolCFunction(SignatureTag(), dirichletValuesC0);
+
+// Disable parallel assembly for UGGrid before 2.10
+#if DUNE_VERSION_GT(DUNE_FUNCTIONS, 2, 9)
+  auto gridViewPartition = Dune::Fufem::coloredGridViewPartition(gridView);
+  auto globalAssembler = GlobalAssembler(basis, basis, 1, std::cref(gridViewPartition), threadCount);
+#else
+  auto globalAssembler = GlobalAssembler(basis, basis, 1);
+#endif
+
+  {
+    using namespace ::Dune::Fufem::Forms;
+    namespace F = ::Dune::Fufem::Forms;
+
+    auto v = testFunction(basis, NonAffineFamiliy{});
+    auto u = trialFunction(basis, NonAffineFamiliy{});
+    auto f = Coefficient(Functions::makeGridViewFunction(rightHandSide, gridView));
+
+    auto a = integrate(dot(grad(u), grad(v)));
+    auto b = integrate(f*v);
+
+    globalAssembler.assembleOperator(stiffnessMatrix, a);
+    globalAssembler.assembleFunctional(rhs, b);
+  }
+
+  // *********************************************
+  // Initialize solution vector
+  // *********************************************
+
+  Dune::Functions::istlVectorBackend(sol).resize(basis);
+  Dune::Functions::istlVectorBackend(sol) = 0;
+
+  // *********************************************
+  // Incorporate Dirichlet boundary conditions
+  // *********************************************
+
+  Dune::Functions::istlVectorBackend(isConstrained).resize(basis);
+  Dune::Functions::istlVectorBackend(isConstrained) = 0;
+
+  {
+    auto normalDerivativeOrder = [](const auto& functionalDescriptor, const auto& unitNormal) -> std::size_t
+    {
+      auto&& partialOrder = functionalDescriptor.partialDerivativeOrder();
+      for(auto i : Dune::range(unitNormal.size()))
+      {
+        if ((1-std::fabs(unitNormal[i]) < 1e-10))
+          return partialOrder[i] + functionalDescriptor.normalDerivativeOrder();
+      }
+      return functionalDescriptor.normalDerivativeOrder();
+    };
+    auto isConstrainedBackend = Dune::Functions::istlVectorBackend(isConstrained);
+    auto localView = basis.localView();
+    auto seDOFs = subEntityDOFs(basis);
+    for(auto&& intersection : dirichletPatch)
+    {
+      localView.bind(intersection.inside());
+      seDOFs.bind(localView, intersection);
+      Dune::TypeTree::forEachLeafNode(localView.tree(), [&](auto&& node, auto&& treePath) {
+        for(auto k : Dune::range(node.size()))
+        {
+          auto localIndex = node.localIndex(k);
+          if (seDOFs.contains(localIndex))
+          {
+            auto functionalDescriptor = node.finiteElement().localInterpolation().functionalDescriptor(k);
+            auto normal = intersection.centerUnitOuterNormal();
+            if (normalDerivativeOrder(functionalDescriptor, normal)==0)
+              isConstrainedBackend[localView.index(localIndex)] = true;
+          }
+        }
+      });
+    }
+  }
+
+  interpolate(basis, sol, dirichletValues, isConstrained);
+
+  incorporateEssentialConstraints(basis, stiffnessMatrix, rhs, isConstrained, sol);
+
+
+  ////////////////////////////
+  //   Compute solution
+  ////////////////////////////
+
+  // Technicality:  turn the matrix into a linear operator
+  MatrixAdapter<Matrix,Vector,Vector> op(stiffnessMatrix);
+
+  // Sequential incomplete LU decomposition as the preconditioner
+  SeqILDL<Matrix,Vector,Vector> ildl(stiffnessMatrix,1.0);
+
+  // Preconditioned conjugate-gradient solver
+  CGSolver<Vector> cg(op,
+                          ildl, // preconditioner
+                          1e-4, // desired residual reduction factor
+                          100,   // maximum number of iterations
+                          2);   // verbosity of the solver
+
+  // Object storing some statistics about the solving process
+  InverseOperatorResult statistics;
+
+  // Solve!
+  cg.apply(sol, rhs, statistics);
+
+  ////////////////////////////////////////////////////////////////////////////
+  //  Make a discrete function from the FE basis and the coefficient vector
+  ////////////////////////////////////////////////////////////////////////////
+
+  auto solFunction = Functions::makeDiscreteGlobalBasisFunction<double>(basis, sol);
+
+  //////////////////////////////////////////////////////////////////////////////////////////////
+  //  Write result to VTK file
+  //////////////////////////////////////////////////////////////////////////////////////////////
+
+#if DUNE_VERSION_GT(DUNE_VTK, 2, 9)
+  using Dune::Vtk::LagrangeDataCollector;
+  using Dune::Vtk::UnstructuredGridWriter;
+  auto vtkWriter = UnstructuredGridWriter(LagrangeDataCollector(basis.gridView(), 3));
+  vtkWriter.addPointData(solFunction, VTK::FieldInfo("sol", VTK::FieldInfo::Type::scalar, 1));
+  vtkWriter.write("poisson-hermite");
+#else
+  // We need to use the SubsamplingVTKWriter, because dune-vtk's LagrangeDataCollector
+  // does not work with mixed grids in 2.9.
+  SubsamplingVTKWriter<GridView> vtkWriter(gridView, Dune::refinementLevels(2));
+  vtkWriter.addVertexData(solFunction, VTK::FieldInfo("sol", VTK::FieldInfo::Type::scalar, 1));
+  vtkWriter.write("poisson-hermite");
+#endif
+
+ }
+// Error handling
+ catch (Exception& e) {
+    std::cout << e.what() << std::endl;
+ }