diff --git a/dune/istl/solvers.hh b/dune/istl/solvers.hh index 148e16b536cd8b9cc97be10850ee8f3056f20c0c..9b6ab7a67fb42b6293e3a7e1996bbd55b931b5bb 100644 --- a/dune/istl/solvers.hh +++ b/dune/istl/solvers.hh @@ -841,127 +841,126 @@ namespace Dune { */ virtual void apply (X& x, X& b, InverseOperatorResult& res) { - res.clear(); // clear solver statistics - Timer watch; // start a timer - _prec.pre(x,b); // prepare preconditioner - _op.applyscaleadd(-1,x,b); // overwrite b with defect/residual + // clear solver statistics + res.clear(); + // start a timer + Dune::Timer watch; + watch.reset(); + // prepare preconditioner + _prec.pre(x,b); + // overwrite rhs with defect + _op.applyscaleadd(-1,x,b); - real_type def0 = _sp.norm(b); // compute residual norm + // compute residual norm + field_type def0 = _sp.norm(b); - if (def0<1E-30) // convergence check - { - res.converged = true; - res.iterations = 0; // fill statistics - res.reduction = 0; - res.conv_rate = 0; - res.elapsed=0; - if (_verbose>0) // final print - std::cout << "=== rate=" << res.conv_rate << ", T=" << res.elapsed << ", TIT=" << res.elapsed << ", IT=0" << std::endl; - return; - } - - if (_verbose>0) // printing - { + // printing + if(_verbose > 0) { std::cout << "=== MINRESSolver" << std::endl; - if (_verbose>1) { + if(_verbose > 1) { this->printHeader(std::cout); this->printOutput(std::cout,0,def0); } } - // some local variables - real_type def=def0; // the defect/residual norm - field_type alpha, // recurrence coefficients as computed in the Lanczos alg making up the matrix T - c[2]={0.0, 0.0}, // diagonal entry of Givens rotation - s[2]={0.0, 0.0}; // off-diagonal entries of Givens rotation - real_type beta; + // check for convergence + if(def0 < 1e-30 ) { + res.converged = true; + res.iterations = 0; + res.reduction = 0; + res.conv_rate = 0; + res.elapsed = 0.0; + // final print + if(_verbose > 0) + std::cout << "=== rate=" << res.conv_rate + << ", T=" << res.elapsed + << ", TIT=" << res.elapsed + << ", IT=" << res.iterations + << std::endl; + return; + } - field_type T[3]={0.0, 0.0, 0.0}; // recurrence coefficients (column k of Matrix T) + // the defect norm + field_type def = def0; + // recurrence coefficients as computed in Lanczos algorithm + field_type alpha, beta; + // diagonal entries of givens rotation + Dune::array<field_type,2> c{{0.0,0.0}}; + // off-diagonal entries of givens rotation + Dune::array<field_type,2> s{{0.0,0.0}}; - X z(b.size()), // some temporary vectors - dummy(b.size()); + // recurrence coefficients (column k of tridiag matrix T_k) + Dune::array<field_type,3> T{{0.0,0.0,0.0}}; - field_type xi[2]={1.0, 0.0}; + // the rhs vector of the min problem + Dune::array<field_type,2> xi{{1.0,0.0}}; - // initialize - z = 0.0; // clear correction + // some temporary vectors + X z(b), dummy(b); - _prec.apply(z,b); // apply preconditioner z=M^-1*b + // initialize and clear correction + z = 0.0; + _prec.apply(z,b); beta = std::sqrt(std::abs(_sp.dot(z,b))); - real_type beta0 = beta; - - X p[3]; // the search directions - X q[3]; // Orthonormal basis vectors (in unpreconditioned case) - - q[0].resize(b.size()); - q[1].resize(b.size()); - q[2].resize(b.size()); - q[0] = 0.0; - q[1] = b; - q[1] /= beta; - q[2] = 0.0; + field_type beta0 = beta; - p[0].resize(b.size()); - p[1].resize(b.size()); - p[2].resize(b.size()); + // the search directions + Dune::array<X,3> p{{b,b,b}}; p[0] = 0.0; p[1] = 0.0; p[2] = 0.0; + // orthonormal basis vectors (in unpreconditioned case) + Dune::array<X,3> q{{b,b,b}}; + q[0] = 0.0; + q[1] *= 1.0/beta; + q[2] = 0.0; - z /= beta; // this is w_current + z *= 1.0/beta; // the loop - int i=1; - for ( ; i<=_maxit; i++) - { - dummy = z; // remember z_old for the computation of the search direction p in the next iteration + int i = 1; + for( ; i<=_maxit; i++) { + dummy = z; int i1 = i%3, - i0 = (i1+2)%3, - i2 = (i1+1)%3; + i0 = (i1+2)%3, + i2 = (i1+1)%3; - // Symmetrically Preconditioned Lanczos (Greenbaum p.121) - _op.apply(z,q[i2]); // q[i2] = Az - q[i2].axpy(-beta, q[i0]); + // symmetrically preconditioned Lanczos algorithm (see Greenbaum p.121) + _op.apply(z,q[i2]); // q[i2] = Az + q[i2].axpy(-beta,q[i0]); alpha = _sp.dot(q[i2],z); - q[i2].axpy(-alpha, q[i1]); + q[i2].axpy(-alpha,q[i1]); - z=0.0; + z = 0.0; _prec.apply(z,q[i2]); beta = std::sqrt(std::abs(_sp.dot(q[i2],z))); - q[i2] /= beta; - z /= beta; + q[i2] *= 1.0/beta; + z *= 1.0/beta; // QR Factorization of recurrence coefficient matrix - // apply previous Givens rotations to last column of T + // apply previous givens rotations to last column of T T[1] = T[2]; - if (i>2) - { + if(i>2) { T[0] = s[i%2]*T[1]; T[1] = c[i%2]*T[1]; } - if (i>1) - { + if(i>1) { T[2] = c[(i+1)%2]*alpha - s[(i+1)%2]*T[1]; T[1] = c[(i+1)%2]*T[1] + s[(i+1)%2]*alpha; } else T[2] = alpha; - // recompute c, s -> current Givens rotation \TODO use BLAS-routine drotg instead for greater robustness - // cblas_drotg (a, b, c, s); - c[i%2] = 1.0/std::sqrt(T[2]*T[2] + beta*beta); - s[i%2] = beta*c[i%2]; - c[i%2] *= T[2]; - - // apply current Givens rotation to T eliminating the last entry... + // update QR factorization + generateGivensRotation(T[2],beta,c[i%2],s[i%2]); + // to last column of T_k T[2] = c[i%2]*T[2] + s[i%2]*beta; - - // ...and to xi, the right hand side of the least squares problem min_y||beta*xi-T*y|| + // and to the rhs xi of the min problem xi[i%2] = -s[i%2]*xi[(i+1)%2]; xi[(i+1)%2] *= c[i%2]; @@ -969,53 +968,46 @@ namespace Dune { p[i2] = dummy; p[i2].axpy(-T[1],p[i1]); p[i2].axpy(-T[0],p[i0]); - p[i2] /= T[2]; + p[i2] *= 1.0/T[2]; // apply correction/update solution - x.axpy(beta0*xi[(i+1)%2], p[i2]); + x.axpy(beta0*xi[(i+1)%2],p[i2]); // remember beta_old T[2] = beta; - // update residual - not necessary if in the preconditioned case we are content with the residual norm of the - // preconditioned system as convergence test - // _op.apply(p[i2],dummy); - // b.axpy(-beta0*xi[(i+1)%2],dummy); - - // convergence test - real_type defnew = std::abs(beta0*xi[i%2]); // the last entry the QR-transformed least squares RHS is the new residual norm + // check for convergence + // the last entry in the rhs of the min-problem is the residual + field_type defnew = std::abs(beta0*xi[i%2]); - if (_verbose>1) // print - this->printOutput(std::cout,i,defnew,def); + if(_verbose > 1) + this->printOutput(std::cout,i,defnew,def); - def = defnew; // update norm - if (def<def0*_reduction || def<1E-30 || i==_maxit) // convergence check - { - res.converged = true; - break; - } - } - - //correct i which is wrong if convergence was not achieved. - i=std::min(_maxit,i); + def = defnew; + if(def < def0*_reduction || def < 1e-30 || i == _maxit ) { + res.converged = true; + break; + } + } // end for - if (_verbose==1) // printing for non verbose - this->printOutput(std::cout,i,def); + if(_verbose == 1) + this->printOutput(std::cout,i,def); - _prec.post(x); // postprocess preconditioner - res.iterations = i; // fill statistics - res.reduction = def/def0; - res.conv_rate = pow(res.reduction,1.0/i); - res.elapsed = watch.elapsed(); - - if (_verbose>0) // final print - { - std::cout << "=== rate=" << res.conv_rate - << ", T=" << res.elapsed - << ", TIT=" << res.elapsed/i - << ", IT=" << i << std::endl; - } + // postprocess preconditioner + _prec.post(x); + // fill statistics + res.iterations = i; + res.reduction = def/def0; + res.conv_rate = pow(res.reduction,1.0/i); + res.elapsed = watch.elapsed(); + // final print + if(_verbose > 0) { + std::cout << "=== rate=" << res.conv_rate + << ", T=" << res.elapsed + << ", TIT=" << res.elapsed/i + << ", IT=" << i << std::endl; + } } /*! @@ -1031,6 +1023,25 @@ namespace Dune { } private: + + void generateGivensRotation(field_type& dx, field_type& dy, field_type& cs, field_type& sn) + { + field_type temp = std::abs(dy); + if(temp < 1e-16) { + cs = 1.0; + sn = 0.0; + } + else if(temp > std::abs(dx)) { + temp = dx/dy; + sn = 1.0/std::sqrt(1.0 + temp*temp); + cs = temp * sn; + } else { + temp = dy/dx; + cs = 1.0/std::sqrt(1.0 + temp*temp); + sn = temp * cs; + } + } + SeqScalarProduct<X> ssp; LinearOperator<X,X>& _op; Preconditioner<X,X>& _prec; @@ -1047,7 +1058,9 @@ namespace Dune { Generalized Minimal Residual method as described the SIAM Templates book (http://www.netlib.org/templates/templates.pdf). - \todo construct F via rebind and an appropriate field_type + \tparam X trial vector, vector type of the solution + \tparam Y test vector, vector type of the RHS + \tparam F vector type for orthonormal basis of Krylov space */ @@ -1066,19 +1079,36 @@ namespace Dune { //! \brief The field type of the basis vectors typedef F basis_type; + template<class L, class P> + DUNE_DEPRECATED_MSG("recalc_defect is a unused parameter! Use RestartedGMResSolver(L& op, P& prec, real_type reduction, int restart, int maxit, int verbose) instead") + RestartedGMResSolver (L& op, P& prec, real_type reduction, int restart, int maxit, int verbose, bool recalc_defect) + : _A(op) + , _W(prec) + , ssp() + , _sp(ssp) + , _restart(restart) + , _reduction(reduction) + , _maxit(maxit) + , _verbose(verbose) + { + static_assert(static_cast<int>(P::category) == static_cast<int>(L::category), + "P and L must be the same category!"); + static_assert(static_cast<int>(L::category) == static_cast<int>(SolverCategory::sequential), + "L must be sequential!"); + } + + /*! \brief Set up solver. \copydoc LoopSolver::LoopSolver(L&,P&,double,int,int) \param restart number of GMRes cycles before restart - \param recalc_defect recalculate the defect after everey restart or not [default=false] */ template<class L, class P> - RestartedGMResSolver (L& op, P& prec, double reduction, int restart, int maxit, int verbose, bool recalc_defect = false) : - _A_(op), _M(prec), + RestartedGMResSolver (L& op, P& prec, real_type reduction, int restart, int maxit, int verbose) : + _A(op), _W(prec), ssp(), _sp(ssp), _restart(restart), - _reduction(reduction), _maxit(maxit), _verbose(verbose), - _recalc_defect(recalc_defect) + _reduction(reduction), _maxit(maxit), _verbose(verbose) { static_assert(static_cast<int>(P::category) == static_cast<int>(L::category), "P and L must be the same category!"); @@ -1086,19 +1116,34 @@ namespace Dune { "L must be sequential!"); } + template<class L, class S, class P> + DUNE_DEPRECATED_MSG("recalc_defect is a unused parameter! Use RestartedGMResSolver(L& op, S& sp, P& prec, real_type reduction, int restart, int maxit, int verbose) instead") + RestartedGMResSolver(L& op, S& sp, P& prec, real_type reduction, int restart, int maxit, int verbose, bool recalc_defect) + : _A(op) + , _W(prec) + , _sp(sp) + , _restart(restart) + , _reduction(reduction) + , _maxit(maxit) + , _verbose(verbose) + { + static_assert(static_cast<int>(P::category) == static_cast<int>(L::category), + " P and L must have the same category!"); + static_assert(static_cast<int>(P::category) == static_cast<int>(S::category), + "P and S must have the same category!"); + } + /*! \brief Set up solver. \copydoc LoopSolver::LoopSolver(L&,S&,P&,double,int,int) \param restart number of GMRes cycles before restart - \param recalc_defect recalculate the defect after everey restart or not [default=false] */ template<class L, class S, class P> - RestartedGMResSolver (L& op, S& sp, P& prec, double reduction, int restart, int maxit, int verbose, bool recalc_defect = false) : - _A_(op), _M(prec), + RestartedGMResSolver (L& op, S& sp, P& prec, real_type reduction, int restart, int maxit, int verbose) : + _A(op), _W(prec), _sp(sp), _restart(restart), - _reduction(reduction), _maxit(maxit), _verbose(verbose), - _recalc_defect(recalc_defect) + _reduction(reduction), _maxit(maxit), _verbose(verbose) { static_assert(static_cast<int>(P::category) == static_cast<int>(L::category), "P and L must have the same category!"); @@ -1107,7 +1152,7 @@ namespace Dune { } //! \copydoc InverseOperator::apply(X&,Y&,InverseOperatorResult&) - virtual void apply (X& x, X& b, InverseOperatorResult& res) + virtual void apply (X& x, Y& b, InverseOperatorResult& res) { apply(x,b,_reduction,res); } @@ -1119,209 +1164,189 @@ namespace Dune { */ virtual void apply (X& x, Y& b, double reduction, InverseOperatorResult& res) { - int m =_restart; - real_type norm; - real_type norm_old = 0.0; - real_type norm_0; - real_type beta; - int i, j = 1, k; + const double EPSILON = 1e-80; + const int m = _restart; + real_type norm, norm_old = 0.0, norm_0; + int j = 1; std::vector<field_type> s(m+1), cs(m), sn(m); + // need copy of rhs if GMRes has to be restarted + Y b2(b); // helper vector - X w(b); + Y w(b); std::vector< std::vector<field_type> > H(m+1,s); std::vector<F> v(m+1,b); // start timer - Timer watch; // start a timer + Dune::Timer watch; + watch.reset(); - // clear solver statistics + // clear solver statistics and set res.converged to false res.clear(); - _M.pre(x,b); - if (_recalc_defect) - { - // norm_0 = norm(M^-1 b) - w = 0.0; _M.apply(w,b); // w = M^-1 b - norm_0 = _sp.norm(w); // use defect of preconditioned residual - // r = _M.solve(b - A * x); - w = b; - _A_.applyscaleadd(-1,x, /* => */ w); // w = b - Ax; - v[0] = 0.0; _M.apply(v[0],w); // r = M^-1 w - beta = _sp.norm(v[0]); - } - else - { - // norm_0 = norm(b-Ax) - _A_.applyscaleadd(-1,x, /* => */ b); // b = b - Ax; - v[0] = 0.0; _M.apply(v[0],b); // r = M^-1 b - beta = _sp.norm(v[0]); - norm_0 = beta; // use defect of preconditioned residual - } + _W.pre(x,b); - // avoid division by zero - if (norm_0 == 0.0) - norm_0 = 1.0; - norm = norm_old = beta; + // calculate defect and overwrite rhs with it + _A.applyscaleadd(-1.0,x,b); // b -= Ax + // calculate preconditioned defect + v[0] = 0.0; _W.apply(v[0],b); // r = W^-1 b + norm_0 = _sp.norm(v[0]); + norm = norm_0; + norm_old = norm; // print header - if (_verbose > 0) - { - std::cout << "=== RestartedGMResSolver" << std::endl; - if (_verbose > 1) + if(_verbose > 0) { - this->printHeader(std::cout); - this->printOutput(std::cout,0,norm_0); + std::cout << "=== RestartedGMResSolver" << std::endl; + if(_verbose > 1) { + this->printHeader(std::cout); + this->printOutput(std::cout,0,norm_0); + } } - } - // check convergence - if (norm <= reduction * norm_0) { - _M.post(x); // postprocess preconditioner - res.converged = true; - if (_verbose > 0) // final print + if(norm_0 < EPSILON) { + _W.post(x); + res.converged = true; + if(_verbose > 0) // final print print_result(res); - return; } - while (j <= _maxit && res.converged != true) { - v[0] *= (1.0 / beta); - for (i=1; i<=m; i++) s[i] = 0.0; - s[0] = beta; - int end=std::min(m, _maxit-j+1); - for (i = 0; i < end && res.converged != true; i++, j++) { + while(j <= _maxit && res.converged != true) { + + int i = 0; + v[0] *= 1.0/norm; + s[0] = norm; + for(i=1; i<m+1; i++) + s[i] = 0.0; + + for(i=0; i < m && j <= _maxit && res.converged != true; i++, j++) { w = 0.0; - v[i+1] = 0.0; // use v[i+1] as temporary vector - _A_.apply(v[i], /* => */ v[i+1]); - _M.apply(w, v[i+1]); - for (k = 0; k <= i; k++) { - H[k][i] = _sp.dot(w, v[k]); - // w -= H[k][i] * v[k]; - w.axpy(-H[k][i], v[k]); + // use v[i+1] as temporary vector + v[i+1] = 0.0; + // do Arnoldi algorithm + _A.apply(v[i],v[i+1]); + _W.apply(w,v[i+1]); + for(int k=0; k<i+1; k++) { + H[k][i] = _sp.dot(w,v[k]); + // w -= H[k][i] * v[k] + w.axpy(-H[k][i],v[k]); } H[i+1][i] = _sp.norm(w); - if (H[i+1][i] == 0.0) - DUNE_THROW(ISTLError,"breakdown in GMRes - |w| " - << " == 0.0 after " << j << " iterations"); - // v[i+1] = w * (1.0 / H[i+1][i]); - v[i+1] = w; v[i+1] *= (1.0 / H[i+1][i]); + if(std::abs(H[i+1][i]) < EPSILON) + DUNE_THROW(ISTLError, + "breakdown in GMRes - |w| == 0.0 after " << j << " iterations"); - for (k = 0; k < i; k++) - applyPlaneRotation(H[k][i], H[k+1][i], cs[k], sn[k]); + // normalize new vector + v[i+1] = w; v[i+1] *= 1.0/H[i+1][i]; - generatePlaneRotation(H[i][i], H[i+1][i], cs[i], sn[i]); - applyPlaneRotation(H[i][i], H[i+1][i], cs[i], sn[i]); - applyPlaneRotation(s[i], s[i+1], cs[i], sn[i]); + // update QR factorization + for(int k=0; k<i; k++) + applyPlaneRotation(H[k][i],H[k+1][i],cs[k],sn[k]); + // compute new givens rotation + generatePlaneRotation(H[i][i],H[i+1][i],cs[i],sn[i]); + // finish updating QR factorization + applyPlaneRotation(H[i][i],H[i+1][i],cs[i],sn[i]); + applyPlaneRotation(s[i],s[i+1],cs[i],sn[i]); + + // norm of the defect is the last component the vector s norm = std::abs(s[i+1]); - if (_verbose > 1) // print - { + // print current iteration statistics + if(_verbose > 1) { this->printOutput(std::cout,j,norm,norm_old); } norm_old = norm; - if (norm < reduction * norm_0) { + // check convergence + if(norm < reduction * norm_0) res.converged = true; - } - } - if (_recalc_defect) - { - // update x - update(x, i - 1, H, s, v); - - // update residuum - // r = M^-1 (b - A * x); - w = b; _A_.applyscaleadd(-1,x, /* => */ w); - _M.apply(v[0], w); - beta = _sp.norm(v[0]); - norm = beta; - } - else - { - // calc update vector - w = 0; - update(w, i - 1, H, s, v); - - // update x - x += w; - - // r = M^-1 (b - A * x); - // update defect - _A_.applyscaleadd(-1,w, /* => */ b); - // r = M^-1 (b - A * x); - v[0] = 0.0; _M.apply(v[0],b); // r = M^-1 b - beta = _sp.norm(v[0]); - norm = beta; - - res.converged = false; - } - - norm_old = norm; - - if (norm < reduction * norm_0) { - // fill statistics - res.converged = true; + } // end for + + // calculate update vector + w = 0.0; + update(w,i,H,s,v); + // and current iterate + x += w; + + // restart GMRes if convergence was not achieved, + // i.e. linear defect has not reached desired reduction + // and if j < _maxit + if( res.converged != true && j <= _maxit ) { + + if(_verbose > 0) + std::cout << "=== GMRes::restart" << std::endl; + // get saved rhs + b = b2; + // calculate new defect + _A.applyscaleadd(-1.0,x,b); // b -= Ax; + // calculate preconditioned defect + v[0] = 0.0; + _W.apply(v[0],b); + norm = _sp.norm(v[0]); + norm_old = norm; } - if (res.converged != true && _verbose > 0) - std::cout << "=== GMRes::restart\n"; - } + } //end while - _M.post(x); // postprocess preconditioner + // postprocess preconditioner + _W.post(x); - res.iterations = j; - res.reduction = norm / norm_0; - res.conv_rate = pow(res.reduction,1.0/j); + // save solver statistics + res.iterations = j-1; // it has to be j-1!!! + res.reduction = norm/norm_0; + res.conv_rate = pow(res.reduction,1.0/(j-1)); res.elapsed = watch.elapsed(); - if (_verbose>0) + if(_verbose>0) print_result(res); + } - private: - void - print_result (const InverseOperatorResult & res) const - { - int j = res.iterations>0 ? res.iterations : 1; + private : + + void print_result(const InverseOperatorResult& res) const { + int k = res.iterations>0 ? res.iterations : 1; std::cout << "=== rate=" << res.conv_rate << ", T=" << res.elapsed - << ", TIT=" << res.elapsed/j + << ", TIT=" << res.elapsed/k << ", IT=" << res.iterations << std::endl; } - static void - update(X &x, int k, - const std::vector< std::vector<field_type> > & h, - const std::vector<field_type> & s, const std::vector<F> &v) - { + void update(X& w, int i, + const std::vector<std::vector<field_type> >& H, + const std::vector<field_type>& s, + const std::vector<X>& v) { + // solution vector of the upper triangular system std::vector<field_type> y(s); - // Backsolve: - for (int i = k; i >= 0; i--) { - y[i] /= h[i][i]; - for (int j = i - 1; j >= 0; j--) - y[j] -= h[j][i] * y[i]; - } + // backsolve + for(int a=i-1; a>=0; a--) { + field_type rhs(s[a]); + for(int b=a+1; b<i; b++) + rhs -= H[a][b]*y[b]; + y[a] = rhs/H[a][a]; - for (int j = 0; j <= k; j++) - // x += v[j] * y[j]; - x.axpy(y[j],v[j]); + // compute update on the fly + // w += y[a]*v[a] + w.axpy(y[a],v[a]); + } } void generatePlaneRotation(field_type &dx, field_type &dy, field_type &cs, field_type &sn) { - if (dy == 0.0) { + field_type temp = std::abs(dy); + if (std::abs(dy) < 1e-15 ) { cs = 1.0; sn = 0.0; - } else if (std::abs(dy) > std::abs(dx)) { - field_type temp = dx / dy; + } else if (temp > std::abs(dx)) { + temp = dx / dy; sn = 1.0 / std::sqrt( 1.0 + temp*temp ); cs = temp * sn; } else { - field_type temp = dy / dx; + temp = dy / dx; cs = 1.0 / std::sqrt( 1.0 + temp*temp ); sn = temp * cs; } @@ -1336,15 +1361,14 @@ namespace Dune { dx = temp; } - LinearOperator<X,X>& _A_; - Preconditioner<X,X>& _M; + LinearOperator<X,Y>& _A; + Preconditioner<X,Y>& _W; SeqScalarProduct<X> ssp; ScalarProduct<X>& _sp; int _restart; - double _reduction; + real_type _reduction; int _maxit; int _verbose; - bool _recalc_defect; };