ROL
ROL_LinMore.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Rapid Optimization Library (ROL) Package
4//
5// Copyright 2014 NTESS and the ROL contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef ROL_LINMORE_H
11#define ROL_LINMORE_H
12
17#include "ROL_TrustRegion.hpp"
18#include "ROL_LinMoreModel.hpp"
19#include "ROL_Elementwise_Function.hpp"
20#include "ROL_Elementwise_Reduce.hpp"
21
22namespace ROL {
23
24template<class Real>
25class LinMore : public TrustRegion<Real> {
26private:
27
28 Ptr<Vector<Real>> x_, s_, g_;
29 Ptr<Vector<Real>> pwa1_, pwa2_, dwa1_, dwa2_;
30
32 int maxit_;
33
34 unsigned verbosity_;
35
36 class LowerBreakPoint : public Elementwise::BinaryFunction<Real> {
37 public:
38 Real apply( const Real &x, const Real &y) const {
39 const Real zero(0), one(1);
40 return (x > zero && y < zero) ? -x/y : -one;
41 }
43
44 class UpperBreakPoint : public Elementwise::BinaryFunction<Real> {
45 public:
46 Real apply( const Real &x, const Real &y) const {
47 const Real zero(0), one(1);
48 return (x > zero && y > zero) ? x/y : -one;
49 }
51
52 class PositiveMin : public Elementwise::ReductionOp<Real> {
53 public:
54 void reduce(const Real &input, Real &output) const {
55 const Real zero(0);
56 output = (input<output && input>zero) ? input : output;
57 }
58 void reduce( const volatile Real &input, Real volatile &output ) const {
59 const Real zero(0);
60 output = (input<output && input>zero) ? input : output;
61 }
62 Real initialValue() const {
63 return ROL_INF<Real>();
64 }
65 Elementwise::EReductionType reductionType() const {
66 return Elementwise::REDUCE_MIN;
67 }
69
70 class PositiveMax : public Elementwise::ReductionOp<Real> {
71 public:
72 void reduce(const Real &input, Real &output) const {
73 const Real zero(0);
74 output = (input>output && input>zero) ? input : output;
75 }
76 void reduce( const volatile Real &input, Real volatile &output ) const {
77 const Real zero(0);
78 output = (input>output && input>zero) ? input : output;
79 }
80 Real initialValue() const {
81 return static_cast<Real>(0);
82 }
83 Elementwise::EReductionType reductionType() const {
84 return Elementwise::REDUCE_MAX;
85 }
87
88public:
89
90 // Constructor
91 LinMore( ROL::ParameterList &parlist ) : TrustRegion<Real>(parlist), alpha_(1) {
92 // Unravel Parameter List
93 Real em4(1e-4), em2(1e-2);
94 maxit_ = parlist.sublist("General").sublist("Krylov").get("Iteration Limit",20);
95 tol1_ = parlist.sublist("General").sublist("Krylov").get("Absolute Tolerance",em4);
96 tol2_ = parlist.sublist("General").sublist("Krylov").get("Relative Tolerance",em2);
97 // Get verbosity level
98 verbosity_ = parlist.sublist("General").get("Print Verbosity", 0);
99 }
100
101 void initialize( const Vector<Real> &x, const Vector<Real> &s, const Vector<Real> &g) {
103 x_ = x.clone(); s_ = x.clone(); g_ = g.clone();
104 pwa1_ = x.clone(); pwa2_ = x.clone();
105 dwa1_ = g.clone(); dwa2_ = g.clone();
106 }
107
109 Real &snorm,
110 int &iflag,
111 int &iter,
112 const Real del,
113 TrustRegionModel<Real> &model ) {
114 const Real zero(0), half(0.5), one(1);
115 Real tol0 = std::sqrt(ROL_EPSILON<Real>());
116 Real gfnorm(0), gfnormf(0), tol(0), stol(0);
117 int dim = s.dimension();
118 // Compute Cauchy point (TRON notation: x_ = x[1])
119 snorm = dcauchy(*s_,alpha_,*model.getIterate(),model.getGradient()->dual(),
120 del,model,*pwa1_,*pwa2_,*dwa1_); // Solve 1D optimization problem for alpha_
121 x_->set(*model.getIterate()); // TRON notation: model.getIterate() = x[0]
122 x_->plus(*s_); // Set x_ = x[0] + alpha_*g
123 model.getBoundConstraint()->project(*x_); // Project x_ onto bounds
124
125 // Model gradient at s = x[1] - x[0]
126 s.set(*x_); s.axpy(-one,*model.getIterate()); // s_ = x[i+1]-x[0]
127 dynamic_cast<LinMoreModel<Real>&>(model).applyFullHessian(*g_,s,tol0);
128 g_->plus(*model.getGradient());
129 model.getBoundConstraint()->pruneActive(*g_,*x_,zero);
130 gfnorm = g_->norm();
131 if (verbosity_ > 0) {
132 std::cout << std::endl;
133 std::cout << " Computation of Cauchy point" << std::endl;
134 std::cout << " Step length (alpha): " << alpha_ << std::endl;
135 std::cout << " Step length (alpha*g): " << snorm << std::endl;
136 std::cout << " Norm of Cauchy point: " << x_->norm() << std::endl;
137 std::cout << " Norm of free gradient components: " << gfnorm << std::endl;
138 }
139
140 // Main step computation loop
141 // There are at most dim iterations since at least one
142 // face becomes active at each iteration
143 iter = 0;
144 for (int i = 0; i < dim; ++i) {
145 // Run Truncated CG
146 int flagCG = 0, iterCG = 0;
147 tol = tol2_*gfnorm;
148 stol = zero;
149 snorm = dtrpcg(*s_,flagCG,iterCG,*g_,*x_,del,model,
150 tol,stol,maxit_,
151 *pwa1_,*dwa1_,*pwa2_,*dwa2_);
152 iter += iterCG;
153 if (verbosity_ > 0) {
154 std::cout << std::endl;
155 std::cout << " Computation of CG step" << std::endl;
156 std::cout << " Number of faces: " << dim << std::endl;
157 std::cout << " Current face (i): " << i << std::endl;
158 std::cout << " CG step length: " << snorm << std::endl;
159 std::cout << " Number of CG iterations: " << iterCG << std::endl;
160 std::cout << " CG flag: " << flagCG << std::endl;
161 std::cout << " Total number of iterations: " << iter << std::endl;
162 }
163
164 // Projected search
165 snorm = dprsrch(*x_,*s_,g_->dual(),model,*pwa1_,*dwa1_);
166 if (verbosity_ > 0) {
167 std::cout << " Step length (beta*s): " << snorm << std::endl;
168 std::cout << " Iterate length: " << x_->norm() << std::endl;
169 }
170
171 // Model gradient at s = x[i+1] - x[0]
172 s.set(*x_); s.axpy(-one,*model.getIterate()); // s_ = x[i+1]-x[0]
173 dynamic_cast<LinMoreModel<Real>&>(model).applyFullHessian(*g_,s,tol0);
174 g_->plus(*model.getGradient());
175 model.getBoundConstraint()->pruneActive(*g_,*x_,zero);
176 gfnormf = g_->norm();
177 if (verbosity_ > 0) {
178 std::cout << std::endl;
179 std::cout << " Update model gradient" << std::endl;
180 std::cout << " Step length: " << s.norm() << std::endl;
181 std::cout << " Norm of free gradient components: " << gfnormf << std::endl;
182 std::cout << std::endl;
183 }
184
185 // Termination check
186 if (gfnormf <= tol2_*gfnorm) {
187 iflag = 0;
188 break;
189 }
190 else if (iter >= maxit_) {
191 iflag = 1;
192 break;
193 }
194 else if (flagCG == 2) {
195 iflag = 2;
196 break;
197 }
198 else if (flagCG == 3) {
199 iflag = 3;
200 break;
201 }
202
203 // Update free gradient norm
204 gfnorm = gfnormf;
205 }
206 // Update norm of step and update model predicted reduction
207 snorm = s.norm();
208 Real gs(0), q(0);
209 dynamic_cast<LinMoreModel<Real>&>(model).applyFullHessian(*dwa1_,s,tol0);
210 gs = s.dot(model.getGradient()->dual());
211 q = half * s.dot(dwa1_->dual()) + gs;
213 }
214
215private:
216
217 // Compute the projected step s = P(x + alpha*w) - x
218 // Returns the norm of the projected step s
219 // s -- The projected step upon return
220 // w -- The direction vector w (unchanged)
221 // x -- The anchor vector x (unchanged)
222 // alpha -- The step size (unchanged)
223 // model -- Contains the bound constraint information
225 const Vector<Real> &x, const Real alpha,
226 TrustRegionModel<Real> &model) const {
227 s.set(x); s.axpy(alpha,w);
228 model.getBoundConstraint()->project(s);
229 s.axpy(static_cast<Real>(-1),x);
230 return s.norm();
231 }
232
233 // Compute minimal and maximal break points of x+alpha*s
234 // with in the interval [xl,xu] specified by the bound constraint
235 // x -- The anchor vector x (unchanged)
236 // s -- The descent vector s (unchanged)
237 // model -- Contains the bound constraint information
238 // bpmin -- The minimum break point
239 // bpmax -- The maximum break point
240 // pwa -- A primal working vector
241 void dbreakpt(const Vector<Real> &x, const Vector<Real> &s,
243 Real &bpmin, Real &bpmax,
244 Vector<Real> &pwa) {
245 const Real zero(0), one(1);
246 bpmin = one; bpmax = zero;
247 Real lbpmin = one, lbpmax = zero, ubpmin = one, ubpmax = zero;
248 // Compute lower break points
249 if (model.getBoundConstraint()->isLowerActivated()) {
250 pwa.set(x);
251 pwa.axpy(-one,*model.getBoundConstraint()->getLowerBound());
252 pwa.applyBinary(lbp_,s);
253 if (pwa.norm() != zero) {
254 lbpmin = pwa.reduce(pmin_);
255 lbpmax = pwa.reduce(pmax_);
256 }
257 }
258 // Compute upper break points
259 if (model.getBoundConstraint()->isUpperActivated()) {
260 pwa.set(*model.getBoundConstraint()->getUpperBound());
261 pwa.axpy(-one,x);
262 pwa.applyBinary(ubp_,s);
263 if (pwa.norm() != zero) {
264 ubpmin = pwa.reduce(pmin_);
265 ubpmax = pwa.reduce(pmax_);
266 }
267 }
268 bpmin = std::min(lbpmin,ubpmin);
269 bpmax = std::max(lbpmax,ubpmax);
270 if (bpmin > bpmax) {
271 bpmin = zero;
272 bpmax = zero;
273 }
274 if (verbosity_ > 0) {
275 std::cout << std::endl;
276 std::cout << " Computation of break points" << std::endl;
277 std::cout << " Minimum break point: " << bpmin << std::endl;
278 std::cout << " Maximum break point: " << bpmax << std::endl;
279 }
280 }
281
282 // Compute Cauchy point, i.e., the minimizer of q(P(x - alpha*g)-x)
283 // subject to the trust region constraint ||P(x - alpha*g)-x|| <= del
284 // s -- The Cauchy point upon return
285 // alpha -- The step length for the Cauchy point upon return
286 // x -- The anchor vector x (unchanged)
287 // g -- The (dual) gradient vector g (unchanged)
288 // del -- The trust region radius (unchanged)
289 // model -- Contains the objective and bound constraint information
290 // pwa1 -- Primal working array
291 // pwa2 -- Primal working array
292 // dwa -- Dual working array
293 Real dcauchy(Vector<Real> &s, Real &alpha,
294 const Vector<Real> &x, const Vector<Real> &g,
295 const Real del, TrustRegionModel<Real> &model,
296 Vector<Real> &pwa1, Vector<Real> &pwa2, Vector<Real> &dwa) {
297 const Real half(0.5), one(1), mu0(0.01), interpf(0.1), extrapf(10);
298 // const Real zero(0); // Unused
299 Real tol = std::sqrt(ROL_EPSILON<Real>());
300 bool interp = false;
301 Real q(0), gs(0), bpmin(0), bpmax(0), snorm(0);
302 // Compute minimal and maximal break points of x[0] - alpha g[0]
303 pwa1.set(g); pwa1.scale(-one);
304 dbreakpt(x,pwa1,model,bpmin,bpmax,pwa2);
305 // Compute s = P(x[0] - alpha g[0].dual())
306 snorm = dgpstep(s,g,x,-alpha,model);
307 if (snorm > del) {
308 interp = true;
309 }
310 else {
311 dynamic_cast<LinMoreModel<Real>&>(model).applyFullHessian(dwa,s,tol);
312 gs = s.dot(g);
313 q = half * s.dot(dwa.dual()) + gs;
314 interp = (q > mu0*gs);
315 }
316 // Either increase or decrease alpha to find approximate Cauchy point
317 if (interp) {
318 bool search = true;
319 while (search) {
320 alpha *= interpf;
321 snorm = dgpstep(s,g,x,-alpha,model);
322 if (snorm <= del) {
323 dynamic_cast<LinMoreModel<Real>&>(model).applyFullHessian(dwa,s,tol);
324 gs = s.dot(g);
325 q = half * s.dot(dwa.dual()) + gs;
326 search = (q > mu0*gs);
327 }
328 }
329 }
330 else {
331 bool search = true;
332 Real alphas = alpha;
333 while (search && alpha <= bpmax) {
334 alpha *= extrapf;
335 snorm = dgpstep(s,g,x,-alpha,model);
336 if (snorm <= del) {
337 dynamic_cast<LinMoreModel<Real>&>(model).applyFullHessian(dwa,s,tol);
338 gs = s.dot(g);
339 q = half * s.dot(dwa.dual()) + gs;
340 if (q < mu0*gs) {
341 search = true;
342 alphas = alpha;
343 }
344 }
345 else {
346 search = false;
347 }
348 }
349 alpha = alphas;
350 snorm = dgpstep(s,g,x,-alpha,model);
351 }
352 return snorm;
353 }
354
355 // Perform projected search to determine beta such that
356 // q(P(x + beta*s)-x) <= mu0*g'(P(x + beta*s)-x) for mu0 in (0,1)
357 // x -- The anchor vector x, upon return x = P(x + beta*s)
358 // s -- The direction vector s, upon return s = P(x + beta*s) - x
359 // g -- The free components of the gradient vector g (unchanged)
360 // model -- Contains objective and bound constraint information
361 // pwa -- Primal working array
362 // dwa -- Dual working array
364 const Vector<Real> &g, TrustRegionModel<Real> &model,
365 Vector<Real> &pwa, Vector<Real> &dwa) {
366 const Real half(0.5), one(1), mu0(0.01), interpf(0.5);
367 Real tol = std::sqrt(ROL_EPSILON<Real>());
368 Real beta(1), snorm(0), q(0), gs(0), bpmin(0), bpmax(0);
369 int nsteps = 0;
370 // Compute break points of x+beta*s;
371 dbreakpt(x,s,model,bpmin,bpmax,pwa);
372 // Reduce beta until sufficient decrease is satisfied
373 bool search = true;
374 while (search && beta > bpmin) {
375 nsteps++;
376 snorm = dgpstep(pwa,s,x,beta,model);
377 dynamic_cast<LinMoreModel<Real>&>(model).applyFreeHessian(dwa,pwa,x,tol);
378 gs = pwa.dot(g);
379 q = half * s.dot(dwa.dual()) + gs;
380 if (q <= mu0*gs) {
381 search = false;
382 }
383 else {
384 beta *= interpf;
385 }
386 }
387 if (beta < one && beta < bpmin) {
388 beta = bpmin;
389 }
390 snorm = dgpstep(pwa,s,x,beta,model);
391 s.set(pwa);
392 x.plus(s);
393 if (verbosity_ > 0) {
394 std::cout << std::endl;
395 std::cout << " Projected search" << std::endl;
396 std::cout << " Step length (beta): " << beta << std::endl;
397 }
398 return snorm;
399 }
400
401 // Compute sigma such that ||x+sigma*p||_inv(M) = del. This is called
402 // if dtrpcg detects negative curvature or if the step violates
403 // the trust region bound
404 // xtx -- The dot product <x, inv(M)x> (unchanged)
405 // ptp -- The dot product <p, inv(M)p> (unchanged)
406 // ptx -- The dot product <p, inv(M)x> (unchanged)
407 // del -- Trust region radius (unchanged)
408 Real dtrqsol(const Real xtx, const Real ptp, const Real ptx, const Real del) {
409 const Real zero(0);
410 Real dsq = del*del;
411 Real rad = ptx*ptx + ptp*(dsq-xtx);
412 rad = std::sqrt(std::max(rad,zero));
413 Real sigma(0);
414 if (ptx > zero) {
415 sigma = (dsq-xtx)/(ptx+rad);
416 }
417 else if (rad > zero) {
418 sigma = (rad-ptx)/ptp;
419 }
420 else {
421 sigma = zero;
422 }
423 return sigma;
424 }
425
426 // Solve the trust region subproblem: minimize q(w) subject to the
427 // trust region constraint ||w||_inv(M) <= del using the Steihaug-Toint
428 // Conjugate Gradients algorithm
429 // w -- The step vector to be computed
430 // iflag -- Termination flag
431 // iter -- Number of CG iterations
432 // del -- Trust region radius (unchanged)
433 // model -- Contains the objective and bound constraint information
434 // tol -- Residual stopping tolerance (unchanged)
435 // stol -- Preconditioned residual stopping tolerance (unchanged)
436 // itermax -- Maximum number of iterations
437 // p -- Primal working array that stores the CG step
438 // q -- Dual working array that stores the Hessian applied to p
439 // r -- Primal working array that stores the preconditioned residual
440 // t -- Dual working array that stores the residual
441 Real dtrpcg(Vector<Real> &w, int &iflag, int &iter,
442 const Vector<Real> &g, const Vector<Real> &x,
443 const Real del, TrustRegionModel<Real> &model,
444 const Real tol, const Real stol, const int itermax,
446 Vector<Real> &t) {
447 Real tol0 = std::sqrt(ROL_EPSILON<Real>());
448 const Real zero(0), one(1), two(2);
449 Real rho(0), tnorm(0), rnorm(0), rnorm0(0), kappa(0), beta(0), sigma(0), alpha(0), rtr(0);
450 Real sMs(0), pMp(0), sMp(0);
451 iter = 0; iflag = 0;
452 // Initialize step
453 w.zero();
454 // Compute residual
455 t.set(g); t.scale(-one);
456 // Preconditioned residual
457 dynamic_cast<LinMoreModel<Real>&>(model).applyFreePrecond(r,t,x,tol0);
458 rho = r.dot(t.dual());
459 rnorm0 = std::sqrt(rho);
460 if ( rnorm0 == zero ) {
461 return zero;
462 }
463 // Initialize direction
464 p.set(r);
465 pMp = rho;
466 // Iterate CG
467 for (iter = 0; iter < itermax; ++iter) {
468 // Apply Hessian to direction dir
469 dynamic_cast<LinMoreModel<Real>&>(model).applyFreeHessian(q,p,x,tol0);
470 // Compute sigma such that ||s+sigma*dir|| = del
471 kappa = p.dot(q.dual());
472 alpha = (kappa>zero) ? rho/kappa : zero;
473 sigma = dtrqsol(sMs,pMp,sMp,del);
474 // Check for negative curvature or if iterate exceeds trust region
475 if (kappa <= zero || alpha >= sigma) {
476 w.axpy(sigma,p);
477 iflag = (kappa<=zero) ? 2 : 3;
478 break;
479 }
480 // Update iterate and residuals
481 w.axpy(alpha,p);
482 t.axpy(-alpha,q);
483 dynamic_cast<LinMoreModel<Real>&>(model).applyFreePrecond(r,t,x,tol0);
484 // Exit if residual tolerance is met
485 rtr = r.dot(t.dual());
486 rnorm = std::sqrt(rtr);
487 tnorm = t.norm();
488 if (rnorm <= stol || tnorm <= tol) {
489 iflag = 0;
490 break;
491 }
492 // Compute p = r + beta * p
493 beta = rtr/rho;
494 p.scale(beta); p.plus(r);
495 rho = rtr;
496 // Update dot products
497 // sMs = <s, inv(M)s>
498 // sMp = <s, inv(M)p>
499 // pMp = <p, inv(M)p>
500 sMs = sMs + two*alpha*sMp + alpha*alpha*pMp;
501 sMp = beta*(sMp + alpha*pMp);
502 pMp = rho + beta*beta*pMp;
503 }
504 // Check iteration count
505 if (iter == itermax) {
506 iflag = 1;
507 }
508 if (iflag != 1) {
509 iter++;
510 }
511 return w.norm();
512 }
513
514};
515
516}
517
518#endif
Objective_SerialSimOpt(const Ptr< Obj > &obj, const V &ui) z0_ zero()
Provides the interface to evaluate projected trust-region model functions from the Kelley-Sachs bound...
Real apply(const Real &x, const Real &y) const
void reduce(const volatile Real &input, Real volatile &output) const
Elementwise::EReductionType reductionType() const
void reduce(const Real &input, Real &output) const
void reduce(const Real &input, Real &output) const
void reduce(const volatile Real &input, Real volatile &output) const
Elementwise::EReductionType reductionType() const
Real apply(const Real &x, const Real &y) const
Provides interface for truncated CG trust-region subproblem solver.
Ptr< Vector< Real > > dwa1_
Ptr< Vector< Real > > pwa2_
Real dtrpcg(Vector< Real > &w, int &iflag, int &iter, const Vector< Real > &g, const Vector< Real > &x, const Real del, TrustRegionModel< Real > &model, const Real tol, const Real stol, const int itermax, Vector< Real > &p, Vector< Real > &q, Vector< Real > &r, Vector< Real > &t)
Real dcauchy(Vector< Real > &s, Real &alpha, const Vector< Real > &x, const Vector< Real > &g, const Real del, TrustRegionModel< Real > &model, Vector< Real > &pwa1, Vector< Real > &pwa2, Vector< Real > &dwa)
Real dtrqsol(const Real xtx, const Real ptp, const Real ptx, const Real del)
ROL::LinMore::UpperBreakPoint ubp_
unsigned verbosity_
void dbreakpt(const Vector< Real > &x, const Vector< Real > &s, TrustRegionModel< Real > &model, Real &bpmin, Real &bpmax, Vector< Real > &pwa)
Ptr< Vector< Real > > g_
LinMore(ROL::ParameterList &parlist)
void initialize(const Vector< Real > &x, const Vector< Real > &s, const Vector< Real > &g)
Real dgpstep(Vector< Real > &s, const Vector< Real > &w, const Vector< Real > &x, const Real alpha, TrustRegionModel< Real > &model) const
Real dprsrch(Vector< Real > &x, Vector< Real > &s, const Vector< Real > &g, TrustRegionModel< Real > &model, Vector< Real > &pwa, Vector< Real > &dwa)
Ptr< Vector< Real > > x_
Ptr< Vector< Real > > s_
Ptr< Vector< Real > > dwa2_
void run(Vector< Real > &s, Real &snorm, int &iflag, int &iter, const Real del, TrustRegionModel< Real > &model)
Ptr< Vector< Real > > pwa1_
ROL::LinMore::PositiveMin pmin_
ROL::LinMore::LowerBreakPoint lbp_
ROL::LinMore::PositiveMax pmax_
Provides the interface to evaluate trust-region model functions.
virtual const Ptr< const Vector< Real > > getGradient(void) const
virtual const Ptr< BoundConstraint< Real > > getBoundConstraint(void) const
virtual const Ptr< const Vector< Real > > getIterate(void) const
Provides interface for and implements trust-region subproblem solvers.
virtual void initialize(const Vector< Real > &x, const Vector< Real > &s, const Vector< Real > &g)
void setPredictedReduction(const Real pRed)
Defines the linear algebra or vector space interface.
virtual Real norm() const =0
Returns where .
virtual void set(const Vector &x)
Set where .
virtual void applyBinary(const Elementwise::BinaryFunction< Real > &f, const Vector &x)
virtual void scale(const Real alpha)=0
Compute where .
virtual const Vector & dual() const
Return dual representation of , for example, the result of applying a Riesz map, or change of basis,...
virtual void plus(const Vector &x)=0
Compute , where .
virtual void zero()
Set to zero vector.
virtual ROL::Ptr< Vector > clone() const =0
Clone to make a new (uninitialized) vector.
virtual Real reduce(const Elementwise::ReductionOp< Real > &r) const
virtual int dimension() const
Return dimension of the vector space.
virtual void axpy(const Real alpha, const Vector &x)
Compute where .
virtual Real dot(const Vector &x) const =0
Compute where .
constexpr auto dim