Anasazi Version of the Day
Loading...
Searching...
No Matches
BlockPCGSolver.cpp
1// @HEADER
2// *****************************************************************************
3// Anasazi: Block Eigensolvers Package
4//
5// Copyright 2004 NTESS and the Anasazi contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10// This software is a result of the research described in the report
11//
12// "A comparison of algorithms for modal analysis in the absence
13// of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
14// Sandia National Laboratories, Technical report SAND2003-1028J.
15//
16// It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
17// framework ( http://trilinos.org/ ).
18
19#include "BlockPCGSolver.h"
20#include <stdexcept>
21#include <Teuchos_Assert.hpp>
22
23
24BlockPCGSolver::BlockPCGSolver(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
25 double _tol, int _iMax, int _verb)
26 : MyComm(_Comm),
27 callBLAS(),
28 callLAPACK(),
29 callFortran(),
30 K(KK),
31 Prec(0),
32 vectorPCG(0),
33 tolCG(_tol),
34 iterMax(_iMax),
35 verbose(_verb),
36 workSpace(0),
37 lWorkSpace(0),
38 numSolve(0),
39 maxIter(0),
40 sumIter(0),
41 minIter(10000)
42 {
43
44}
45
46
47BlockPCGSolver::BlockPCGSolver(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
48 Epetra_Operator *PP,
49 double _tol, int _iMax, int _verb)
50 : MyComm(_Comm),
51 callBLAS(),
52 callLAPACK(),
53 callFortran(),
54 K(KK),
55 Prec(PP),
56 vectorPCG(0),
57 tolCG(_tol),
58 iterMax(_iMax),
59 verbose(_verb),
60 workSpace(0),
61 lWorkSpace(0),
62 numSolve(0),
63 maxIter(0),
64 sumIter(0),
65 minIter(10000)
66 {
67
68}
69
70
71BlockPCGSolver::~BlockPCGSolver() {
72
73 if (vectorPCG) {
74 delete vectorPCG;
75 vectorPCG = 0;
76 }
77
78 if (workSpace) {
79 delete[] workSpace;
80 workSpace = 0;
81 lWorkSpace = 0;
82 }
83
84}
85
86
87void BlockPCGSolver::setPreconditioner(Epetra_Operator *PP) {
88
89 Prec = PP;
90
91}
92
93
94int BlockPCGSolver::Apply(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const {
95
96 return K->Apply(X, Y);
97
98}
99
100
101int BlockPCGSolver::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const {
102
103 int xcol = X.NumVectors();
104 int info = 0;
105
106 if (Y.NumVectors() < xcol)
107 return -1;
108
109// // Use AztecOO's PCG for one right-hand side
110// if (xcol == 1) {
111//
112// // Define the AztecOO object
113// if (vectorPCG == 0) {
114// vectorPCG = new AztecOO();
115//
116// // Cast away the constness for AztecOO
117// Epetra_Operator *KK = const_cast<Epetra_Operator*>(K);
118// if (dynamic_cast<Epetra_RowMatrix*>(KK) == 0)
119// vectorPCG->SetUserOperator(KK);
120// else
121// vectorPCG->SetUserMatrix(dynamic_cast<Epetra_RowMatrix*>(KK));
122//
123// vectorPCG->SetAztecOption(AZ_max_iter, iterMax);
124// //vectorPCG->SetAztecOption(AZ_kspace, iterMax);
125// vectorPCG->SetAztecOption(AZ_output, AZ_all);
126// if (verbose < 3)
127// vectorPCG->SetAztecOption(AZ_output, AZ_last);
128// if (verbose < 2)
129// vectorPCG->SetAztecOption(AZ_output, AZ_none);
130//
131// vectorPCG->SetAztecOption(AZ_solver, AZ_cg);
132//
133// ////////////////////////////////////////////////
134// //if (K->HasNormInf()) {
135// // vectorPCG->SetAztecOption(AZ_precond, AZ_Neumann);
136// // vectorPCG->SetAztecOption(AZ_poly_ord, 3);
137// //}
138// ////////////////////////////////////////////////
139//
140// if (Prec)
141// vectorPCG->SetPrecOperator(Prec);
142//
143// }
144//
145// double *valX = X.Values();
146// double *valY = Y.Values();
147//
148// int xrow = X.MyLength();
149//
150// bool allocated = false;
151// if (valX == valY) {
152// valX = new double[xrow];
153// allocated = true;
154// // Copy valY into valX
155// memcpy(valX, valY, xrow*sizeof(double));
156// }
157//
158// Epetra_MultiVector rhs(View, X.Map(), valX, xrow, xcol);
159// vectorPCG->SetRHS(&rhs);
160//
161// Y.PutScalar(0.0);
162// vectorPCG->SetLHS(&Y);
163//
164// vectorPCG->Iterate(iterMax, tolCG);
165//
166// numSolve += xcol;
167//
168// int iter = vectorPCG->NumIters();
169// maxIter = (iter > maxIter) ? iter : maxIter;
170// minIter = (iter < minIter) ? iter : minIter;
171// sumIter += iter;
172//
173// if (allocated == true)
174// delete[] valX;
175//
176// return info;
177//
178// } // if (xcol == 1)
179
180 // Use block PCG for multiple right-hand sides
181 info = (xcol == 1) ? Solve(X, Y) : Solve(X, Y, xcol);
182
183 return info;
184
185}
186
187
188int BlockPCGSolver::Solve(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const {
189
190 int info = 0;
191 int localVerbose = verbose*(MyComm.MyPID() == 0);
192
193 int xr = X.MyLength();
194
195 int wSize = 3*xr;
196
197 if (lWorkSpace < wSize) {
198 if (workSpace)
199 delete[] workSpace;
200 workSpace = new (nothrow) double[wSize];
201 if (workSpace == 0) {
202 info = -1;
203 return info;
204 }
205 lWorkSpace = wSize;
206 } // if (lWorkSpace < wSize)
207
208 double *pointer = workSpace;
209
210 Epetra_Vector r(View, X.Map(), pointer);
211 pointer = pointer + xr;
212
213 Epetra_Vector p(View, X.Map(), pointer);
214 pointer = pointer + xr;
215
216 // Note: Kp and z uses the same memory space
217 Epetra_Vector Kp(View, X.Map(), pointer);
218 Epetra_Vector z(View, X.Map(), pointer);
219
220 double tmp;
221 double initNorm = 0.0, rNorm = 0.0, newRZ = 0.0, oldRZ = 0.0, alpha = 0.0;
222 double tolSquare = tolCG*tolCG;
223
224 memcpy(r.Values(), X.Values(), xr*sizeof(double));
225 tmp = callBLAS.DOT(xr, r.Values(), r.Values());
226 MyComm.SumAll(&tmp, &initNorm, 1);
227
228 Y.PutScalar(0.0);
229
230 if (localVerbose > 1) {
231 cout << endl;
232 cout << " --- PCG Iterations --- " << endl;
233 }
234
235 int iter;
236 for (iter = 1; iter <= iterMax; ++iter) {
237
238 if (Prec) {
239 Prec->ApplyInverse(r, z);
240 }
241 else {
242 memcpy(z.Values(), r.Values(), xr*sizeof(double));
243 }
244
245 if (iter == 1) {
246 tmp = callBLAS.DOT(xr, r.Values(), z.Values());
247 MyComm.SumAll(&tmp, &newRZ, 1);
248 memcpy(p.Values(), z.Values(), xr*sizeof(double));
249 }
250 else {
251 oldRZ = newRZ;
252 tmp = callBLAS.DOT(xr, r.Values(), z.Values());
253 MyComm.SumAll(&tmp, &newRZ, 1);
254 p.Update(1.0, z, newRZ/oldRZ);
255 }
256
257 K->Apply(p, Kp);
258
259 tmp = callBLAS.DOT(xr, p.Values(), Kp.Values());
260 MyComm.SumAll(&tmp, &alpha, 1);
261 alpha = newRZ/alpha;
262
263 if (alpha <= 0.0) {
264 if (MyComm.MyPID() == 0) {
265 cerr << endl << endl;
266 cerr.precision(4);
267 cerr.setf(ios::scientific, ios::floatfield);
268 cerr << " !!! Non-positive value for p^TKp (" << alpha << ") !!!";
269 cerr << endl << endl;
270 }
271 assert(alpha > 0.0);
272 }
273
274 callBLAS.AXPY(xr, alpha, p.Values(), Y.Values());
275
276 alpha *= -1.0;
277 callBLAS.AXPY(xr, alpha, Kp.Values(), r.Values());
278
279 // Check convergence
280 tmp = callBLAS.DOT(xr, r.Values(), r.Values());
281 MyComm.SumAll(&tmp, &rNorm, 1);
282
283 if (localVerbose > 1) {
284 cout << " Iter. " << iter;
285 cout.precision(4);
286 cout.setf(ios::scientific, ios::floatfield);
287 cout << " Residual reduction " << sqrt(rNorm/initNorm) << endl;
288 }
289
290 if (rNorm <= tolSquare*initNorm)
291 break;
292
293 } // for (iter = 1; iter <= iterMax; ++iter)
294
295 if (localVerbose == 1) {
296 cout << endl;
297 cout << " --- End of PCG solve ---" << endl;
298 cout << " Iter. " << iter;
299 cout.precision(4);
300 cout.setf(ios::scientific, ios::floatfield);
301 cout << " Residual reduction " << sqrt(rNorm/initNorm) << endl;
302 cout << endl;
303 }
304
305 if (localVerbose > 1) {
306 cout << endl;
307 }
308
309 numSolve += 1;
310
311 minIter = (iter < minIter) ? iter : minIter;
312 maxIter = (iter > maxIter) ? iter : maxIter;
313 sumIter += iter;
314
315 return info;
316
317}
318
319
320int BlockPCGSolver::Solve(const Epetra_MultiVector &X, Epetra_MultiVector &Y, int blkSize) const {
321
322 int xrow = X.MyLength();
323 int xcol = X.NumVectors();
324 int ycol = Y.NumVectors();
325
326 int info = 0;
327 int localVerbose = verbose*(MyComm.MyPID() == 0);
328
329 // Machine epsilon to check singularities
330 double eps = 0.0;
331 callLAPACK.LAMCH('E', eps);
332
333 double *valX = X.Values();
334
335 int NB = 3 + callFortran.LAENV(1, "dsytrd", "u", blkSize, -1, -1, -1, 6, 1);
336 int lworkD = (blkSize > NB) ? blkSize*blkSize : NB*blkSize;
337
338 int wSize = 4*blkSize*xrow + 3*blkSize + 2*blkSize*blkSize + lworkD;
339
340 bool useY = true;
341 if (ycol % blkSize != 0) {
342 // Allocate an extra block to store the solutions
343 wSize += blkSize*xrow;
344 useY = false;
345 }
346
347 if (lWorkSpace < wSize) {
348 delete[] workSpace;
349 workSpace = new (nothrow) double[wSize];
350 if (workSpace == 0) {
351 info = -1;
352 return info;
353 }
354 lWorkSpace = wSize;
355 } // if (lWorkSpace < wSize)
356
357 double *pointer = workSpace;
358
359 // Array to store the matrix PtKP
360 double *PtKP = pointer;
361 pointer = pointer + blkSize*blkSize;
362
363 // Array to store coefficient matrices
364 double *coeff = pointer;
365 pointer = pointer + blkSize*blkSize;
366
367 // Workspace array
368 double *workD = pointer;
369 pointer = pointer + lworkD;
370
371 // Array to store the eigenvalues of P^t K P
372 double *da = pointer;
373 pointer = pointer + blkSize;
374
375 // Array to store the norms of right hand sides
376 double *initNorm = pointer;
377 pointer = pointer + blkSize;
378
379 // Array to store the norms of residuals
380 double *resNorm = pointer;
381 pointer = pointer + blkSize;
382
383 // Array to store the residuals
384 double *valR = pointer;
385 pointer = pointer + xrow*blkSize;
386 Epetra_MultiVector R(View, X.Map(), valR, xrow, blkSize);
387
388 // Array to store the preconditioned residuals
389 double *valZ = pointer;
390 pointer = pointer + xrow*blkSize;
391 Epetra_MultiVector Z(View, X.Map(), valZ, xrow, blkSize);
392
393 // Array to store the search directions
394 double *valP = pointer;
395 pointer = pointer + xrow*blkSize;
396 Epetra_MultiVector P(View, X.Map(), valP, xrow, blkSize);
397
398 // Array to store the image of the search directions
399 double *valKP = pointer;
400 pointer = pointer + xrow*blkSize;
401 Epetra_MultiVector KP(View, X.Map(), valKP, xrow, blkSize);
402
403 // Pointer to store the solutions
404 double *valSOL = (useY == true) ? Y.Values() : pointer;
405
406 int iRHS;
407 for (iRHS = 0; iRHS < xcol; iRHS += blkSize) {
408
409 int numVec = (iRHS + blkSize < xcol) ? blkSize : xcol - iRHS;
410
411 // Set the initial residuals to the right hand sides
412 if (numVec < blkSize) {
413 R.Random();
414 }
415 memcpy(valR, valX + iRHS*xrow, numVec*xrow*sizeof(double));
416
417 // Set the initial guess to zero
418 valSOL = (useY == true) ? Y.Values() + iRHS*xrow : valSOL;
419 Epetra_MultiVector SOL(View, X.Map(), valSOL, xrow, blkSize);
420 SOL.PutScalar(0.0);
421
422 int ii;
423 int iter;
424 int nFound;
425
426 R.Norm2(initNorm);
427
428 if (localVerbose > 1) {
429 cout << endl;
430 cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1 << endl;
431 if (localVerbose > 2) {
432 fprintf(stderr,"\n");
433 for (ii = 0; ii < numVec; ++ii) {
434 cout << " ... Initial Residual Norm " << ii << " = " << initNorm[ii] << endl;
435 }
436 cout << endl;
437 }
438 }
439
440 // Iteration loop
441 for (iter = 1; iter <= iterMax; ++iter) {
442
443 // Apply the preconditioner
444 if (Prec)
445 Prec->ApplyInverse(R, Z);
446 else
447 Z = R;
448
449 // Define the new search directions
450 if (iter == 1) {
451 P = Z;
452 }
453 else {
454 // Compute P^t K Z
455 callBLAS.GEMM('T', 'N', blkSize, blkSize, xrow, 1.0, KP.Values(), xrow, Z.Values(), xrow,
456 0.0, workD, blkSize);
457 MyComm.SumAll(workD, coeff, blkSize*blkSize);
458
459 // Compute the coefficient (P^t K P)^{-1} P^t K Z
460 callBLAS.GEMM('T', 'N', blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize,
461 0.0, workD, blkSize);
462 for (ii = 0; ii < blkSize; ++ii)
463 callFortran.SCAL_INCX(blkSize, da[ii], workD + ii, blkSize);
464 callBLAS.GEMM('N', 'N', blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize,
465 0.0, coeff, blkSize);
466
467 // Update the search directions
468 // Note: Use KP as a workspace
469 memcpy(KP.Values(), P.Values(), xrow*blkSize*sizeof(double));
470 callBLAS.GEMM('N', 'N', xrow, blkSize, blkSize, 1.0, KP.Values(), xrow, coeff, blkSize,
471 0.0, P.Values(), xrow);
472
473 P.Update(1.0, Z, -1.0);
474
475 } // if (iter == 1)
476
477 K->Apply(P, KP);
478
479 // Compute P^t K P
480 callBLAS.GEMM('T', 'N', blkSize, blkSize, xrow, 1.0, P.Values(), xrow, KP.Values(), xrow,
481 0.0, workD, blkSize);
482 MyComm.SumAll(workD, PtKP, blkSize*blkSize);
483
484 // Eigenvalue decomposition of P^t K P
485 callFortran.SYEV('V', 'U', blkSize, PtKP, blkSize, da, workD, lworkD, &info);
486 if (info) {
487 // Break the loop as spectral decomposition failed
488 break;
489 } // if (info)
490
491 // Compute the pseudo-inverse of the eigenvalues
492 for (ii = 0; ii < blkSize; ++ii) {
493 // FIXME (mfh 14 Jan 2011) Is this the right exception to
494 // throw? I'm just replacing an exit(-1) with an exception,
495 // as per Trilinos coding standards.
496 TEUCHOS_TEST_FOR_EXCEPTION(da[ii] < 0.0, std::runtime_error, "Negative "
497 "eigenvalue for P^T K P: da[" << ii << "] = "
498 << da[ii] << ".");
499 da[ii] = (da[ii] == 0.0) ? 0.0 : 1.0/da[ii];
500 } // for (ii = 0; ii < blkSize; ++ii)
501
502 // Compute P^t R
503 callBLAS.GEMM('T', 'N', blkSize, blkSize, xrow, 1.0, P.Values(), xrow, R.Values(), xrow,
504 0.0, workD, blkSize);
505 MyComm.SumAll(workD, coeff, blkSize*blkSize);
506
507 // Compute the coefficient (P^t K P)^{-1} P^t R
508 callBLAS.GEMM('T', 'N', blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize,
509 0.0, workD, blkSize);
510 for (ii = 0; ii < blkSize; ++ii)
511 callFortran.SCAL_INCX(blkSize, da[ii], workD + ii, blkSize);
512 callBLAS.GEMM('N', 'N', blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize,
513 0.0, coeff, blkSize);
514
515 // Update the solutions
516 callBLAS.GEMM('N', 'N', xrow, blkSize, blkSize, 1.0, P.Values(), xrow, coeff, blkSize,
517 1.0, valSOL, xrow);
518
519 // Update the residuals
520 callBLAS.GEMM('N', 'N', xrow, blkSize, blkSize, -1.0, KP.Values(), xrow, coeff, blkSize,
521 1.0, R.Values(), xrow);
522
523 // Check convergence
524 R.Norm2(resNorm);
525 nFound = 0;
526 for (ii = 0; ii < numVec; ++ii) {
527 if (resNorm[ii] <= tolCG*initNorm[ii])
528 nFound += 1;
529 }
530
531 if (localVerbose > 1) {
532 cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1;
533 cout << " -- Iteration " << iter << " -- " << nFound << " converged vectors\n";
534 if (localVerbose > 2) {
535 cout << endl;
536 for (ii = 0; ii < numVec; ++ii) {
537 cout << " ... ";
538 cout.width(5);
539 cout << ii << " ... Residual = ";
540 cout.precision(2);
541 cout.setf(ios::scientific, ios::floatfield);
542 cout << resNorm[ii] << " ... Right Hand Side = " << initNorm[ii] << endl;
543 }
544 cout << endl;
545 }
546 }
547
548 if (nFound == numVec) {
549 break;
550 }
551
552 } // for (iter = 1; iter <= maxIter; ++iter)
553
554 if (useY == false) {
555 // Copy the solutions back into Y
556 memcpy(Y.Values() + xrow*iRHS, valSOL, numVec*xrow*sizeof(double));
557 }
558
559 numSolve += nFound;
560
561 if (nFound == numVec) {
562 minIter = (iter < minIter) ? iter : minIter;
563 maxIter = (iter > maxIter) ? iter : maxIter;
564 sumIter += iter;
565 }
566
567 } // for (iRHS = 0; iRHS < xcol; iRHS += blkSize)
568
569 return info;
570
571}
572
573