Anasazi Version of the Day
Loading...
Searching...
No Matches
KnyazevLOBPCG.cpp
1// @HEADER
2// *****************************************************************************
3// Anasazi: Block Eigensolvers Package
4//
5// Copyright 2004 NTESS and the Anasazi contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10//**************************************************************************
11//
12// NOTICE
13//
14// This software is a result of the research described in the report
15//
16// " A comparison of algorithms for modal analysis in the absence
17// of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
18// Sandia National Laboratories, Technical cout SAND2003-1028J.
19//
20// It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
21// framework ( http://software.sandia.gov/trilinos/ ).
22//
23// The distribution of this software follows also the rules defined in Trilinos.
24// This notice shall be marked on any reproduction of this software, in whole or
25// in part.
26//
27// This program is distributed in the hope that it will be useful, but
28// WITHOUT ANY WARRANTY; without even the implied warranty of
29// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
30//
31// Code Authors: U. Hetmaniuk (ulhetma@sandia.gov), R. Lehoucq (rblehou@sandia.gov)
32//
33//**************************************************************************
34
35#include "KnyazevLOBPCG.h"
36
37
38KnyazevLOBPCG::KnyazevLOBPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
39 const Epetra_Operator *PP,
40 double _tol, int _maxIter, int _verb) :
41 myVerify(_Comm),
42 callBLAS(),
43 callFortran(),
44 modalTool(_Comm),
45 mySort(),
46 MyComm(_Comm),
47 K(KK),
48 M(0),
49 Prec(PP),
50 MyWatch(_Comm),
51 tolEigenSolve(_tol),
52 maxIterEigenSolve(_maxIter),
53 blockSize(0),
54 normWeight(0),
55 verbose(_verb),
56 historyCount(0),
57 resHistory(0),
58 memRequested(0.0),
59 highMem(0.0),
60 massOp(0),
61 numRestart(0),
62 outerIter(0),
63 precOp(0),
64 residual(0),
65 stifOp(0),
66 timeLocalProj(0.0),
67 timeLocalSolve(0.0),
68 timeLocalUpdate(0.0),
69 timeMassOp(0.0),
70 timeNorm(0.0),
71 timeOuterLoop(0.0),
72 timePostProce(0.0),
73 timePrecOp(0.0),
74 timeResidual(0.0),
75 timeStifOp(0.0)
76 {
77
78}
79
80
81KnyazevLOBPCG::KnyazevLOBPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
82 const Epetra_Operator *MM, const Epetra_Operator *PP,
83 double _tol, int _maxIter, int _verb,
84 double *_weight) :
85 myVerify(_Comm),
86 callBLAS(),
87 callFortran(),
88 modalTool(_Comm),
89 mySort(),
90 MyComm(_Comm),
91 K(KK),
92 M(MM),
93 Prec(PP),
94 MyWatch(_Comm),
95 tolEigenSolve(_tol),
96 maxIterEigenSolve(_maxIter),
97 blockSize(0),
98 normWeight(_weight),
99 verbose(_verb),
100 historyCount(0),
101 resHistory(0),
102 memRequested(0.0),
103 highMem(0.0),
104 massOp(0),
105 numRestart(0),
106 outerIter(0),
107 precOp(0),
108 residual(0),
109 stifOp(0),
110 timeLocalProj(0.0),
111 timeLocalSolve(0.0),
112 timeLocalUpdate(0.0),
113 timeMassOp(0.0),
114 timeNorm(0.0),
115 timeOuterLoop(0.0),
116 timePostProce(0.0),
117 timePrecOp(0.0),
118 timeResidual(0.0),
119 timeStifOp(0.0)
120 {
121
122}
123
124
125KnyazevLOBPCG::~KnyazevLOBPCG() {
126
127 if (resHistory) {
128 delete[] resHistory;
129 resHistory = 0;
130 }
131
132}
133
134
135int KnyazevLOBPCG::solve(int numEigen, Epetra_MultiVector &Q, double *lambda) {
136
137 return KnyazevLOBPCG::reSolve(numEigen, Q, lambda);
138
139}
140
141
142int KnyazevLOBPCG::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {
143
144 // Computes the smallest eigenvalues and the corresponding eigenvectors
145 // of the generalized eigenvalue problem
146 //
147 // K X = M X Lambda
148 //
149 // using a Locally Optimal Block Preconditioned Conjugate Gradient method.
150 //
151 // Note that if M is not specified, then K X = X Lambda is solved.
152 // Note that the blocksize is equal to the number of requested eigenvalues.
153 //
154 // Ref: A. Knyazev, "Toward the optimal preconditioned eigensolver:
155 // Locally optimal block preconditioner conjugate gradient method",
156 // SIAM J. Sci. Comput., vol 23, n 2, pp. 517-541
157 // Ref: A. Knyazev and M. Argentati, "Implementation of a preconditioned
158 // eigensolver using Hypre", Numerical Linear Algebra with Applications (submitted)
159 //
160 // Input variables:
161 //
162 // numEigen (integer) = Number of eigenmodes requested
163 //
164 // Q (Epetra_MultiVector) = Converged eigenvectors
165 // The number of columns of Q must be equal to numEigen.
166 // The rows of Q are distributed across processors.
167 // At exit, the first numEigen columns contain the eigenvectors requested.
168 //
169 // lambda (array of doubles) = Converged eigenvalues
170 // At input, it must be of size numEigen.
171 // At exit, the first numEigen locations contain the eigenvalues requested.
172 //
173 // startingEV (integer) = Number of existing converged eigenmodes
174 //
175 // Return information on status of computation
176 //
177 // info >= 0 >> Number of converged eigenpairs at the end of computation
178 //
179 // // Failure due to input arguments
180 //
181 // info = - 1 >> The stiffness matrix K has not been specified.
182 // info = - 2 >> The maps for the matrix K and the matrix M differ.
183 // info = - 3 >> The maps for the matrix K and the preconditioner P differ.
184 // info = - 4 >> The maps for the vectors and the matrix K differ.
185 // info = - 5 >> Q is too small for the number of eigenvalues requested.
186 // info = - 6 >> Q is too small for the computation parameters.
187 //
188 // info = - 10 >> Failure during the mass orthonormalization
189 //
190 // info = - 20 >> Error in LAPACK during the local eigensolve
191 //
192 // info = - 30 >> MEMORY
193 //
194
195 // Check the input parameters
196
197 if (numEigen <= startingEV) {
198 return startingEV;
199 }
200
201 int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, numEigen);
202 if (info < 0)
203 return info;
204
205 int myPid = MyComm.MyPID();
206
207 // Get the weight for approximating the M-inverse norm
208 Epetra_Vector *vectWeight = 0;
209 if (normWeight) {
210 vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
211 }
212
213 int knownEV = startingEV;
214 int localVerbose = verbose*(myPid==0);
215
216 // Define local block vectors
217 //
218 // MX = Working vectors (storing M*X if M is specified, else pointing to X)
219 // KX = Working vectors (storing K*X)
220 //
221 // R = Residuals
222 //
223 // H = Preconditioned search space
224 // MH = Working vectors (storing M*H if M is specified, else pointing to H)
225 // KH = Working vectors (storing K*H)
226 //
227 // P = Search directions
228 // MP = Working vectors (storing M*P if M is specified, else pointing to P)
229 // KP = Working vectors (storing K*P)
230
231 int xr = Q.MyLength();
232 Epetra_MultiVector X(View, Q, 0, numEigen);
233
234 blockSize = numEigen;
235
236 int tmp;
237 tmp = (M == 0) ? 6*numEigen*xr : 9*numEigen*xr;
238
239 double *work1 = new (nothrow) double[tmp];
240 if (work1 == 0) {
241 if (vectWeight)
242 delete vectWeight;
243 info = -30;
244 return info;
245 }
246 memRequested += sizeof(double)*tmp/(1024.0*1024.0);
247
248 highMem = (highMem > currentSize()) ? highMem : currentSize();
249
250 double *tmpD = work1;
251
252 Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, numEigen);
253 tmpD = tmpD + xr*numEigen;
254
255 Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, numEigen);
256 tmpD = (M) ? tmpD + xr*numEigen : tmpD;
257
258 Epetra_MultiVector R(View, Q.Map(), tmpD, xr, numEigen);
259 tmpD = tmpD + xr*numEigen;
260
261 Epetra_MultiVector H(View, Q.Map(), tmpD, xr, numEigen);
262 tmpD = tmpD + xr*numEigen;
263
264 Epetra_MultiVector KH(View, Q.Map(), tmpD, xr, numEigen);
265 tmpD = tmpD + xr*numEigen;
266
267 Epetra_MultiVector MH(View, Q.Map(), (M) ? tmpD : H.Values(), xr, numEigen);
268 tmpD = (M) ? tmpD + xr*numEigen : tmpD;
269
270 Epetra_MultiVector P(View, Q.Map(), tmpD, xr, numEigen);
271 tmpD = tmpD + xr*numEigen;
272
273 Epetra_MultiVector KP(View, Q.Map(), tmpD, xr, numEigen);
274 tmpD = tmpD + xr*numEigen;
275
276 Epetra_MultiVector MP(View, Q.Map(), (M) ? tmpD : P.Values(), xr, numEigen);
277
278 if (startingEV > 0) {
279 // Fill the first vectors of KX and MX
280 Epetra_MultiVector copyX(View, X, 0, startingEV);
281 Epetra_MultiVector copyKX(View, KX, 0, startingEV);
282 timeStifOp -= MyWatch.WallTime();
283 K->Apply(copyX, copyKX);
284 timeStifOp += MyWatch.WallTime();
285 stifOp += startingEV;
286 timeMassOp -= MyWatch.WallTime();
287 if (M) {
288 Epetra_MultiVector copyMX(View, MX, 0, startingEV);
289 M->Apply(copyX, copyMX);
290 }
291 timeMassOp += MyWatch.WallTime();
292 massOp += startingEV;
293 }
294
295 // Define arrays
296 //
297 // theta = Store the local eigenvalues (size: numEigen)
298 // normR = Store the norm of residuals (size: numEigen)
299 //
300 // MM = Local mass matrix (size: 3*numEigen x 3*numEigen)
301 // KK = Local stiffness matrix (size: 3*numEigen x 3*numEigen)
302 //
303 // S = Local eigenvectors (size: 3*numEigen x 3*numEigen)
304
305 int lwork2;
306 lwork2 = 2*numEigen + 27*numEigen*numEigen;
307 double *work2 = new (nothrow) double[lwork2];
308 if (work2 == 0) {
309 if (vectWeight)
310 delete vectWeight;
311 delete[] work1;
312 info = -30;
313 return info;
314 }
315
316 highMem = (highMem > currentSize()) ? highMem : currentSize();
317
318 tmpD = work2;
319
320 double *theta = tmpD;
321 tmpD = tmpD + numEigen;
322
323 double *normR = tmpD;
324 tmpD = tmpD + numEigen;
325
326 double *MM = tmpD;
327 tmpD = tmpD + 9*numEigen*numEigen;
328
329 double *KK = tmpD;
330 tmpD = tmpD + 9*numEigen*numEigen;
331
332 double *S = tmpD;
333
334 memRequested += sizeof(double)*lwork2/(1024.0*1024.0);
335
336 // Define an array to store the residuals history
337 if (localVerbose > 2) {
338 resHistory = new (nothrow) double[maxIterEigenSolve*numEigen];
339 if (resHistory == 0) {
340 if (vectWeight)
341 delete vectWeight;
342 delete[] work1;
343 delete[] work2;
344 info = -30;
345 return info;
346 }
347 historyCount = 0;
348 }
349
350 // Miscellaneous definitions
351
352 bool reStart = false;
353 numRestart = 0;
354
355 int localSize;
356 int firstIndex = knownEV;
357 int i, j;
358
359 if (localVerbose > 0) {
360 cout << endl;
361 cout << " *|* Problem: ";
362 if (M)
363 cout << "K*Q = M*Q D ";
364 else
365 cout << "K*Q = Q D ";
366 if (Prec)
367 cout << " with preconditioner";
368 cout << endl;
369 cout << " *|* Algorithm = LOBPCG (Knyazev's version)" << endl;
370 cout << " *|* Size of blocks = " << blockSize << endl;
371 cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
372 cout.precision(2);
373 cout.setf(ios::scientific, ios::floatfield);
374 cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
375 cout << " *|* Norm used for convergence: ";
376 if (normWeight)
377 cout << "weighted L2-norm with user-provided weights" << endl;
378 else
379 cout << "L^2-norm" << endl;
380 if (startingEV > 0)
381 cout << " *|* Input converged eigenvectors = " << startingEV << endl;
382 cout << "\n -- Start iterations -- \n";
383 }
384
385 timeOuterLoop -= MyWatch.WallTime();
386 for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {
387
388 highMem = (highMem > currentSize()) ? highMem : currentSize();
389
390 int workingSize = numEigen - knownEV;
391
392 Epetra_MultiVector XI(View, X, firstIndex, workingSize);
393 Epetra_MultiVector MXI(View, MX, firstIndex, workingSize);
394 Epetra_MultiVector KXI(View, KX, firstIndex, workingSize);
395
396 Epetra_MultiVector HI(View, H, firstIndex, workingSize);
397 Epetra_MultiVector MHI(View, MH, firstIndex, workingSize);
398 Epetra_MultiVector KHI(View, KH, firstIndex, workingSize);
399
400 Epetra_MultiVector PI(View, P, firstIndex, workingSize);
401 Epetra_MultiVector MPI(View, MP, firstIndex, workingSize);
402 Epetra_MultiVector KPI(View, KP, firstIndex, workingSize);
403
404 Epetra_MultiVector RI(View, R, firstIndex, workingSize);
405
406 if ((outerIter == 1) || (reStart == true)) {
407
408 reStart = false;
409 localSize = numEigen;
410
411 // Apply the mass matrix to X
412 timeMassOp -= MyWatch.WallTime();
413 if (M)
414 M->Apply(XI, MXI);
415 timeMassOp += MyWatch.WallTime();
416 massOp += workingSize;
417
418 // Apply the stiffness matrix to X
419 timeStifOp -= MyWatch.WallTime();
420 K->Apply(XI, KXI);
421 timeStifOp += MyWatch.WallTime();
422 stifOp += workingSize;
423
424 } // if ((outerIter == 1) || (reStart == true))
425 else {
426
427 // Apply the preconditioner on the residuals
428 if (Prec) {
429 timePrecOp -= MyWatch.WallTime();
430 Prec->ApplyInverse(RI, HI);
431 timePrecOp += MyWatch.WallTime();
432 precOp += workingSize;
433 }
434 else {
435 memcpy(HI.Values(), RI.Values(), xr*workingSize*sizeof(double));
436 }
437
438 // Apply the mass matrix on H
439 timeMassOp -= MyWatch.WallTime();
440 if (M)
441 M->Apply(HI, MHI);
442 timeMassOp += MyWatch.WallTime();
443 massOp += workingSize;
444
445 // Apply the stiffness matrix to H
446 timeStifOp -= MyWatch.WallTime();
447 K->Apply(HI, KHI);
448 timeStifOp += MyWatch.WallTime();
449 stifOp += workingSize;
450
451 if (localSize == numEigen)
452 localSize += workingSize;
453
454 } // if ((outerIter == 1) || (reStart==true))
455
456 // Form "local" mass and stiffness matrices
457 // Note: Use S as a temporary workspace
458 timeLocalProj -= MyWatch.WallTime();
459 modalTool.localProjection(numEigen, numEigen, xr, X.Values(), xr, KX.Values(), xr,
460 KK, localSize, S);
461 modalTool.localProjection(numEigen, numEigen, xr, X.Values(), xr, MX.Values(), xr,
462 MM, localSize, S);
463 if (localSize > numEigen) {
464 double *pointer = KK + numEigen*localSize;
465 modalTool.localProjection(numEigen, workingSize, xr, X.Values(), xr, KHI.Values(), xr,
466 pointer, localSize, S);
467 modalTool.localProjection(workingSize, workingSize, xr, HI.Values(), xr, KHI.Values(), xr,
468 pointer + numEigen, localSize, S);
469 pointer = MM + numEigen*localSize;
470 modalTool.localProjection(numEigen, workingSize, xr, X.Values(), xr, MHI.Values(), xr,
471 pointer, localSize, S);
472 modalTool.localProjection(workingSize, workingSize, xr, HI.Values(), xr, MHI.Values(), xr,
473 pointer + numEigen, localSize, S);
474 if (localSize > numEigen + workingSize) {
475 pointer = KK + (numEigen + workingSize)*localSize;
476 modalTool.localProjection(numEigen, workingSize, xr, X.Values(), xr, KPI.Values(), xr,
477 pointer, localSize, S);
478 modalTool.localProjection(workingSize, workingSize, xr, HI.Values(), xr, KPI.Values(), xr,
479 pointer + numEigen, localSize, S);
480 modalTool.localProjection(workingSize, workingSize, xr, PI.Values(), xr, KPI.Values(), xr,
481 pointer + numEigen + workingSize, localSize, S);
482 pointer = MM + (numEigen + workingSize)*localSize;
483 modalTool.localProjection(numEigen, workingSize, xr, X.Values(), xr, MPI.Values(), xr,
484 pointer, localSize, S);
485 modalTool.localProjection(workingSize, workingSize, xr, HI.Values(), xr, MPI.Values(), xr,
486 pointer + numEigen, localSize, S);
487 modalTool.localProjection(workingSize, workingSize, xr, PI.Values(), xr, MPI.Values(), xr,
488 pointer + numEigen + workingSize, localSize, S);
489 } // if (localSize > numEigen + workingSize)
490 } // if (localSize > numEigen)
491 timeLocalProj += MyWatch.WallTime();
492
493 // Perform a spectral decomposition
494 timeLocalSolve -= MyWatch.WallTime();
495 int nevLocal = localSize;
496 info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
497 S, localSize, theta, localVerbose,
498 (blockSize == 1) ? 1 : 0);
499 timeLocalSolve += MyWatch.WallTime();
500
501 if (info < 0) {
502 // Stop when spectral decomposition has a critical failure
503 break;
504 } // if (info < 0)
505
506 // Check for restarting
507 if ((theta[0] < 0.0) || (nevLocal < numEigen)) {
508 if (localVerbose > 0) {
509 cout << " Iteration " << outerIter;
510 cout << " - Failure for spectral decomposition - RESTART with new random search\n";
511 }
512 if (workingSize == 1) {
513 XI.Random();
514 }
515 else {
516 Epetra_MultiVector Xinit(View, XI, 1, workingSize-1);
517 Xinit.Random();
518 } // if (workingSize == 1)
519 reStart = true;
520 numRestart += 1;
521 info = 0;
522 continue;
523 } // if ((theta[0] < 0.0) || (nevLocal < numEigen))
524
525 if ((localSize == numEigen+workingSize) && (nevLocal == numEigen)) {
526 for (j = 0; j < nevLocal; ++j)
527 memcpy(S+j*numEigen, S+j*localSize, numEigen*sizeof(double));
528 localSize = numEigen;
529 }
530
531 if ((localSize == numEigen+2*workingSize) && (nevLocal <= numEigen+workingSize)) {
532 for (j = 0; j < nevLocal; ++j)
533 memcpy(S+j*(numEigen+workingSize), S+j*localSize, (numEigen+workingSize)*sizeof(double));
534 localSize = numEigen + workingSize;
535 }
536
537 // Update the spaces
538 // Note: Use R as a temporary work space
539 timeLocalUpdate -= MyWatch.WallTime();
540
541 memcpy(R.Values(), X.Values(), xr*numEigen*sizeof(double));
542 callBLAS.GEMM('N', 'N', xr, numEigen, numEigen, 1.0, R.Values(), xr, S, localSize,
543 0.0, X.Values(), xr);
544 if (M) {
545 memcpy(R.Values(), MX.Values(), xr*numEigen*sizeof(double));
546 callBLAS.GEMM('N', 'N', xr, numEigen, numEigen, 1.0, R.Values(), xr, S, localSize,
547 0.0, MX.Values(), xr);
548 }
549 memcpy(R.Values(), KX.Values(), xr*numEigen*sizeof(double));
550 callBLAS.GEMM('N', 'N', xr, numEigen, numEigen, 1.0, R.Values(), xr, S, localSize,
551 0.0, KX.Values(), xr);
552
553 if (localSize == numEigen + workingSize) {
554 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, HI.Values(), xr,
555 S + numEigen, localSize, 0.0, P.Values(), xr);
556 if (M) {
557 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, MHI.Values(), xr,
558 S + numEigen, localSize, 0.0, MP.Values(), xr);
559 }
560 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, KHI.Values(), xr,
561 S + numEigen, localSize, 0.0, KP.Values(), xr);
562 }
563
564 if (localSize > numEigen + workingSize) {
565 memcpy(RI.Values(), PI.Values(), xr*workingSize*sizeof(double));
566 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, HI.Values(), xr,
567 S + numEigen, localSize, 0.0, P.Values(), xr);
568 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, RI.Values(), xr,
569 S + numEigen + workingSize, localSize, 1.0, P.Values(), xr);
570 if (M) {
571 memcpy(RI.Values(), MPI.Values(), xr*workingSize*sizeof(double));
572 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, MHI.Values(), xr,
573 S + numEigen, localSize, 0.0, MP.Values(), xr);
574 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, RI.Values(), xr,
575 S + numEigen + workingSize, localSize, 1.0, MP.Values(), xr);
576 }
577 memcpy(RI.Values(), KPI.Values(), xr*workingSize*sizeof(double));
578 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, KHI.Values(), xr,
579 S + numEigen, localSize, 0.0, KP.Values(), xr);
580 callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, RI.Values(), xr,
581 S + numEigen + workingSize, localSize, 1.0, KP.Values(), xr);
582 }
583
584 if (localSize > numEigen) {
585 callBLAS.AXPY(xr*numEigen, 1.0, P.Values(), X.Values());
586 if (M)
587 callBLAS.AXPY(xr*numEigen, 1.0, MP.Values(), MX.Values());
588 callBLAS.AXPY(xr*numEigen, 1.0, KP.Values(), KX.Values());
589 }
590 timeLocalUpdate += MyWatch.WallTime();
591
592 // Compute the residuals
593 timeResidual -= MyWatch.WallTime();
594 memcpy(R.Values(), KX.Values(), xr*numEigen*sizeof(double));
595 for (j = 0; j < numEigen; ++j) {
596 callBLAS.AXPY(xr, -theta[j], MX.Values() + j*xr, R.Values() + j*xr);
597 }
598 timeResidual += MyWatch.WallTime();
599 residual += numEigen;
600
601 // Compute the norms of the residuals
602 timeNorm -= MyWatch.WallTime();
603 if (vectWeight) {
604 R.NormWeighted(*vectWeight, normR);
605 }
606 else {
607 R.Norm2(normR);
608 }
609 // Scale the norms of residuals with the eigenvalues
610 for (j = 0; j < numEigen; ++j) {
611 normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
612 }
613 timeNorm += MyWatch.WallTime();
614
615 // When required, monitor some orthogonalities
616 if (verbose > 2) {
617 accuracyCheck(&X, &MX, &R);
618 } // if (verbose > 2)
619
620 if (localSize == numEigen + workingSize)
621 localSize += workingSize;
622
623 // Count the converged eigenvectors
624 timeNorm -= MyWatch.WallTime();
625 knownEV = 0;
626 for (i=0; i < numEigen; ++i) {
627 if (normR[i] < tolEigenSolve) {
628 lambda[i] = theta[i];
629 knownEV += 1;
630 }
631 }
632 timeNorm += MyWatch.WallTime();
633
634 // Store the residual history
635 if (localVerbose > 2) {
636 memcpy(resHistory + historyCount, normR, numEigen*sizeof(double));
637 historyCount += numEigen;
638 }
639
640 // Print information on current iteration
641 if (localVerbose > 0) {
642 cout << " Iteration " << outerIter;
643 cout << " - Number of converged eigenvectors " << knownEV << endl;
644 }
645
646 if (localVerbose > 1) {
647 cout << endl;
648 cout.precision(2);
649 cout.setf(ios::scientific, ios::floatfield);
650 for (i=0; i<numEigen; ++i) {
651 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
652 cout << " = " << normR[i] << endl;
653 }
654 cout << endl;
655 cout.precision(2);
656 for (i=0; i<numEigen; ++i) {
657 cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
658 cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
659 cout << " = " << theta[i] << endl;
660 }
661 cout << endl;
662 }
663
664 // Convergence test
665 if (knownEV >= numEigen) {
666 if (localVerbose == 1) {
667 cout << endl;
668 cout.precision(2);
669 cout.setf(ios::scientific, ios::floatfield);
670 for (i=0; i<numEigen; ++i) {
671 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
672 cout << " = " << normR[i] << endl;
673 }
674 cout << endl;
675 }
676 break;
677 }
678
679 // Update the sizes
680 if ((knownEV > 0) && (localSize > numEigen)) {
681 if (localSize == numEigen + workingSize)
682 localSize = 2*numEigen - knownEV;
683 if (localSize == numEigen + 2*workingSize)
684 localSize = 3*numEigen - 2*knownEV;
685 }
686
687 // Put the unconverged vectors at the end if needed
688
689 for (i=0; i<numEigen; ++i) {
690 if (normR[i] >= tolEigenSolve) {
691 firstIndex = i;
692 break;
693 }
694 }
695
696 // Continue the loop if no motion of vectors is necessary
697 if (firstIndex == numEigen-1)
698 continue;
699
700 while (firstIndex < knownEV) {
701
702 for (j = firstIndex; j < numEigen; ++j) {
703 if (normR[j] < tolEigenSolve) {
704 callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
705 callFortran.SWAP(xr, X.Values() + j*xr, 1, X.Values() + firstIndex*xr, 1);
706 callFortran.SWAP(xr, KX.Values() + j*xr, 1, KX.Values() + firstIndex*xr, 1);
707 callFortran.SWAP(xr, R.Values() + j*xr, 1, R.Values() + firstIndex*xr, 1);
708 callFortran.SWAP(xr, H.Values() + j*xr, 1, H.Values() + firstIndex*xr, 1);
709 callFortran.SWAP(xr, KH.Values() + j*xr, 1, KH.Values() + firstIndex*xr, 1);
710 callFortran.SWAP(xr, P.Values() + j*xr, 1, P.Values() + firstIndex*xr, 1);
711 callFortran.SWAP(xr, KP.Values() + j*xr, 1, KP.Values() + firstIndex*xr, 1);
712 if (M) {
713 callFortran.SWAP(xr, MX.Values() + j*xr, 1, MX.Values() + firstIndex*xr, 1);
714 callFortran.SWAP(xr, MH.Values() + j*xr, 1, MH.Values() + firstIndex*xr, 1);
715 callFortran.SWAP(xr, MP.Values() + j*xr, 1, MP.Values() + firstIndex*xr, 1);
716 }
717 break;
718 }
719 }
720
721 for (i = firstIndex; i < numEigen; ++i) {
722 if (normR[i] >= tolEigenSolve) {
723 firstIndex = i;
724 break;
725 }
726 }
727
728 }
729
730 } // for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter)
731 timeOuterLoop += MyWatch.WallTime();
732 highMem = (highMem > currentSize()) ? highMem : currentSize();
733
734 // Clean memory
735 delete[] work1;
736 delete[] work2;
737 if (vectWeight)
738 delete vectWeight;
739
740 // Sort the eigenpairs
741 timePostProce -= MyWatch.WallTime();
742 if ((info == 0) && (knownEV > 0)) {
743 mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
744 }
745 timePostProce += MyWatch.WallTime();
746
747 return (info == 0) ? knownEV : info;
748
749}
750
751
752void KnyazevLOBPCG::accuracyCheck(const Epetra_MultiVector *X, const Epetra_MultiVector *MX,
753 const Epetra_MultiVector *R) const {
754
755 cout.precision(2);
756 cout.setf(ios::scientific, ios::floatfield);
757 double tmp;
758
759 int myPid = MyComm.MyPID();
760
761 if (X) {
762 if (M) {
763 if (MX) {
764 tmp = myVerify.errorEquality(X, MX, M);
765 if (myPid == 0)
766 cout << " >> Difference between MX and M*X = " << tmp << endl;
767 }
768 tmp = myVerify.errorOrthonormality(X, M);
769 if (myPid == 0)
770 cout << " >> Error in X^T M X - I = " << tmp << endl;
771 }
772 else {
773 tmp = myVerify.errorOrthonormality(X, 0);
774 if (myPid == 0)
775 cout << " >> Error in X^T X - I = " << tmp << endl;
776 }
777 }
778
779 if ((R) && (X)) {
780 tmp = myVerify.errorOrthogonality(X, R);
781 if (myPid == 0)
782 cout << " >> Orthogonality X^T R up to " << tmp << endl;
783 }
784
785}
786
787
788void KnyazevLOBPCG::initializeCounters() {
789
790 historyCount = 0;
791 if (resHistory) {
792 delete[] resHistory;
793 resHistory = 0;
794 }
795
796 memRequested = 0.0;
797 highMem = 0.0;
798
799 massOp = 0;
800 numRestart = 0;
801 outerIter = 0;
802 precOp = 0;
803 residual = 0;
804 stifOp = 0;
805
806 timeLocalProj = 0.0;
807 timeLocalSolve = 0.0;
808 timeLocalUpdate = 0.0;
809 timeMassOp = 0.0;
810 timeNorm = 0.0;
811 timeOuterLoop = 0.0;
812 timePostProce = 0.0;
813 timePrecOp = 0.0;
814 timeResidual = 0.0;
815 timeStifOp = 0.0;
816
817
818}
819
820
821void KnyazevLOBPCG::algorithmInfo() const {
822
823 int myPid = MyComm.MyPID();
824
825 if (myPid ==0) {
826 cout << " Algorithm: LOBPCG (Knyazev's version) with Cholesky-based local eigensolver\n";
827 cout << " Block Size: " << blockSize << endl;
828 }
829
830}
831
832
833void KnyazevLOBPCG::historyInfo() const {
834
835 if (resHistory) {
836 int j;
837 cout << " Block Size: " << blockSize << endl;
838 cout << endl;
839 cout << " Residuals\n";
840 cout << endl;
841 cout.precision(4);
842 cout.setf(ios::scientific, ios::floatfield);
843 for (j = 0; j < historyCount; ++j) {
844 cout << resHistory[j] << endl;
845 }
846 cout << endl;
847 }
848
849}
850
851
852void KnyazevLOBPCG::memoryInfo() const {
853
854 int myPid = MyComm.MyPID();
855
856 double maxHighMem = 0.0;
857 double tmp = highMem;
858 MyComm.MaxAll(&tmp, &maxHighMem, 1);
859
860 double maxMemRequested = 0.0;
861 tmp = memRequested;
862 MyComm.MaxAll(&tmp, &maxMemRequested, 1);
863
864 if (myPid == 0) {
865 cout.precision(2);
866 cout.setf(ios::fixed, ios::floatfield);
867 cout << " Memory requested per processor by the eigensolver = (EST) ";
868 cout.width(6);
869 cout << maxMemRequested << " MB " << endl;
870 cout << endl;
871 cout << " High water mark in eigensolver = (EST) ";
872 cout.width(6);
873 cout << maxHighMem << " MB " << endl;
874 cout << endl;
875 }
876
877}
878
879
880void KnyazevLOBPCG::operationInfo() const {
881
882 int myPid = MyComm.MyPID();
883
884 if (myPid == 0) {
885 cout << " --- Operations ---\n\n";
886 cout << " Total number of mass matrix multiplications = ";
887 cout.width(9);
888 cout << massOp << endl;
889 cout << " Total number of stiffness matrix operations = ";
890 cout.width(9);
891 cout << stifOp << endl;
892 cout << " Total number of preconditioner applications = ";
893 cout.width(9);
894 cout << precOp << endl;
895 cout << " Total number of computed eigen-residuals = ";
896 cout.width(9);
897 cout << residual << endl;
898 cout << "\n";
899 cout << " Total number of outer iterations = ";
900 cout.width(9);
901 cout << outerIter << endl;
902 cout << " Number of restarts = ";
903 cout.width(9);
904 cout << numRestart << endl;
905 cout << "\n";
906 }
907
908}
909
910
911void KnyazevLOBPCG::timeInfo() const {
912
913 int myPid = MyComm.MyPID();
914
915 if (myPid == 0) {
916 cout << " --- Timings ---\n\n";
917 cout.setf(ios::fixed, ios::floatfield);
918 cout.precision(2);
919 cout << " Total time for outer loop = ";
920 cout.width(9);
921 cout << timeOuterLoop << " s" << endl;
922 cout << " Time for mass matrix operations = ";
923 cout.width(9);
924 cout << timeMassOp << " s ";
925 cout.width(5);
926 cout << 100*timeMassOp/timeOuterLoop << " %\n";
927 cout << " Time for stiffness matrix operations = ";
928 cout.width(9);
929 cout << timeStifOp << " s ";
930 cout.width(5);
931 cout << 100*timeStifOp/timeOuterLoop << " %\n";
932 cout << " Time for preconditioner applications = ";
933 cout.width(9);
934 cout << timePrecOp << " s ";
935 cout.width(5);
936 cout << 100*timePrecOp/timeOuterLoop << " %\n";
937 cout << " Time for local projection = ";
938 cout.width(9);
939 cout << timeLocalProj << " s ";
940 cout.width(5);
941 cout << 100*timeLocalProj/timeOuterLoop << " %\n";
942 cout << " Time for local eigensolve = ";
943 cout.width(9);
944 cout << timeLocalSolve << " s ";
945 cout.width(5);
946 cout << 100*timeLocalSolve/timeOuterLoop << " %\n";
947 cout << " Time for local update = ";
948 cout.width(9);
949 cout << timeLocalUpdate << " s ";
950 cout.width(5);
951 cout << 100*timeLocalUpdate/timeOuterLoop << " %\n";
952 cout << " Time for residual computations = ";
953 cout.width(9);
954 cout << timeResidual << " s ";
955 cout.width(5);
956 cout << 100*timeResidual/timeOuterLoop << " %\n";
957 cout << " Time for residuals norm computations = ";
958 cout.width(9);
959 cout << timeNorm << " s ";
960 cout.width(5);
961 cout << 100*timeNorm/timeOuterLoop << " %\n";
962 cout << "\n";
963 cout << " Total time for post-processing = ";
964 cout.width(9);
965 cout << timePostProce << " s\n";
966 cout << endl;
967 } // if (myPid == 0)
968
969}
970
971