Anasazi Version of the Day
Loading...
Searching...
No Matches
LOBPCG_light.cpp
1// @HEADER
2// *****************************************************************************
3// Anasazi: Block Eigensolvers Package
4//
5// Copyright 2004 NTESS and the Anasazi contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10//**************************************************************************
11//
12// NOTICE
13//
14// This software is a result of the research described in the cout
15//
16// " A comparison of algorithms for modal analysis in the absence
17// of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
18// Sandia National Laboratories, Technical cout SAND2003-1028J.
19//
20// It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
21// framework ( http://software.sandia.gov/trilinos/ ).
22//
23// The distribution of this software follows also the rules defined in Trilinos.
24// This notice shall be marked on any reproduction of this software, in whole or
25// in part.
26//
27// This program is distributed in the hope that it will be useful, but
28// WITHOUT ANY WARRANTY; without even the implied warranty of
29// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
30//
31// Code Authors: U. Hetmaniuk (ulhetma@sandia.gov), R. Lehoucq (rblehou@sandia.gov)
32//
33//**************************************************************************
34
35#include "LOBPCG_light.h"
36
37
38LOBPCG_light::LOBPCG_light(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
39 const Epetra_Operator *PP, int _blk,
40 double _tol, int _maxIter, int _verb) :
41 myVerify(_Comm),
42 callBLAS(),
43 callFortran(),
44 modalTool(_Comm),
45 mySort(),
46 MyComm(_Comm),
47 K(KK),
48 M(0),
49 Prec(PP),
50 MyWatch(_Comm),
51 tolEigenSolve(_tol),
52 maxIterEigenSolve(_maxIter),
53 blockSize(_blk),
54 normWeight(0),
55 verbose(_verb),
56 historyCount(0),
57 resHistory(0),
58 memRequested(0.0),
59 highMem(0.0),
60 massOp(0),
61 numRestart(0),
62 outerIter(0),
63 precOp(0),
64 residual(0),
65 stifOp(0),
66 timeLocalProj(0.0),
67 timeLocalSolve(0.0),
68 timeLocalUpdate(0.0),
69 timeMassOp(0.0),
70 timeNorm(0.0),
71 timeOrtho(0.0),
72 timeOuterLoop(0.0),
73 timePostProce(0.0),
74 timePrecOp(0.0),
75 timeResidual(0.0),
76 timeRestart(0.0),
77 timeStifOp(0.0)
78 {
79
80}
81
82
83LOBPCG_light::LOBPCG_light(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
84 const Epetra_Operator *MM, const Epetra_Operator *PP, int _blk,
85 double _tol, int _maxIter, int _verb,
86 double *_weight) :
87 myVerify(_Comm),
88 callBLAS(),
89 callFortran(),
90 modalTool(_Comm),
91 mySort(),
92 MyComm(_Comm),
93 K(KK),
94 M(MM),
95 Prec(PP),
96 MyWatch(_Comm),
97 tolEigenSolve(_tol),
98 maxIterEigenSolve(_maxIter),
99 blockSize(_blk),
100 normWeight(_weight),
101 verbose(_verb),
102 historyCount(0),
103 resHistory(0),
104 memRequested(0.0),
105 highMem(0.0),
106 massOp(0),
107 numRestart(0),
108 outerIter(0),
109 precOp(0),
110 residual(0),
111 stifOp(0),
112 timeLocalProj(0.0),
113 timeLocalSolve(0.0),
114 timeLocalUpdate(0.0),
115 timeMassOp(0.0),
116 timeNorm(0.0),
117 timeOrtho(0.0),
118 timeOuterLoop(0.0),
119 timePostProce(0.0),
120 timePrecOp(0.0),
121 timeResidual(0.0),
122 timeRestart(0.0),
123 timeStifOp(0.0)
124 {
125
126}
127
128
129LOBPCG_light::~LOBPCG_light() {
130
131 if (resHistory) {
132 delete[] resHistory;
133 resHistory = 0;
134 }
135
136}
137
138
139int LOBPCG_light::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {
140
141 // Computes the smallest eigenvalues and the corresponding eigenvectors
142 // of the generalized eigenvalue problem
143 //
144 // K X = M X Lambda
145 //
146 // using a modified Locally Optimal Block Preconditioned Conjugate
147 // Gradient method.
148 //
149 // Note that if M is not specified, then K X = X Lambda is solved.
150 //
151 // Ref: A. Knyazev, "Toward the optimal preconditioned eigensolver:
152 // Locally optimal block preconditioner conjugate gradient method",
153 // SIAM J. Sci. Comput., vol 23, n 2, pp. 517-541
154 //
155 // Input variables:
156 //
157 // numEigen (integer) = Number of eigenmodes requested
158 //
159 // Q (Epetra_MultiVector) = Converged eigenvectors
160 // The number of columns of Q must be equal to numEigen + blockSize.
161 // The rows of Q are distributed across processors.
162 // At exit, the first numEigen columns contain the eigenvectors requested.
163 //
164 // lambda (array of doubles) = Converged eigenvalues
165 // At input, it must be of size numEigen + blockSize.
166 // At exit, the first numEigen locations contain the eigenvalues requested.
167 //
168 // startingEV (integer) = Number of existing converged eigenmodes
169 //
170 // Return information on status of computation
171 //
172 // info >= 0 >> Number of converged eigenpairs at the end of computation
173 //
174 // // Failure due to input arguments
175 //
176 // info = - 1 >> The stiffness matrix K has not been specified.
177 // info = - 2 >> The maps for the matrix K and the matrix M differ.
178 // info = - 3 >> The maps for the matrix K and the preconditioner P differ.
179 // info = - 4 >> The maps for the vectors and the matrix K differ.
180 // info = - 5 >> Q is too small for the number of eigenvalues requested.
181 // info = - 6 >> Q is too small for the computation parameters.
182 //
183 // info = - 10 >> Failure during the mass orthonormalization
184 //
185 // info = - 20 >> Error in LAPACK during the local eigensolve
186 //
187 // info = - 30 >> MEMORY
188 //
189
190 // Check the input parameters
191
192 if (numEigen <= startingEV) {
193 return numEigen;
194 }
195
196 int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, minimumSpaceDimension(numEigen));
197 if (info < 0)
198 return info;
199
200 int myPid = MyComm.MyPID();
201
202 // Get the weight for approximating the M-inverse norm
203 Epetra_Vector *vectWeight = 0;
204 if (normWeight) {
205 vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
206 }
207
208 int knownEV = startingEV;
209 int localVerbose = verbose*(myPid==0);
210
211 // Define local block vectors
212 //
213 // R = Residuals
214 //
215 // H = Preconditioned search space
216 //
217 // P = Search directions
218
219 int xr = Q.MyLength();
220 Epetra_MultiVector X(View, Q, numEigen, blockSize);
221 X.Random();
222
223 int tmp;
224 tmp = 3*blockSize*xr;
225
226 double *work1 = new (nothrow) double[tmp];
227 if (work1 == 0) {
228 if (vectWeight)
229 delete vectWeight;
230 info = -30;
231 return info;
232 }
233 memRequested += sizeof(double)*tmp/(1024.0*1024.0);
234
235 highMem = (highMem > currentSize()) ? highMem : currentSize();
236
237 double *tmpD = work1;
238
239 Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);
240 tmpD = tmpD + xr*blockSize;
241
242 Epetra_MultiVector H(View, Q.Map(), tmpD, xr, blockSize);
243 tmpD = tmpD + xr*blockSize;
244
245 Epetra_MultiVector P(View, Q.Map(), tmpD, xr, blockSize);
246 tmpD = tmpD + xr*blockSize;
247
248 // Define arrays
249 //
250 // theta = Store the local eigenvalues (size: 3*blockSize)
251 // normR = Store the norm of residuals (size: blockSize)
252 //
253 // MM = Local mass matrix (size: 3*blockSize x 3*blockSize)
254 // KK = Local stiffness matrix (size: 3*blockSize x 3*blockSize)
255 //
256 // S = Local eigenvectors (size: 3*blockSize x 3*blockSize)
257
258 int lwork2;
259 lwork2 = 4*blockSize + 27*blockSize*blockSize;
260 double *work2 = new (nothrow) double[lwork2];
261 if (work2 == 0) {
262 if (vectWeight)
263 delete vectWeight;
264 delete[] work1;
265 info = -30;
266 return info;
267 }
268
269 highMem = (highMem > currentSize()) ? highMem : currentSize();
270
271 tmpD = work2;
272
273 double *theta = tmpD;
274 tmpD = tmpD + 3*blockSize;
275
276 double *normR = tmpD;
277 tmpD = tmpD + blockSize;
278
279 double *MM = tmpD;
280 tmpD = tmpD + 9*blockSize*blockSize;
281
282 double *KK = tmpD;
283 tmpD = tmpD + 9*blockSize*blockSize;
284
285 double *S = tmpD;
286
287 memRequested += sizeof(double)*lwork2/(1024.0*1024.0);
288
289 // Define an array to store the residuals history
290 if (localVerbose > 2) {
291 resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
292 if (resHistory == 0) {
293 if (vectWeight)
294 delete vectWeight;
295 delete[] work1;
296 delete[] work2;
297 info = -30;
298 return info;
299 }
300 historyCount = 0;
301 }
302
303 // Miscellaneous definitions
304
305 bool reStart = false;
306 numRestart = 0;
307
308 int localSize;
309 int twoBlocks = 2*blockSize;
310 int threeBlocks = 3*blockSize;
311 int nFound = blockSize;
312 int i, j;
313
314 double minNorm = 1000*tolEigenSolve;
315 bool hasNextVectors = false;
316
317 if (localVerbose > 0) {
318 cout << endl;
319 cout << " *|* Problem: ";
320 if (M)
321 cout << "K*Q = M*Q D ";
322 else
323 cout << "K*Q = Q D ";
324 if (Prec)
325 cout << " with preconditioner";
326 cout << endl;
327 cout << " *|* Algorithm = LOBPCG (Small memory requirements)" << endl;
328 cout << " *|* Size of blocks = " << blockSize << endl;
329 cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
330 cout.precision(2);
331 cout.setf(ios::scientific, ios::floatfield);
332 cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
333 cout << " *|* Norm used for convergence: ";
334 if (normWeight){
335 cout << "weighted L2-norm with user-provided weights" << endl;
336 }
337 else {
338 cout << "L^2-norm" << endl;
339 }
340 if (startingEV > 0)
341 cout << " *|* Input converged eigenvectors = " << startingEV << endl;
342 cout << "\n -- Start iterations -- \n";
343 }
344
345 timeOuterLoop -= MyWatch.WallTime();
346 for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {
347
348 highMem = (highMem > currentSize()) ? highMem : currentSize();
349
350 if ((outerIter == 1) || (reStart == true)) {
351
352 reStart = false;
353 localSize = blockSize;
354
355 hasNextVectors = false;
356 minNorm = 1000*tolEigenSolve;
357
358 // Apply the mass matrix to X
359 // Note: Use P as work space
360 timeMassOp -= MyWatch.WallTime();
361 if (M)
362 M->Apply(X, P);
363 else
364 memcpy(P.Values(), X.Values(), xr*blockSize*sizeof(double));
365 timeMassOp += MyWatch.WallTime();
366 massOp += blockSize;
367
368 if ((knownEV > 0) && (nFound > 0)) {
369 // Orthonormalize X against the known eigenvectors with Gram-Schmidt
370 // Note: Use R as a temporary work space
371 Epetra_MultiVector copyQ(View, Q, 0, knownEV);
372 timeOrtho -= MyWatch.WallTime();
373 info = modalTool.massOrthonormalize(X, P, M, copyQ, nFound, 1, R.Values());
374 timeOrtho += MyWatch.WallTime();
375 // Exit the code if the orthogonalization did not succeed
376 if (info < 0) {
377 info = -10;
378 if (vectWeight)
379 delete vectWeight;
380 delete[] work1;
381 delete[] work2;
382 return info;
383 }
384 } // if ((knownEV > 0) && (nFound > 0))
385
386 } // if ((outerIter == 1) || (reStart == true))
387 else {
388
389 // Apply the preconditioner on the residuals
390 if (Prec) {
391 timePrecOp -= MyWatch.WallTime();
392 Prec->ApplyInverse(R, H);
393 timePrecOp += MyWatch.WallTime();
394 precOp += blockSize;
395 }
396 else {
397 memcpy(H.Values(), R.Values(), xr*blockSize*sizeof(double));
398 }
399
400 // Apply the mass matrix on H
401 timeMassOp -= MyWatch.WallTime();
402 if (M)
403 M->Apply(H, R);
404 else
405 memcpy(R.Values(), H.Values(), xr*blockSize*sizeof(double));
406 timeMassOp += MyWatch.WallTime();
407 massOp += blockSize;
408
409 if (knownEV > 0) {
410 // Orthogonalize H against the known eigenvectors
411 Epetra_MultiVector copyQ(View, Q, 0, knownEV);
412 timeOrtho -= MyWatch.WallTime();
413 modalTool.massOrthonormalize(H, R, M, copyQ, blockSize, 1);
414 timeOrtho += MyWatch.WallTime();
415 }
416
417 if (localSize == blockSize)
418 localSize += blockSize;
419
420 } // if ((outerIter == 1) || (reStart==true))
421
422 // Form "local" mass and stiffness matrices
423 // Note: Use S as a temporary workspace
424 if (localSize == blockSize) {
425 // P stores M*X
426 timeLocalProj -= MyWatch.WallTime();
427 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, P.Values(), xr,
428 MM, localSize, S);
429 timeLocalProj += MyWatch.WallTime();
430 // Put K*X in H
431 timeStifOp -= MyWatch.WallTime();
432 K->Apply(X, H);
433 timeStifOp += MyWatch.WallTime();
434 stifOp += blockSize;
435 timeLocalProj -= MyWatch.WallTime();
436 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, H.Values(), xr,
437 KK, localSize, S);
438 timeLocalProj += MyWatch.WallTime();
439 }
440 else {
441 // Put diagonal matrices in first block
442 timeLocalProj -= MyWatch.WallTime();
443 for (i = 0; i < blockSize; ++i) {
444 memset(KK + i*localSize, 0, i*sizeof(double));
445 KK[i + i*localSize] = theta[i];
446 memset(MM + i*localSize, 0, i*sizeof(double));
447 MM[i + i*localSize] = 1.0;
448 }
449 timeLocalProj += MyWatch.WallTime();
450 // R stores M*H
451 timeLocalProj -= MyWatch.WallTime();
452 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, R.Values(), xr,
453 MM + blockSize*localSize, localSize, S);
454 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, R.Values(), xr,
455 MM + blockSize*localSize + blockSize, localSize, S);
456 timeLocalProj += MyWatch.WallTime();
457 // Put K*H in R
458 timeStifOp -= MyWatch.WallTime();
459 K->Apply(H, R);
460 timeStifOp += MyWatch.WallTime();
461 stifOp += blockSize;
462 timeLocalProj -= MyWatch.WallTime();
463 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, R.Values(), xr,
464 KK + blockSize*localSize, localSize, S);
465 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, R.Values(), xr,
466 KK + blockSize*localSize + blockSize, localSize, S);
467 timeLocalProj += MyWatch.WallTime();
468 if (localSize > twoBlocks) {
469 // Put M*P in R
470 timeMassOp -= MyWatch.WallTime();
471 if (M)
472 M->Apply(P, R);
473 else
474 memcpy(R.Values(), P.Values(), xr*blockSize*sizeof(double));
475 timeMassOp += MyWatch.WallTime();
476 massOp += blockSize;
477 timeLocalProj -= MyWatch.WallTime();
478 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, R.Values(), xr,
479 MM + twoBlocks*localSize, localSize, S);
480 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, R.Values(), xr,
481 MM + twoBlocks*localSize + blockSize, localSize, S);
482 modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, R.Values(), xr,
483 MM + twoBlocks*localSize + twoBlocks, localSize, S);
484 timeLocalProj += MyWatch.WallTime();
485 // Put K*P in R
486 timeStifOp -= MyWatch.WallTime();
487 K->Apply(P, R);
488 timeStifOp += MyWatch.WallTime();
489 stifOp += blockSize;
490 timeLocalProj -= MyWatch.WallTime();
491 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, R.Values(), xr,
492 KK + twoBlocks*localSize, localSize, S);
493 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, R.Values(), xr,
494 KK + twoBlocks*localSize + blockSize, localSize, S);
495 modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, R.Values(), xr,
496 KK + twoBlocks*localSize + twoBlocks, localSize, S);
497 timeLocalProj += MyWatch.WallTime();
498 } // if (localSize > twoBlocks)
499 } // if (localSize == blockSize)
500
501 // Perform a spectral decomposition
502 timeLocalSolve -= MyWatch.WallTime();
503 int nevLocal = localSize;
504 info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
505 S, localSize, theta, localVerbose,
506 (blockSize == 1) ? 1 : 0);
507 timeLocalSolve += MyWatch.WallTime();
508
509 if (info < 0) {
510 // Stop when spectral decomposition has a critical failure
511 break;
512 } // if (info < 0)
513
514 // Check for restarting
515 if ((theta[0] < 0.0) || (nevLocal < blockSize)) {
516 if (localVerbose > 0) {
517 cout << " Iteration " << outerIter;
518 cout << "- Failure for spectral decomposition - RESTART with new random search\n";
519 }
520 if (blockSize == 1) {
521 X.Random();
522 nFound = blockSize;
523 }
524 else {
525 Epetra_MultiVector Xinit(View, X, 1, blockSize-1);
526 Xinit.Random();
527 nFound = blockSize - 1;
528 } // if (blockSize == 1)
529 reStart = true;
530 numRestart += 1;
531 info = 0;
532 continue;
533 } // if ((theta[0] < 0.0) || (nevLocal < blockSize))
534
535 if ((localSize == twoBlocks) && (nevLocal == blockSize)) {
536 for (j = 0; j < nevLocal; ++j)
537 memcpy(S + j*blockSize, S + j*twoBlocks, blockSize*sizeof(double));
538 localSize = blockSize;
539 }
540
541 if ((localSize == threeBlocks) && (nevLocal <= twoBlocks)) {
542 for (j = 0; j < nevLocal; ++j)
543 memcpy(S + j*twoBlocks, S + j*threeBlocks, twoBlocks*sizeof(double));
544 localSize = twoBlocks;
545 }
546
547 // Update the iterate and, if needed, define the next Ritz vectors
548 if (localSize == twoBlocks) {
549 if (minNorm < 10*tolEigenSolve) {
550 timeRestart -= MyWatch.WallTime();
551 int numVec = (numEigen > knownEV + blockSize) ? blockSize : numEigen - knownEV - 1;
552 double *pointerS = S + blockSize*localSize;
553 double *pointerQ = Q.Values() + knownEV*xr;
554 callBLAS.GEMM('N', 'N', xr, numVec, blockSize, 1.0, X.Values(), xr,
555 pointerS, localSize, 0.0, pointerQ, xr);
556 callBLAS.GEMM('N', 'N', xr, numVec, blockSize, 1.0, H.Values(), xr,
557 pointerS + blockSize, localSize, 1.0, pointerQ, xr);
558 hasNextVectors = true;
559 timeRestart += MyWatch.WallTime();
560 }
561 timeLocalUpdate -= MyWatch.WallTime();
562 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr,
563 S + blockSize, localSize, 0.0, P.Values(), xr);
564 memcpy(R.Values(), X.Values(), xr*blockSize*sizeof(double));
565 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
566 S, localSize, 0.0, X.Values(), xr);
567 X.Update(1.0, P, 1.0);
568 timeLocalUpdate += MyWatch.WallTime();
569 }
570 if (localSize == threeBlocks) {
571 if (minNorm < 10*tolEigenSolve) {
572 timeRestart -= MyWatch.WallTime();
573 int numVec = (numEigen > knownEV + blockSize) ? blockSize : numEigen - knownEV - 1;
574 double *pointerS = S + blockSize*localSize;
575 double *pointerQ = Q.Values() + knownEV*xr;
576 callBLAS.GEMM('N', 'N', xr, numVec, blockSize, 1.0, X.Values(), xr,
577 pointerS, localSize, 0.0, pointerQ, xr);
578 // Note: We use the contiguity of [H, P] in the memory
579 callBLAS.GEMM('N', 'N', xr, numVec, twoBlocks, 1.0, H.Values(), xr,
580 pointerS + blockSize, localSize, 1.0, pointerQ, xr);
581 hasNextVectors = true;
582 timeRestart += MyWatch.WallTime();
583 }
584 timeLocalUpdate -= MyWatch.WallTime();
585 memcpy(R.Values(), P.Values(), xr*blockSize*sizeof(double));
586 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr,
587 S + blockSize, localSize, 0.0, P.Values(), xr);
588 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
589 S + twoBlocks, localSize, 1.0, P.Values(), xr);
590 memcpy(R.Values(), X.Values(), xr*blockSize*sizeof(double));
591 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
592 S, localSize, 0.0, X.Values(), xr);
593 X.Update(1.0, P, 1.0);
594 timeLocalUpdate += MyWatch.WallTime();
595 }
596
597 // Compute the residuals
598 if (localSize == blockSize) {
599 timeResidual -= MyWatch.WallTime();
600 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, blockSize,
601 0.0, R.Values(), xr);
602 timeResidual += MyWatch.WallTime();
603 timeLocalUpdate -= MyWatch.WallTime();
604 memcpy(H.Values(), X.Values(), xr*blockSize*sizeof(double));
605 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, blockSize,
606 0.0, X.Values(), xr);
607 timeLocalUpdate += MyWatch.WallTime();
608 // Note: We scale S
609 timeResidual -= MyWatch.WallTime();
610 for (j = 0; j < blockSize; ++j)
611 callBLAS.SCAL(localSize, theta[j], S + j*localSize);
612 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, P.Values(), xr, S, blockSize,
613 1.0, R.Values(), xr);
614 timeResidual += MyWatch.WallTime();
615 // Note: S is not scaled back to its original value
616 }
617 else {
618 timeStifOp -= MyWatch.WallTime();
619 K->Apply(X, R);
620 timeStifOp += MyWatch.WallTime();
621 stifOp += blockSize;
622 timeMassOp -= MyWatch.WallTime();
623 if (M)
624 M->Apply(X, H);
625 else
626 memcpy(H.Values(), X.Values(), xr*blockSize*sizeof(double));
627 timeMassOp += MyWatch.WallTime();
628 massOp += blockSize;
629 timeResidual -= MyWatch.WallTime();
630 for (j = 0; j < blockSize; ++j)
631 callBLAS.AXPY(xr, -theta[j], H.Values() + j*xr, R.Values() + j*xr);
632 timeResidual += MyWatch.WallTime();
633 }
634
635 // Compute the norms of the residuals
636 timeNorm -= MyWatch.WallTime();
637 if (vectWeight)
638 R.NormWeighted(*vectWeight, normR);
639 else
640 R.Norm2(normR);
641
642 // Scale the norms of residuals with the eigenvalues
643 // Count the converged eigenvectors
644 nFound = 0;
645 for (j = 0; j < blockSize; ++j) {
646 normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
647 if (normR[j] < tolEigenSolve)
648 nFound += 1;
649 minNorm = (normR[j] < minNorm) ? normR[j] : minNorm;
650 }
651 timeNorm += MyWatch.WallTime();
652
653 // Store the residual history
654 if (localVerbose > 2) {
655 memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double));
656 historyCount += 1;
657 }
658
659 // Print information on current iteration
660 if (localVerbose > 0) {
661 cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
662 cout << knownEV + nFound << endl;
663 }
664
665 if (localVerbose > 1) {
666 cout << endl;
667 cout.precision(2);
668 cout.setf(ios::scientific, ios::floatfield);
669 for (i=0; i<blockSize; ++i) {
670 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
671 cout << " = " << normR[i] << endl;
672 }
673 cout << endl;
674 cout.precision(2);
675 for (i=0; i<blockSize; ++i) {
676 cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
677 cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
678 cout << " = " << theta[i] << endl;
679 }
680 cout << endl;
681 }
682
683 if (nFound == 0) {
684 // When required, monitor some orthogonalities
685 if (verbose > 2) {
686 if (knownEV == 0) {
687 accuracyCheck(&X, 0, &R, 0, 0, 0);
688 }
689 else {
690 Epetra_MultiVector copyQ(View, Q, 0, knownEV);
691 accuracyCheck(&X, 0, &R, &copyQ, (localSize>blockSize) ? &H : 0,
692 (localSize>twoBlocks) ? &P : 0);
693 }
694 } // if (verbose > 2)
695 if (localSize < threeBlocks)
696 localSize += blockSize;
697 continue;
698 } // if (nFound == 0)
699
700 // Exit the loop if enough vectors have converged
701 if (knownEV + nFound >= numEigen) {
702 for (j = 0; j < blockSize; ++j) {
703 if (normR[j] < tolEigenSolve) {
704 lambda[knownEV] = theta[j];
705 memcpy(Q.Values() + knownEV*xr, X.Values() + j*xr, xr*sizeof(double));
706 knownEV += 1;
707 }
708 }
709 if (localVerbose == 1) {
710 cout << endl;
711 cout.precision(2);
712 cout.setf(ios::scientific, ios::floatfield);
713 for (i=0; i<blockSize; ++i) {
714 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
715 cout << " = " << normR[i] << endl;
716 }
717 cout << endl;
718 }
719 break;
720 } // if (knownEV >= numEigen)
721
722 // Store the converged eigenpairs and define the new X iterate
723 if (hasNextVectors == true) {
724 // Q stores the next Ritz vectors
725 for (j = 0; j < blockSize; ++j) {
726 if (normR[j] < tolEigenSolve) {
727 lambda[knownEV] = theta[j];
728 callFortran.SWAP(xr, X.Values() + j*xr, 1, Q.Values() + knownEV*xr, 1);
729 knownEV += 1;
730 }
731 }
732 nFound = 0;
733 }
734 else {
735 for (j = 0; j < blockSize; ++j) {
736 if (normR[j] < tolEigenSolve) {
737 lambda[knownEV] = theta[j];
738 memcpy(Q.Values() + knownEV*xr, X.Values() + j*xr, xr*sizeof(double));
739 knownEV += 1;
740 }
741 }
742 // Replace converged vectors with random vectors
743 timeRestart -= MyWatch.WallTime();
744 nFound = 0;
745 for (j = 0; j < blockSize; ++j) {
746 if (normR[j] >= tolEigenSolve) {
747 if (nFound > 0)
748 memcpy(X.Values() + (j-nFound)*xr, X.Values() + j*xr, xr*sizeof(double));
749 }
750 else
751 nFound += 1;
752 }
753 if (nFound > 0) {
754 // Put new random vectors at the end of the block
755 Epetra_MultiVector Xtmp(View, X, blockSize - nFound, nFound);
756 Xtmp.Random();
757 }
758 timeRestart += MyWatch.WallTime();
759 }
760 reStart = true;
761
762 } // for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter)
763 timeOuterLoop += MyWatch.WallTime();
764 highMem = (highMem > currentSize()) ? highMem : currentSize();
765
766 // Clean memory
767 delete[] work1;
768 delete[] work2;
769 if (vectWeight)
770 delete vectWeight;
771
772 // Sort the eigenpairs
773 timePostProce -= MyWatch.WallTime();
774 if ((info == 0) && (knownEV > 0)) {
775 mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
776 }
777 timePostProce += MyWatch.WallTime();
778
779 return (info == 0) ? knownEV : info;
780
781}
782
783
784void LOBPCG_light::accuracyCheck(const Epetra_MultiVector *X, const Epetra_MultiVector *MX,
785 const Epetra_MultiVector *R, const Epetra_MultiVector *Q,
786 const Epetra_MultiVector *H, const Epetra_MultiVector *P) const {
787
788 cout.precision(2);
789 cout.setf(ios::scientific, ios::floatfield);
790 double tmp;
791
792 int myPid = MyComm.MyPID();
793
794 if (X) {
795 if (M) {
796 if (MX) {
797 tmp = myVerify.errorEquality(X, MX, M);
798 if (myPid == 0)
799 cout << " >> Difference between MX and M*X = " << tmp << endl;
800 }
801 tmp = myVerify.errorOrthonormality(X, M);
802 if (myPid == 0)
803 cout << " >> Error in X^T M X - I = " << tmp << endl;
804 }
805 else {
806 tmp = myVerify.errorOrthonormality(X, 0);
807 if (myPid == 0)
808 cout << " >> Error in X^T X - I = " << tmp << endl;
809 }
810 }
811
812 if ((R) && (X)) {
813 tmp = myVerify.errorOrthogonality(X, R);
814 if (myPid == 0)
815 cout << " >> Orthogonality X^T R up to " << tmp << endl;
816 }
817
818 if (Q == 0)
819 return;
820
821 if (M) {
822 tmp = myVerify.errorOrthonormality(Q, M);
823 if (myPid == 0)
824 cout << " >> Error in Q^T M Q - I = " << tmp << endl;
825 if (X) {
826 tmp = myVerify.errorOrthogonality(Q, X, M);
827 if (myPid == 0)
828 cout << " >> Orthogonality Q^T M X up to " << tmp << endl;
829 }
830 if (H) {
831 tmp = myVerify.errorOrthogonality(Q, H, M);
832 if (myPid == 0)
833 cout << " >> Orthogonality Q^T M H up to " << tmp << endl;
834 }
835 if (P) {
836 tmp = myVerify.errorOrthogonality(Q, P, M);
837 if (myPid == 0)
838 cout << " >> Orthogonality Q^T M P up to " << tmp << endl;
839 }
840 }
841 else {
842 tmp = myVerify.errorOrthonormality(Q, 0);
843 if (myPid == 0)
844 cout << " >> Error in Q^T Q - I = " << tmp << endl;
845 if (X) {
846 tmp = myVerify.errorOrthogonality(Q, X, 0);
847 if (myPid == 0)
848 cout << " >> Orthogonality Q^T X up to " << tmp << endl;
849 }
850 if (H) {
851 tmp = myVerify.errorOrthogonality(Q, H, 0);
852 if (myPid == 0)
853 cout << " >> Orthogonality Q^T H up to " << tmp << endl;
854 }
855 if (P) {
856 tmp = myVerify.errorOrthogonality(Q, P, 0);
857 if (myPid == 0)
858 cout << " >> Orthogonality Q^T P up to " << tmp << endl;
859 }
860 }
861
862}
863
864
865void LOBPCG_light::initializeCounters() {
866
867 historyCount = 0;
868 if (resHistory) {
869 delete[] resHistory;
870 resHistory = 0;
871 }
872
873 memRequested = 0.0;
874 highMem = 0.0;
875
876 massOp = 0;
877 numRestart = 0;
878 outerIter = 0;
879 precOp = 0;
880 residual = 0;
881 stifOp = 0;
882
883 timeLocalProj = 0.0;
884 timeLocalSolve = 0.0;
885 timeLocalUpdate = 0.0;
886 timeMassOp = 0.0;
887 timeNorm = 0.0;
888 timeOrtho = 0.0;
889 timeOuterLoop = 0.0;
890 timePostProce = 0.0;
891 timePrecOp = 0.0;
892 timeResidual = 0.0;
893 timeRestart = 0.0;
894 timeStifOp = 0.0;
895
896
897}
898
899
900void LOBPCG_light::algorithmInfo() const {
901
902 int myPid = MyComm.MyPID();
903
904 if (myPid ==0) {
905 cout << " Algorithm: LOBPCG (block version) with storage for 4 blocks of vectors\n";
906 cout << " Block Size: " << blockSize << endl;
907 }
908
909}
910
911
912void LOBPCG_light::historyInfo() const {
913
914 if (resHistory) {
915 int j;
916 cout << " Block Size: " << blockSize << endl;
917 cout << endl;
918 cout << " Residuals\n";
919 cout << endl;
920 cout.precision(4);
921 cout.setf(ios::scientific, ios::floatfield);
922 for (j = 0; j < historyCount; ++j) {
923 int ii;
924 for (ii = 0; ii < blockSize; ++ii)
925 cout << resHistory[blockSize*j + ii] << endl;
926 }
927 cout << endl;
928 }
929
930}
931
932
933void LOBPCG_light::memoryInfo() const {
934
935 int myPid = MyComm.MyPID();
936
937 double maxHighMem = 0.0;
938 double tmp = highMem;
939 MyComm.MaxAll(&tmp, &maxHighMem, 1);
940
941 double maxMemRequested = 0.0;
942 tmp = memRequested;
943 MyComm.MaxAll(&tmp, &maxMemRequested, 1);
944
945 if (myPid == 0) {
946 cout.precision(2);
947 cout.setf(ios::fixed, ios::floatfield);
948 cout << " Memory requested per processor by the eigensolver = (EST) ";
949 cout.width(6);
950 cout << maxMemRequested << " MB " << endl;
951 cout << endl;
952 cout << " High water mark in eigensolver = (EST) ";
953 cout.width(6);
954 cout << maxHighMem << " MB " << endl;
955 cout << endl;
956 }
957
958}
959
960
961void LOBPCG_light::operationInfo() const {
962
963 int myPid = MyComm.MyPID();
964
965 if (myPid == 0) {
966 cout << " --- Operations ---\n\n";
967 cout << " Total number of mass matrix multiplications = ";
968 cout.width(9);
969 cout << massOp + modalTool.getNumProj_MassMult() + modalTool.getNumNorm_MassMult() << endl;
970 cout << " Mass multiplications in solver loop = ";
971 cout.width(9);
972 cout << massOp << endl;
973 cout << " Mass multiplications for orthogonalization = ";
974 cout.width(9);
975 cout << modalTool.getNumProj_MassMult() << endl;
976 cout << " Mass multiplications for normalization = ";
977 cout.width(9);
978 cout << modalTool.getNumNorm_MassMult() << endl;
979 cout << " Total number of stiffness matrix operations = ";
980 cout.width(9);
981 cout << stifOp << endl;
982 cout << " Total number of preconditioner applications = ";
983 cout.width(9);
984 cout << precOp << endl;
985 cout << " Total number of computed eigen-residuals = ";
986 cout.width(9);
987 cout << residual << endl;
988 cout << "\n";
989 cout << " Total number of outer iterations = ";
990 cout.width(9);
991 cout << outerIter << endl;
992 cout << " Number of restarts = ";
993 cout.width(9);
994 cout << numRestart << endl;
995 cout << "\n";
996 }
997
998}
999
1000
1001void LOBPCG_light::timeInfo() const {
1002
1003 int myPid = MyComm.MyPID();
1004
1005 if (myPid == 0) {
1006 cout << " --- Timings ---\n\n";
1007 cout.setf(ios::fixed, ios::floatfield);
1008 cout.precision(2);
1009 cout << " Total time for outer loop = ";
1010 cout.width(9);
1011 cout << timeOuterLoop << " s" << endl;
1012 cout << " Time for mass matrix operations = ";
1013 cout.width(9);
1014 cout << timeMassOp << " s ";
1015 cout.width(5);
1016 cout << 100*timeMassOp/timeOuterLoop << " %\n";
1017 cout << " Time for stiffness matrix operations = ";
1018 cout.width(9);
1019 cout << timeStifOp << " s ";
1020 cout.width(5);
1021 cout << 100*timeStifOp/timeOuterLoop << " %\n";
1022 cout << " Time for preconditioner applications = ";
1023 cout.width(9);
1024 cout << timePrecOp << " s ";
1025 cout.width(5);
1026 cout << 100*timePrecOp/timeOuterLoop << " %\n";
1027 cout << " Time for orthogonalizations = ";
1028 cout.width(9);
1029 cout << timeOrtho << " s ";
1030 cout.width(5);
1031 cout << 100*timeOrtho/timeOuterLoop << " %\n";
1032 cout << " Projection step : ";
1033 cout.width(9);
1034 cout << modalTool.getTimeProj() << " s\n";
1035 cout << " Q^T mult. :: ";
1036 cout.width(9);
1037 cout << modalTool.getTimeProj_QtMult() << " s\n";
1038 cout << " Q mult. :: ";
1039 cout.width(9);
1040 cout << modalTool.getTimeProj_QMult() << " s\n";
1041 cout << " Mass mult. :: ";
1042 cout.width(9);
1043 cout << modalTool.getTimeProj_MassMult() << " s\n";
1044 cout << " Normalization step : ";
1045 cout.width(9);
1046 cout << modalTool.getTimeNorm() << " s\n";
1047 cout << " Mass mult. :: ";
1048 cout.width(9);
1049 cout << modalTool.getTimeNorm_MassMult() << " s\n";
1050 cout << " Time for local projection = ";
1051 cout.width(9);
1052 cout << timeLocalProj << " s ";
1053 cout.width(5);
1054 cout << 100*timeLocalProj/timeOuterLoop << " %\n";
1055 cout << " Time for local eigensolve = ";
1056 cout.width(9);
1057 cout << timeLocalSolve << " s ";
1058 cout.width(5);
1059 cout << 100*timeLocalSolve/timeOuterLoop << " %\n";
1060 cout << " Time for local update = ";
1061 cout.width(9);
1062 cout << timeLocalUpdate << " s ";
1063 cout.width(5);
1064 cout << 100*timeLocalUpdate/timeOuterLoop << " %\n";
1065 cout << " Time for residual computations = ";
1066 cout.width(9);
1067 cout << timeResidual << " s ";
1068 cout.width(5);
1069 cout << 100*timeResidual/timeOuterLoop << " %\n";
1070 cout << " Time for residuals norm computations = ";
1071 cout.width(9);
1072 cout << timeNorm << " s ";
1073 cout.width(5);
1074 cout << 100*timeNorm/timeOuterLoop << " %\n";
1075 cout << " Time for restarting space definition = ";
1076 cout.width(9);
1077 cout << timeRestart << " s ";
1078 cout.width(5);
1079 cout << 100*timeRestart/timeOuterLoop << " %\n";
1080 cout << "\n";
1081 cout << " Total time for post-processing = ";
1082 cout.width(9);
1083 cout << timePostProce << " s\n";
1084 cout << endl;
1085 } // if (myPid == 0)
1086
1087}
1088
1089