Anasazi Version of the Day
Loading...
Searching...
No Matches
LOBPCG.cpp
1// @HEADER
2// *****************************************************************************
3// Anasazi: Block Eigensolvers Package
4//
5// Copyright 2004 NTESS and the Anasazi contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10//**************************************************************************
11//
12// NOTICE
13//
14// This software is a result of the research described in the cout
15//
16// " A comparison of algorithms for modal analysis in the absence
17// of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
18// Sandia National Laboratories, Technical cout SAND2003-1028J.
19//
20// It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
21// framework ( http://software.sandia.gov/trilinos/ ).
22//
23// The distribution of this software follows also the rules defined in Trilinos.
24// This notice shall be marked on any reproduction of this software, in whole or
25// in part.
26//
27// This program is distributed in the hope that it will be useful, but
28// WITHOUT ANY WARRANTY; without even the implied warranty of
29// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
30//
31// Code Authors: U. Hetmaniuk (ulhetma@sandia.gov), R. Lehoucq (rblehou@sandia.gov)
32//
33//**************************************************************************
34
35#include "LOBPCG.h"
36
37
38LOBPCG::LOBPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
39 const Epetra_Operator *PP, int _blk,
40 double _tol, int _maxIter, int _verb) :
41 myVerify(_Comm),
42 callBLAS(),
43 callFortran(),
44 modalTool(_Comm),
45 mySort(),
46 MyComm(_Comm),
47 K(KK),
48 M(0),
49 Prec(PP),
50 MyWatch(_Comm),
51 tolEigenSolve(_tol),
52 maxIterEigenSolve(_maxIter),
53 blockSize(_blk),
54 normWeight(0),
55 verbose(_verb),
56 historyCount(0),
57 resHistory(0),
58 memRequested(0.0),
59 highMem(0.0),
60 massOp(0),
61 numRestart(0),
62 outerIter(0),
63 precOp(0),
64 residual(0),
65 stifOp(0),
66 timeLocalProj(0.0),
67 timeLocalSolve(0.0),
68 timeLocalUpdate(0.0),
69 timeMassOp(0.0),
70 timeNorm(0.0),
71 timeOrtho(0.0),
72 timeOuterLoop(0.0),
73 timePostProce(0.0),
74 timePrecOp(0.0),
75 timeResidual(0.0),
76 timeRestart(0.0),
77 timeStifOp(0.0)
78 {
79
80}
81
82
83LOBPCG::LOBPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
84 const Epetra_Operator *MM, const Epetra_Operator *PP, int _blk,
85 double _tol, int _maxIter, int _verb,
86 double *_weight) :
87 myVerify(_Comm),
88 callBLAS(),
89 callFortran(),
90 modalTool(_Comm),
91 mySort(),
92 MyComm(_Comm),
93 K(KK),
94 M(MM),
95 Prec(PP),
96 MyWatch(_Comm),
97 tolEigenSolve(_tol),
98 maxIterEigenSolve(_maxIter),
99 blockSize(_blk),
100 normWeight(_weight),
101 verbose(_verb),
102 historyCount(0),
103 resHistory(0),
104 memRequested(0.0),
105 highMem(0.0),
106 massOp(0),
107 numRestart(0),
108 outerIter(0),
109 precOp(0),
110 residual(0),
111 stifOp(0),
112 timeLocalProj(0.0),
113 timeLocalSolve(0.0),
114 timeLocalUpdate(0.0),
115 timeMassOp(0.0),
116 timeNorm(0.0),
117 timeOrtho(0.0),
118 timeOuterLoop(0.0),
119 timePostProce(0.0),
120 timePrecOp(0.0),
121 timeResidual(0.0),
122 timeRestart(0.0),
123 timeStifOp(0.0)
124 {
125
126}
127
128
129LOBPCG::~LOBPCG() {
130
131 if (resHistory) {
132 delete[] resHistory;
133 resHistory = 0;
134 }
135
136}
137
138
139int LOBPCG::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {
140
141 // Computes the smallest eigenvalues and the corresponding eigenvectors
142 // of the generalized eigenvalue problem
143 //
144 // K X = M X Lambda
145 //
146 // using a modified Locally Optimal Block Preconditioned Conjugate
147 // Gradient method.
148 //
149 // Note that if M is not specified, then K X = X Lambda is solved.
150 //
151 // Ref: A. Knyazev, "Toward the optimal preconditioned eigensolver:
152 // Locally optimal block preconditioner conjugate gradient method",
153 // SIAM J. Sci. Comput., vol 23, n 2, pp. 517-541
154 //
155 // Input variables:
156 //
157 // numEigen (integer) = Number of eigenmodes requested
158 //
159 // Q (Epetra_MultiVector) = Converged eigenvectors
160 // The number of columns of Q must be equal to numEigen + blockSize.
161 // The rows of Q are distributed across processors.
162 // At exit, the first numEigen columns contain the eigenvectors requested.
163 //
164 // lambda (array of doubles) = Converged eigenvalues
165 // At input, it must be of size numEigen + blockSize.
166 // At exit, the first numEigen locations contain the eigenvalues requested.
167 //
168 // startingEV (integer) = Number of existing converged eigenmodes
169 //
170 // Return information on status of computation
171 //
172 // info >= 0 >> Number of converged eigenpairs at the end of computation
173 //
174 // // Failure due to input arguments
175 //
176 // info = - 1 >> The stiffness matrix K has not been specified.
177 // info = - 2 >> The maps for the matrix K and the matrix M differ.
178 // info = - 3 >> The maps for the matrix K and the preconditioner P differ.
179 // info = - 4 >> The maps for the vectors and the matrix K differ.
180 // info = - 5 >> Q is too small for the number of eigenvalues requested.
181 // info = - 6 >> Q is too small for the computation parameters.
182 //
183 // info = - 10 >> Failure during the mass orthonormalization
184 //
185 // info = - 20 >> Error in LAPACK during the local eigensolve
186 //
187 // info = - 30 >> MEMORY
188 //
189
190 // Check the input parameters
191
192 if (numEigen <= startingEV) {
193 return numEigen;
194 }
195
196 int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, minimumSpaceDimension(numEigen));
197 if (info < 0)
198 return info;
199
200 int myPid = MyComm.MyPID();
201
202 // Get the weight for approximating the M-inverse norm
203 Epetra_Vector *vectWeight = 0;
204 if (normWeight) {
205 vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
206 }
207
208 int knownEV = startingEV;
209 int localVerbose = verbose*(myPid==0);
210
211 // Define local block vectors
212 //
213 // MX = Working vectors (storing M*X if M is specified, else pointing to X)
214 // KX = Working vectors (storing K*X)
215 //
216 // R = Residuals
217 //
218 // H = Preconditioned search space
219 // MH = Working vectors (storing M*H if M is specified, else pointing to H)
220 // KH = Working vectors (storing K*H)
221 //
222 // P = Search directions
223 // MP = Working vectors (storing M*P if M is specified, else pointing to P)
224 // KP = Working vectors (storing K*P)
225
226 int xr = Q.MyLength();
227 Epetra_MultiVector X(View, Q, numEigen, blockSize);
228 X.Random();
229
230 int tmp;
231 tmp = (M == 0) ? 6*blockSize*xr : 9*blockSize*xr;
232
233 double *work1 = new (nothrow) double[tmp];
234 if (work1 == 0) {
235 if (vectWeight)
236 delete vectWeight;
237 info = -30;
238 return info;
239 }
240 memRequested += sizeof(double)*tmp/(1024.0*1024.0);
241
242 highMem = (highMem > currentSize()) ? highMem : currentSize();
243
244 double *tmpD = work1;
245
246 Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize);
247 tmpD = tmpD + xr*blockSize;
248
249 Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize);
250 tmpD = (M) ? tmpD + xr*blockSize : tmpD;
251
252 Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);
253 tmpD = tmpD + xr*blockSize;
254
255 Epetra_MultiVector H(View, Q.Map(), tmpD, xr, blockSize);
256 tmpD = tmpD + xr*blockSize;
257
258 Epetra_MultiVector KH(View, Q.Map(), tmpD, xr, blockSize);
259 tmpD = tmpD + xr*blockSize;
260
261 Epetra_MultiVector MH(View, Q.Map(), (M) ? tmpD : H.Values(), xr, blockSize);
262 tmpD = (M) ? tmpD + xr*blockSize : tmpD;
263
264 Epetra_MultiVector P(View, Q.Map(), tmpD, xr, blockSize);
265 tmpD = tmpD + xr*blockSize;
266
267 Epetra_MultiVector KP(View, Q.Map(), tmpD, xr, blockSize);
268 tmpD = tmpD + xr*blockSize;
269
270 Epetra_MultiVector MP(View, Q.Map(), (M) ? tmpD : P.Values(), xr, blockSize);
271
272 // Define arrays
273 //
274 // theta = Store the local eigenvalues (size: 3*blockSize)
275 // normR = Store the norm of residuals (size: blockSize)
276 //
277 // MM = Local mass matrix (size: 3*blockSize x 3*blockSize)
278 // KK = Local stiffness matrix (size: 3*blockSize x 3*blockSize)
279 //
280 // S = Local eigenvectors (size: 3*blockSize x 3*blockSize)
281
282 int lwork2;
283 lwork2 = 4*blockSize + 27*blockSize*blockSize;
284 double *work2 = new (nothrow) double[lwork2];
285 if (work2 == 0) {
286 if (vectWeight)
287 delete vectWeight;
288 delete[] work1;
289 info = -30;
290 return info;
291 }
292
293 highMem = (highMem > currentSize()) ? highMem : currentSize();
294
295 tmpD = work2;
296
297 double *theta = tmpD;
298 tmpD = tmpD + 3*blockSize;
299
300 double *normR = tmpD;
301 tmpD = tmpD + blockSize;
302
303 double *MM = tmpD;
304 tmpD = tmpD + 9*blockSize*blockSize;
305
306 double *KK = tmpD;
307 tmpD = tmpD + 9*blockSize*blockSize;
308
309 double *S = tmpD;
310
311 memRequested += sizeof(double)*lwork2/(1024.0*1024.0);
312
313 // Define an array to store the residuals history
314 if (localVerbose > 2) {
315 resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
316 if (resHistory == 0) {
317 if (vectWeight)
318 delete vectWeight;
319 delete[] work1;
320 delete[] work2;
321 info = -30;
322 return info;
323 }
324 historyCount = 0;
325 }
326
327 // Miscellaneous definitions
328
329 bool reStart = false;
330 numRestart = 0;
331
332 int localSize;
333 int twoBlocks = 2*blockSize;
334 int threeBlocks = 3*blockSize;
335 int nFound = blockSize;
336 int i, j;
337
338 if (localVerbose > 0) {
339 cout << endl;
340 cout << " *|* Problem: ";
341 if (M)
342 cout << "K*Q = M*Q D ";
343 else
344 cout << "K*Q = Q D ";
345 if (Prec)
346 cout << " with preconditioner";
347 cout << endl;
348 cout << " *|* Algorithm = LOBPCG" << endl;
349 cout << " *|* Size of blocks = " << blockSize << endl;
350 cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
351 cout.precision(2);
352 cout.setf(ios::scientific, ios::floatfield);
353 cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
354 cout << " *|* Norm used for convergence: ";
355 if (normWeight){
356 cout << "weighted L2-norm with user-provided weights" << endl;
357 }
358 else {
359 cout << "L^2-norm" << endl;
360 }
361 if (startingEV > 0)
362 cout << " *|* Input converged eigenvectors = " << startingEV << endl;
363 cout << "\n -- Start iterations -- \n";
364 }
365
366 timeOuterLoop -= MyWatch.WallTime();
367 for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {
368
369 highMem = (highMem > currentSize()) ? highMem : currentSize();
370
371 if ((outerIter == 1) || (reStart == true)) {
372
373 reStart = false;
374 localSize = blockSize;
375
376 if (nFound > 0) {
377
378 Epetra_MultiVector X2(View, X, blockSize-nFound, nFound);
379 Epetra_MultiVector MX2(View, MX, blockSize-nFound, nFound);
380 Epetra_MultiVector KX2(View, KX, blockSize-nFound, nFound);
381
382 // Apply the mass matrix to X
383 timeMassOp -= MyWatch.WallTime();
384 if (M)
385 M->Apply(X2, MX2);
386 timeMassOp += MyWatch.WallTime();
387 massOp += nFound;
388
389 if (knownEV > 0) {
390 // Orthonormalize X against the known eigenvectors with Gram-Schmidt
391 // Note: Use R as a temporary work space
392 Epetra_MultiVector copyQ(View, Q, 0, knownEV);
393 timeOrtho -= MyWatch.WallTime();
394 info = modalTool.massOrthonormalize(X, MX, M, copyQ, nFound, 1, R.Values());
395 timeOrtho += MyWatch.WallTime();
396 // Exit the code if the orthogonalization did not succeed
397 if (info < 0) {
398 info = -10;
399 if (vectWeight)
400 delete vectWeight;
401 delete[] work1;
402 delete[] work2;
403 return info;
404 }
405 }
406
407 // Apply the stiffness matrix to X
408 timeStifOp -= MyWatch.WallTime();
409 K->Apply(X2, KX2);
410 timeStifOp += MyWatch.WallTime();
411 stifOp += nFound;
412
413 } // if (nFound > 0)
414
415 } // if ((outerIter == 1) || (reStart == true))
416 else {
417
418 // Apply the preconditioner on the residuals
419 if (Prec) {
420 timePrecOp -= MyWatch.WallTime();
421 Prec->ApplyInverse(R, H);
422 timePrecOp += MyWatch.WallTime();
423 precOp += blockSize;
424 }
425 else {
426 memcpy(H.Values(), R.Values(), xr*blockSize*sizeof(double));
427 }
428
429 // Apply the mass matrix on H
430 timeMassOp -= MyWatch.WallTime();
431 if (M)
432 M->Apply(H, MH);
433 timeMassOp += MyWatch.WallTime();
434 massOp += blockSize;
435
436 if (knownEV > 0) {
437 // Orthogonalize H against the known eigenvectors
438 // Note: Use R as a temporary work space
439 Epetra_MultiVector copyQ(View, Q, 0, knownEV);
440 timeOrtho -= MyWatch.WallTime();
441 modalTool.massOrthonormalize(H, MH, M, copyQ, blockSize, 1, R.Values());
442 timeOrtho += MyWatch.WallTime();
443 }
444
445 // Apply the stiffness matrix to H
446 timeStifOp -= MyWatch.WallTime();
447 K->Apply(H, KH);
448 timeStifOp += MyWatch.WallTime();
449 stifOp += blockSize;
450
451 if (localSize == blockSize)
452 localSize += blockSize;
453
454 } // if ((outerIter == 1) || (reStart==true))
455
456 // Form "local" mass and stiffness matrices
457 // Note: Use S as a temporary workspace
458 timeLocalProj -= MyWatch.WallTime();
459 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KX.Values(), xr,
460 KK, localSize, S);
461 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MX.Values(), xr,
462 MM, localSize, S);
463 if (localSize > blockSize) {
464 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KH.Values(), xr,
465 KK + blockSize*localSize, localSize, S);
466 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, KH.Values(), xr,
467 KK + blockSize*localSize + blockSize, localSize, S);
468 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MH.Values(), xr,
469 MM + blockSize*localSize, localSize, S);
470 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, MH.Values(), xr,
471 MM + blockSize*localSize + blockSize, localSize, S);
472 if (localSize > twoBlocks) {
473 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KP.Values(), xr,
474 KK + twoBlocks*localSize, localSize, S);
475 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, KP.Values(), xr,
476 KK + twoBlocks*localSize + blockSize, localSize, S);
477 modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, KP.Values(), xr,
478 KK + twoBlocks*localSize + twoBlocks, localSize, S);
479 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MP.Values(), xr,
480 MM + twoBlocks*localSize, localSize, S);
481 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, MP.Values(), xr,
482 MM + twoBlocks*localSize + blockSize, localSize, S);
483 modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, MP.Values(), xr,
484 MM + twoBlocks*localSize + twoBlocks, localSize, S);
485 } // if (localSize > twoBlocks)
486 } // if (localSize > blockSize)
487 timeLocalProj += MyWatch.WallTime();
488
489 // Perform a spectral decomposition
490 timeLocalSolve -= MyWatch.WallTime();
491 int nevLocal = localSize;
492 info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
493 S, localSize, theta, localVerbose,
494 (blockSize == 1) ? 1 : 0);
495 timeLocalSolve += MyWatch.WallTime();
496
497 if (info < 0) {
498 // Stop when spectral decomposition has a critical failure
499 break;
500 } // if (info < 0)
501
502 // Check for restarting
503 if ((theta[0] < 0.0) || (nevLocal < blockSize)) {
504 if (localVerbose > 0) {
505 cout << " Iteration " << outerIter;
506 cout << "- Failure for spectral decomposition - RESTART with new random search\n";
507 }
508 if (blockSize == 1) {
509 X.Random();
510 nFound = blockSize;
511 }
512 else {
513 Epetra_MultiVector Xinit(View, X, 1, blockSize-1);
514 Xinit.Random();
515 nFound = blockSize - 1;
516 } // if (blockSize == 1)
517 reStart = true;
518 numRestart += 1;
519 info = 0;
520 continue;
521 } // if ((theta[0] < 0.0) || (nevLocal < blockSize))
522
523 if ((localSize == twoBlocks) && (nevLocal == blockSize)) {
524 for (j = 0; j < nevLocal; ++j)
525 memcpy(S + j*blockSize, S + j*twoBlocks, blockSize*sizeof(double));
526 localSize = blockSize;
527 }
528
529 if ((localSize == threeBlocks) && (nevLocal <= twoBlocks)) {
530 for (j = 0; j < nevLocal; ++j)
531 memcpy(S + j*twoBlocks, S + j*threeBlocks, twoBlocks*sizeof(double));
532 localSize = twoBlocks;
533 }
534
535 // Compute the residuals
536 timeResidual -= MyWatch.WallTime();
537 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KX.Values(), xr,
538 S, localSize, 0.0, R.Values(), xr);
539 if (localSize >= twoBlocks) {
540 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KH.Values(), xr,
541 S + blockSize, localSize, 1.0, R.Values(), xr);
542 if (localSize == threeBlocks) {
543 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
544 S + twoBlocks, localSize, 1.0, R.Values(), xr);
545 }
546 }
547 for (j = 0; j < blockSize; ++j)
548 callBLAS.SCAL(localSize, theta[j], S + j*localSize);
549 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MX.Values(), xr,
550 S, localSize, 1.0, R.Values(), xr);
551 if (localSize >= twoBlocks) {
552 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MH.Values(), xr,
553 S + blockSize, localSize, 1.0, R.Values(), xr);
554 if (localSize == threeBlocks) {
555 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MP.Values(), xr,
556 S + twoBlocks, localSize, 1.0, R.Values(), xr);
557 }
558 }
559 for (j = 0; j < blockSize; ++j)
560 callBLAS.SCAL(localSize, 1.0/theta[j], S + j*localSize);
561 timeResidual += MyWatch.WallTime();
562
563 // Compute the norms of the residuals
564 timeNorm -= MyWatch.WallTime();
565 if (vectWeight)
566 R.NormWeighted(*vectWeight, normR);
567 else
568 R.Norm2(normR);
569
570 // Scale the norms of residuals with the eigenvalues
571 // Count the converged eigenvectors
572 nFound = 0;
573 for (j = 0; j < blockSize; ++j) {
574 normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
575 if (normR[j] < tolEigenSolve)
576 nFound += 1;
577 }
578 timeNorm += MyWatch.WallTime();
579
580 // Store the residual history
581 if (localVerbose > 2) {
582 memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double));
583 historyCount += 1;
584 }
585
586 // Print information on current iteration
587 if (localVerbose > 0) {
588 cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
589 cout << knownEV + nFound << endl;
590 }
591
592 if (localVerbose > 1) {
593 cout << endl;
594 cout.precision(2);
595 cout.setf(ios::scientific, ios::floatfield);
596 for (i=0; i<blockSize; ++i) {
597 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
598 cout << " = " << normR[i] << endl;
599 }
600 cout << endl;
601 cout.precision(2);
602 for (i=0; i<blockSize; ++i) {
603 cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
604 cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
605 cout << " = " << theta[i] << endl;
606 }
607 cout << endl;
608 }
609
610 if (nFound == 0) {
611 // Update the spaces
612 // Note: Use R as a temporary work space
613 timeLocalUpdate -= MyWatch.WallTime();
614 memcpy(R.Values(), X.Values(), xr*blockSize*sizeof(double));
615 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, S, localSize,
616 0.0, X.Values(), xr);
617 memcpy(R.Values(), KX.Values(), xr*blockSize*sizeof(double));
618 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, S, localSize,
619 0.0, KX.Values(), xr);
620 if (M) {
621 memcpy(R.Values(), MX.Values(), xr*blockSize*sizeof(double));
622 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, S, localSize,
623 0.0, MX.Values(), xr);
624 }
625 if (localSize == twoBlocks) {
626 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr,
627 S+blockSize, localSize, 0.0, P.Values(), xr);
628 callBLAS.AXPY(xr*blockSize, 1.0, P.Values(), X.Values());
629 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KH.Values(), xr,
630 S+blockSize, localSize, 0.0, KP.Values(), xr);
631 callBLAS.AXPY(xr*blockSize, 1.0, KP.Values(), KX.Values());
632 if (M) {
633 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, MH.Values(), xr,
634 S+blockSize, localSize, 0.0, MP.Values(), xr);
635 callBLAS.AXPY(xr*blockSize, 1.0, MP.Values(), MX.Values());
636 }
637 } // if (localSize == twoBlocks)
638 if (localSize == threeBlocks) {
639 memcpy(R.Values(), P.Values(), xr*blockSize*sizeof(double));
640 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
641 S + twoBlocks, localSize, 0.0, P.Values(), xr);
642 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr,
643 S + blockSize, localSize, 1.0, P.Values(), xr);
644 callBLAS.AXPY(xr*blockSize, 1.0, P.Values(), X.Values());
645 memcpy(R.Values(), KP.Values(), xr*blockSize*sizeof(double));
646 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
647 S + twoBlocks, localSize, 0.0, KP.Values(), xr);
648 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KH.Values(), xr,
649 S + blockSize, localSize, 1.0, KP.Values(), xr);
650 callBLAS.AXPY(xr*blockSize, 1.0, KP.Values(), KX.Values());
651 if (M) {
652 memcpy(R.Values(), MP.Values(), xr*blockSize*sizeof(double));
653 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
654 S + twoBlocks, localSize, 0.0, MP.Values(), xr);
655 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, MH.Values(), xr,
656 S + blockSize, localSize, 1.0, MP.Values(), xr);
657 callBLAS.AXPY(xr*blockSize, 1.0, MP.Values(), MX.Values());
658 }
659 } // if (localSize == threeBlocks)
660 timeLocalUpdate += MyWatch.WallTime();
661 // Compute the new residuals
662 timeResidual -= MyWatch.WallTime();
663 memcpy(R.Values(), KX.Values(), blockSize*xr*sizeof(double));
664 for (j = 0; j < blockSize; ++j)
665 callBLAS.AXPY(xr, -theta[j], MX.Values() + j*xr, R.Values() + j*xr);
666 timeResidual += MyWatch.WallTime();
667 // When required, monitor some orthogonalities
668 if (verbose > 2) {
669 if (knownEV == 0) {
670 accuracyCheck(&X, &MX, &R, 0, 0, 0);
671 }
672 else {
673 Epetra_MultiVector copyQ(View, Q, 0, knownEV);
674 accuracyCheck(&X, &MX, &R, &copyQ, (localSize>blockSize) ? &H : 0,
675 (localSize>twoBlocks) ? &P : 0);
676 }
677 } // if (verbose > 2)
678 if (localSize < threeBlocks)
679 localSize += blockSize;
680 continue;
681 } // if (nFound == 0)
682
683 // Order the Ritz eigenvectors by putting the converged vectors at the beginning
684 int firstIndex = blockSize;
685 for (j = 0; j < blockSize; ++j) {
686 if (normR[j] >= tolEigenSolve) {
687 firstIndex = j;
688 break;
689 }
690 } // for (j = 0; j < blockSize; ++j)
691 while (firstIndex < nFound) {
692 for (j = firstIndex; j < blockSize; ++j) {
693 if (normR[j] < tolEigenSolve) {
694 // Swap the j-th and firstIndex-th position
695 callFortran.SWAP(localSize, S + j*localSize, 1, S + firstIndex*localSize, 1);
696 callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
697 callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
698 break;
699 }
700 } // for (j = firstIndex; j < blockSize; ++j)
701 for (j = 0; j < blockSize; ++j) {
702 if (normR[j] >= tolEigenSolve) {
703 firstIndex = j;
704 break;
705 }
706 } // for (j = 0; j < blockSize; ++j)
707 } // while (firstIndex < nFound)
708
709 // Copy the converged eigenvalues
710 memcpy(lambda + knownEV, theta, nFound*sizeof(double));
711
712 // Convergence test
713 if (knownEV + nFound >= numEigen) {
714 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
715 S, localSize, 0.0, R.Values(), xr);
716 if (localSize >= twoBlocks) {
717 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, H.Values(), xr,
718 S + blockSize, localSize, 1.0, R.Values(), xr);
719 if (localSize == threeBlocks) {
720 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
721 S + twoBlocks, localSize, 1.0, R.Values(), xr);
722 }
723 }
724 memcpy(Q.Values() + knownEV*xr, R.Values(), nFound*xr*sizeof(double));
725 knownEV += nFound;
726 if (localVerbose == 1) {
727 cout << endl;
728 cout.precision(2);
729 cout.setf(ios::scientific, ios::floatfield);
730 for (i=0; i<blockSize; ++i) {
731 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
732 cout << " = " << normR[i] << endl;
733 }
734 cout << endl;
735 }
736 break;
737 }
738
739 // Store the converged eigenvalues and eigenvectors
740 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
741 S, localSize, 0.0, Q.Values() + knownEV*xr, xr);
742 if (localSize >= twoBlocks) {
743 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, H.Values(), xr,
744 S + blockSize, localSize, 1.0, Q.Values() + knownEV*xr, xr);
745 if (localSize >= threeBlocks)
746 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
747 S + twoBlocks, localSize, 1.0, Q.Values() + knownEV*xr, xr);
748 }
749 knownEV += nFound;
750
751 // Define the restarting vectors
752 timeRestart -= MyWatch.WallTime();
753 int leftOver = (nevLocal < blockSize + nFound) ? nevLocal - nFound : blockSize;
754 double *Snew = S + nFound*localSize;
755 memcpy(R.Values(), X.Values(), blockSize*xr*sizeof(double));
756 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, R.Values(), xr,
757 Snew, localSize, 0.0, X.Values(), xr);
758 memcpy(R.Values(), KX.Values(), blockSize*xr*sizeof(double));
759 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, R.Values(), xr,
760 Snew, localSize, 0.0, KX.Values(), xr);
761 if (M) {
762 memcpy(R.Values(), MX.Values(), blockSize*xr*sizeof(double));
763 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, R.Values(), xr,
764 Snew, localSize, 0.0, MX.Values(), xr);
765 }
766 if (localSize >= twoBlocks) {
767 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
768 Snew+blockSize, localSize, 1.0, X.Values(), xr);
769 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, KH.Values(), xr,
770 Snew+blockSize, localSize, 1.0, KX.Values(), xr);
771 if (M) {
772 callBLAS.GEMM('N','N', xr, leftOver, blockSize, 1.0, MH.Values(), xr,
773 Snew+blockSize, localSize, 1.0, MX.Values(), xr);
774 }
775 if (localSize >= threeBlocks) {
776 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, P.Values(), xr,
777 Snew+twoBlocks, localSize, 1.0, X.Values(), xr);
778 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, KP.Values(), xr,
779 Snew+twoBlocks, localSize, 1.0, KX.Values(), xr);
780 if (M) {
781 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, MP.Values(), xr,
782 Snew+twoBlocks, localSize, 1.0, MX.Values(), xr);
783 }
784 }
785 }
786 if (nevLocal < blockSize + nFound) {
787 // Put new random vectors at the end of the block
788 Epetra_MultiVector Xtmp(View, X, leftOver, blockSize - leftOver);
789 Xtmp.Random();
790 }
791 else {
792 nFound = 0;
793 }
794 reStart = true;
795 timeRestart += MyWatch.WallTime();
796
797 } // for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter)
798 timeOuterLoop += MyWatch.WallTime();
799 highMem = (highMem > currentSize()) ? highMem : currentSize();
800
801 // Clean memory
802 delete[] work1;
803 delete[] work2;
804 if (vectWeight)
805 delete vectWeight;
806
807 // Sort the eigenpairs
808 timePostProce -= MyWatch.WallTime();
809 if ((info == 0) && (knownEV > 0)) {
810 mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
811 }
812 timePostProce += MyWatch.WallTime();
813
814 return (info == 0) ? knownEV : info;
815
816}
817
818
819void LOBPCG::accuracyCheck(const Epetra_MultiVector *X, const Epetra_MultiVector *MX,
820 const Epetra_MultiVector *R, const Epetra_MultiVector *Q,
821 const Epetra_MultiVector *H, const Epetra_MultiVector *P) const {
822
823 cout.precision(2);
824 cout.setf(ios::scientific, ios::floatfield);
825 double tmp;
826
827 int myPid = MyComm.MyPID();
828
829 if (X) {
830 if (M) {
831 if (MX) {
832 tmp = myVerify.errorEquality(X, MX, M);
833 if (myPid == 0)
834 cout << " >> Difference between MX and M*X = " << tmp << endl;
835 }
836 tmp = myVerify.errorOrthonormality(X, M);
837 if (myPid == 0)
838 cout << " >> Error in X^T M X - I = " << tmp << endl;
839 }
840 else {
841 tmp = myVerify.errorOrthonormality(X, 0);
842 if (myPid == 0)
843 cout << " >> Error in X^T X - I = " << tmp << endl;
844 }
845 }
846
847 if ((R) && (X)) {
848 tmp = myVerify.errorOrthogonality(X, R);
849 if (myPid == 0)
850 cout << " >> Orthogonality X^T R up to " << tmp << endl;
851 }
852
853 if (Q == 0)
854 return;
855
856 if (M) {
857 tmp = myVerify.errorOrthonormality(Q, M);
858 if (myPid == 0)
859 cout << " >> Error in Q^T M Q - I = " << tmp << endl;
860 if (X) {
861 tmp = myVerify.errorOrthogonality(Q, X, M);
862 if (myPid == 0)
863 cout << " >> Orthogonality Q^T M X up to " << tmp << endl;
864 }
865 if (H) {
866 tmp = myVerify.errorOrthogonality(Q, H, M);
867 if (myPid == 0)
868 cout << " >> Orthogonality Q^T M H up to " << tmp << endl;
869 }
870 if (P) {
871 tmp = myVerify.errorOrthogonality(Q, P, M);
872 if (myPid == 0)
873 cout << " >> Orthogonality Q^T M P up to " << tmp << endl;
874 }
875 }
876 else {
877 tmp = myVerify.errorOrthonormality(Q, 0);
878 if (myPid == 0)
879 cout << " >> Error in Q^T Q - I = " << tmp << endl;
880 if (X) {
881 tmp = myVerify.errorOrthogonality(Q, X, 0);
882 if (myPid == 0)
883 cout << " >> Orthogonality Q^T X up to " << tmp << endl;
884 }
885 if (H) {
886 tmp = myVerify.errorOrthogonality(Q, H, 0);
887 if (myPid == 0)
888 cout << " >> Orthogonality Q^T H up to " << tmp << endl;
889 }
890 if (P) {
891 tmp = myVerify.errorOrthogonality(Q, P, 0);
892 if (myPid == 0)
893 cout << " >> Orthogonality Q^T P up to " << tmp << endl;
894 }
895 }
896
897}
898
899
900void LOBPCG::initializeCounters() {
901
902 historyCount = 0;
903 if (resHistory) {
904 delete[] resHistory;
905 resHistory = 0;
906 }
907
908 memRequested = 0.0;
909 highMem = 0.0;
910
911 massOp = 0;
912 numRestart = 0;
913 outerIter = 0;
914 precOp = 0;
915 residual = 0;
916 stifOp = 0;
917
918 timeLocalProj = 0.0;
919 timeLocalSolve = 0.0;
920 timeLocalUpdate = 0.0;
921 timeMassOp = 0.0;
922 timeNorm = 0.0;
923 timeOrtho = 0.0;
924 timeOuterLoop = 0.0;
925 timePostProce = 0.0;
926 timePrecOp = 0.0;
927 timeResidual = 0.0;
928 timeRestart = 0.0;
929 timeStifOp = 0.0;
930
931
932}
933
934
935void LOBPCG::algorithmInfo() const {
936
937 int myPid = MyComm.MyPID();
938
939 if (myPid ==0) {
940 cout << " Algorithm: LOBPCG (block version) with Cholesky-based local eigensolver\n";
941 cout << " Block Size: " << blockSize << endl;
942 }
943
944}
945
946
947void LOBPCG::historyInfo() const {
948
949 if (resHistory) {
950 int j;
951 cout << " Block Size: " << blockSize << endl;
952 cout << endl;
953 cout << " Residuals\n";
954 cout << endl;
955 cout.precision(4);
956 cout.setf(ios::scientific, ios::floatfield);
957 for (j = 0; j < historyCount; ++j) {
958 int ii;
959 for (ii = 0; ii < blockSize; ++ii)
960 cout << resHistory[blockSize*j + ii] << endl;
961 }
962 cout << endl;
963 }
964
965}
966
967
968void LOBPCG::memoryInfo() const {
969
970 int myPid = MyComm.MyPID();
971
972 double maxHighMem = 0.0;
973 double tmp = highMem;
974 MyComm.MaxAll(&tmp, &maxHighMem, 1);
975
976 double maxMemRequested = 0.0;
977 tmp = memRequested;
978 MyComm.MaxAll(&tmp, &maxMemRequested, 1);
979
980 if (myPid == 0) {
981 cout.precision(2);
982 cout.setf(ios::fixed, ios::floatfield);
983 cout << " Memory requested per processor by the eigensolver = (EST) ";
984 cout.width(6);
985 cout << maxMemRequested << " MB " << endl;
986 cout << endl;
987 cout << " High water mark in eigensolver = (EST) ";
988 cout.width(6);
989 cout << maxHighMem << " MB " << endl;
990 cout << endl;
991 }
992
993}
994
995
996void LOBPCG::operationInfo() const {
997
998 int myPid = MyComm.MyPID();
999
1000 if (myPid == 0) {
1001 cout << " --- Operations ---\n\n";
1002 cout << " Total number of mass matrix multiplications = ";
1003 cout.width(9);
1004 cout << massOp + modalTool.getNumProj_MassMult() + modalTool.getNumNorm_MassMult() << endl;
1005 cout << " Mass multiplications in solver loop = ";
1006 cout.width(9);
1007 cout << massOp << endl;
1008 cout << " Mass multiplications for orthogonalization = ";
1009 cout.width(9);
1010 cout << modalTool.getNumProj_MassMult() << endl;
1011 cout << " Mass multiplications for normalization = ";
1012 cout.width(9);
1013 cout << modalTool.getNumNorm_MassMult() << endl;
1014 cout << " Total number of stiffness matrix operations = ";
1015 cout.width(9);
1016 cout << stifOp << endl;
1017 cout << " Total number of preconditioner applications = ";
1018 cout.width(9);
1019 cout << precOp << endl;
1020 cout << " Total number of computed eigen-residuals = ";
1021 cout.width(9);
1022 cout << residual << endl;
1023 cout << "\n";
1024 cout << " Total number of outer iterations = ";
1025 cout.width(9);
1026 cout << outerIter << endl;
1027 cout << " Number of restarts = ";
1028 cout.width(9);
1029 cout << numRestart << endl;
1030 cout << "\n";
1031 }
1032
1033}
1034
1035
1036void LOBPCG::timeInfo() const {
1037
1038 int myPid = MyComm.MyPID();
1039
1040 if (myPid == 0) {
1041 cout << " --- Timings ---\n\n";
1042 cout.setf(ios::fixed, ios::floatfield);
1043 cout.precision(2);
1044 cout << " Total time for outer loop = ";
1045 cout.width(9);
1046 cout << timeOuterLoop << " s" << endl;
1047 cout << " Time for mass matrix operations = ";
1048 cout.width(9);
1049 cout << timeMassOp << " s ";
1050 cout.width(5);
1051 cout << 100*timeMassOp/timeOuterLoop << " %\n";
1052 cout << " Time for stiffness matrix operations = ";
1053 cout.width(9);
1054 cout << timeStifOp << " s ";
1055 cout.width(5);
1056 cout << 100*timeStifOp/timeOuterLoop << " %\n";
1057 cout << " Time for preconditioner applications = ";
1058 cout.width(9);
1059 cout << timePrecOp << " s ";
1060 cout.width(5);
1061 cout << 100*timePrecOp/timeOuterLoop << " %\n";
1062 cout << " Time for orthogonalizations = ";
1063 cout.width(9);
1064 cout << timeOrtho << " s ";
1065 cout.width(5);
1066 cout << 100*timeOrtho/timeOuterLoop << " %\n";
1067 cout << " Projection step : ";
1068 cout.width(9);
1069 cout << modalTool.getTimeProj() << " s\n";
1070 cout << " Q^T mult. :: ";
1071 cout.width(9);
1072 cout << modalTool.getTimeProj_QtMult() << " s\n";
1073 cout << " Q mult. :: ";
1074 cout.width(9);
1075 cout << modalTool.getTimeProj_QMult() << " s\n";
1076 cout << " Mass mult. :: ";
1077 cout.width(9);
1078 cout << modalTool.getTimeProj_MassMult() << " s\n";
1079 cout << " Normalization step : ";
1080 cout.width(9);
1081 cout << modalTool.getTimeNorm() << " s\n";
1082 cout << " Mass mult. :: ";
1083 cout.width(9);
1084 cout << modalTool.getTimeNorm_MassMult() << " s\n";
1085 cout << " Time for local projection = ";
1086 cout.width(9);
1087 cout << timeLocalProj << " s ";
1088 cout.width(5);
1089 cout << 100*timeLocalProj/timeOuterLoop << " %\n";
1090 cout << " Time for local eigensolve = ";
1091 cout.width(9);
1092 cout << timeLocalSolve << " s ";
1093 cout.width(5);
1094 cout << 100*timeLocalSolve/timeOuterLoop << " %\n";
1095 cout << " Time for local update = ";
1096 cout.width(9);
1097 cout << timeLocalUpdate << " s ";
1098 cout.width(5);
1099 cout << 100*timeLocalUpdate/timeOuterLoop << " %\n";
1100 cout << " Time for residual computations = ";
1101 cout.width(9);
1102 cout << timeResidual << " s ";
1103 cout.width(5);
1104 cout << 100*timeResidual/timeOuterLoop << " %\n";
1105 cout << " Time for residuals norm computations = ";
1106 cout.width(9);
1107 cout << timeNorm << " s ";
1108 cout.width(5);
1109 cout << 100*timeNorm/timeOuterLoop << " %\n";
1110 cout << " Time for restarting space definition = ";
1111 cout.width(9);
1112 cout << timeRestart << " s ";
1113 cout.width(5);
1114 cout << 100*timeRestart/timeOuterLoop << " %\n";
1115 cout << "\n";
1116 cout << " Total time for post-processing = ";
1117 cout.width(9);
1118 cout << timePostProce << " s\n";
1119 cout << endl;
1120 } // if (myPid == 0)
1121
1122}
1123
1124