Anasazi Version of the Day
Loading...
Searching...
No Matches
BRQMIN.cpp
1// @HEADER
2// *****************************************************************************
3// Anasazi: Block Eigensolvers Package
4//
5// Copyright 2004 NTESS and the Anasazi contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10// This software is a result of the research described in the report
11//
12// "A comparison of algorithms for modal analysis in the absence
13// of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
14// Sandia National Laboratories, Technical report SAND2003-1028J.
15//
16// It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
17// framework ( http://trilinos.org/ ).
18
19#include "BRQMIN.h"
20
21
22BRQMIN::BRQMIN(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
23 const Epetra_Operator *PP, int _blk,
24 double _tol, int _maxIter, int _verb) :
25 myVerify(_Comm),
26 callBLAS(),
27 callFortran(),
28 callLAPACK(),
29 modalTool(_Comm),
30 mySort(),
31 MyComm(_Comm),
32 K(KK),
33 M(0),
34 Prec(PP),
35 MyWatch(_Comm),
36 tolEigenSolve(_tol),
37 maxIterEigenSolve(_maxIter),
38 blockSize(_blk),
39 normWeight(0),
40 verbose(_verb),
41 historyCount(0),
42 resHistory(0),
43 memRequested(0.0),
44 highMem(0.0),
45 massOp(0),
46 numRestart(0),
47 outerIter(0),
48 precOp(0),
49 residual(0),
50 stifOp(0),
51 timeLocalProj(0.0),
52 timeLocalSolve(0.0),
53 timeLocalUpdate(0.0),
54 timeMassOp(0.0),
55 timeNorm(0.0),
56 timeOrtho(0.0),
57 timeOuterLoop(0.0),
58 timePostProce(0.0),
59 timePrecOp(0.0),
60 timeResidual(0.0),
61 timeRestart(0.0),
62 timeSearchP(0.0),
63 timeStifOp(0.0)
64 {
65
66}
67
68
69BRQMIN::BRQMIN(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
70 const Epetra_Operator *MM, const Epetra_Operator *PP, int _blk,
71 double _tol, int _maxIter, int _verb,
72 double *_weight) :
73 myVerify(_Comm),
74 callBLAS(),
75 callFortran(),
76 callLAPACK(),
77 modalTool(_Comm),
78 mySort(),
79 MyComm(_Comm),
80 K(KK),
81 M(MM),
82 Prec(PP),
83 MyWatch(_Comm),
84 tolEigenSolve(_tol),
85 maxIterEigenSolve(_maxIter),
86 blockSize(_blk),
87 normWeight(_weight),
88 verbose(_verb),
89 historyCount(0),
90 resHistory(0),
91 memRequested(0.0),
92 highMem(0.0),
93 massOp(0),
94 numRestart(0),
95 outerIter(0),
96 precOp(0),
97 residual(0),
98 stifOp(0),
99 timeLocalProj(0.0),
100 timeLocalSolve(0.0),
101 timeLocalUpdate(0.0),
102 timeMassOp(0.0),
103 timeNorm(0.0),
104 timeOrtho(0.0),
105 timeOuterLoop(0.0),
106 timePostProce(0.0),
107 timePrecOp(0.0),
108 timeResidual(0.0),
109 timeRestart(0.0),
110 timeSearchP(0.0),
111 timeStifOp(0.0)
112 {
113
114}
115
116
117BRQMIN::~BRQMIN() {
118
119 if (resHistory) {
120 delete[] resHistory;
121 resHistory = 0;
122 }
123
124}
125
126
127int BRQMIN::solve(int numEigen, Epetra_MultiVector &Q, double *lambda) {
128
129 return BRQMIN::reSolve(numEigen, Q, lambda);
130
131}
132
133
134int BRQMIN::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {
135
136 // Computes the smallest eigenvalues and the corresponding eigenvectors
137 // of the generalized eigenvalue problem
138 //
139 // K X = M X Lambda
140 //
141 // using a Block Rayleigh Quotient MINimization.
142 //
143 // Note that if M is not specified, then K X = X Lambda is solved.
144 //
145 // Ref: P. Arbenz & R. Lehoucq, "A comparison of algorithms for modal analysis in the
146 // absence of a sparse direct method", SNL, Technical Report SAND2003-1028J
147 // With the notations of this report, the coefficient beta is defined as
148 // ( P^T K P )^{-1} P^T K H
149 //
150 // Input variables:
151 //
152 // numEigen (integer) = Number of eigenmodes requested
153 //
154 // Q (Epetra_MultiVector) = Converged eigenvectors
155 // The number of columns of Q must be equal to numEigen + blockSize.
156 // The rows of Q are distributed across processors.
157 // At exit, the first numEigen columns contain the eigenvectors requested.
158 //
159 // lambda (array of doubles) = Converged eigenvalues
160 // At input, it must be of size numEigen + blockSize.
161 // At exit, the first numEigen locations contain the eigenvalues requested.
162 //
163 // startingEV (integer) = Number of existing converged eigenmodes
164 //
165 // Return information on status of computation
166 //
167 // info >= 0 >> Number of converged eigenpairs at the end of computation
168 //
169 // // Failure due to input arguments
170 //
171 // info = - 1 >> The stiffness matrix K has not been specified.
172 // info = - 2 >> The maps for the matrix K and the matrix M differ.
173 // info = - 3 >> The maps for the matrix K and the preconditioner P differ.
174 // info = - 4 >> The maps for the vectors and the matrix K differ.
175 // info = - 5 >> Q is too small for the number of eigenvalues requested.
176 // info = - 6 >> Q is too small for the computation parameters.
177 //
178 // info = - 10 >> Failure during the mass orthonormalization
179 //
180 // info = - 20 >> Error in LAPACK during the local eigensolve
181 //
182 // info = - 30 >> MEMORY
183 //
184
185 // Check the input parameters
186
187 if (numEigen <= startingEV) {
188 return startingEV;
189 }
190
191 int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, numEigen + blockSize);
192 if (info < 0)
193 return info;
194
195 int myPid = MyComm.MyPID();
196
197 // Get the weight for approximating the M-inverse norm
198 Epetra_Vector *vectWeight = 0;
199 if (normWeight) {
200 vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
201 }
202
203 int knownEV = startingEV;
204 int localVerbose = verbose*(myPid==0);
205
206 // Define local block vectors
207 //
208 // MX = Working vectors (storing M*X if M is specified, else pointing to X)
209 // KX = Working vectors (storing K*X)
210 //
211 // R = Residuals
212 //
213 // H = Preconditioned residuals
214 //
215 // P = Search directions
216 // MP = Working vectors (storing M*P if M is specified, else pointing to P)
217 // KP = Working vectors (storing K*P)
218
219 int xr = Q.MyLength();
220 Epetra_MultiVector X(View, Q, numEigen, blockSize);
221 X.Random();
222
223 int tmp;
224 tmp = (M == 0) ? 5*blockSize*xr : 7*blockSize*xr;
225
226 double *work1 = new (nothrow) double[tmp];
227 if (work1 == 0) {
228 if (vectWeight)
229 delete vectWeight;
230 info = -30;
231 return info;
232 }
233 memRequested += sizeof(double)*tmp/(1024.0*1024.0);
234
235 highMem = (highMem > currentSize()) ? highMem : currentSize();
236
237 double *tmpD = work1;
238
239 Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize);
240 tmpD = tmpD + xr*blockSize;
241
242 Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize);
243 tmpD = (M) ? tmpD + xr*blockSize : tmpD;
244
245 Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);
246 tmpD = tmpD + xr*blockSize;
247
248 Epetra_MultiVector H(View, Q.Map(), tmpD, xr, blockSize);
249 tmpD = tmpD + xr*blockSize;
250
251 Epetra_MultiVector P(View, Q.Map(), tmpD, xr, blockSize);
252 tmpD = tmpD + xr*blockSize;
253
254 Epetra_MultiVector KP(View, Q.Map(), tmpD, xr, blockSize);
255 tmpD = tmpD + xr*blockSize;
256
257 Epetra_MultiVector MP(View, Q.Map(), (M) ? tmpD : P.Values(), xr, blockSize);
258
259 // Define arrays
260 //
261 // theta = Store the local eigenvalues (size: 2*blockSize)
262 // normR = Store the norm of residuals (size: blockSize)
263 //
264 // MM = Local mass matrix (size: 2*blockSize x 2*blockSize)
265 // KK = Local stiffness matrix (size: 2*blockSize x 2*blockSize)
266 //
267 // S = Local eigenvectors (size: 2*blockSize x 2*blockSize)
268
269 int lwork2;
270 lwork2 = 3*blockSize + 12*blockSize*blockSize;
271 double *work2 = new (nothrow) double[lwork2];
272 if (work2 == 0) {
273 if (vectWeight)
274 delete vectWeight;
275 delete[] work1;
276 info = -30;
277 return info;
278 }
279
280 highMem = (highMem > currentSize()) ? highMem : currentSize();
281
282 tmpD = work2;
283
284 double *theta = tmpD;
285 tmpD = tmpD + 2*blockSize;
286
287 double *normR = tmpD;
288 tmpD = tmpD + blockSize;
289
290 double *MM = tmpD;
291 tmpD = tmpD + 4*blockSize*blockSize;
292
293 double *KK = tmpD;
294 tmpD = tmpD + 4*blockSize*blockSize;
295
296 double *S = tmpD;
297
298 memRequested += sizeof(double)*lwork2/(1024.0*1024.0);
299
300 // Define an array to store the residuals history
301 if (localVerbose > 2) {
302 resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
303 if (resHistory == 0) {
304 if (vectWeight)
305 delete vectWeight;
306 delete[] work1;
307 delete[] work2;
308 info = -30;
309 return info;
310 }
311 historyCount = 0;
312 }
313
314 // Miscellaneous definitions
315
316 bool reStart = false;
317 numRestart = 0;
318
319 int localSize;
320 int twoBlocks = 2*blockSize;
321 int nFound = blockSize;
322 int i, j;
323
324 if (localVerbose > 0) {
325 cout << endl;
326 cout << " *|* Problem: ";
327 if (M)
328 cout << "K*Q = M*Q D ";
329 else
330 cout << "K*Q = Q D ";
331 if (Prec)
332 cout << " with preconditioner";
333 cout << endl;
334 cout << " *|* Algorithm = BRQMIN" << endl;
335 cout << " *|* Size of blocks = " << blockSize << endl;
336 cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
337 cout.precision(2);
338 cout.setf(ios::scientific, ios::floatfield);
339 cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
340 cout << " *|* Norm used for convergence: ";
341 if (normWeight)
342 cout << "weighted L2-norm with user-provided weights" << endl;
343 else
344 cout << "L^2-norm" << endl;
345 if (startingEV > 0)
346 cout << " *|* Input converged eigenvectors = " << startingEV << endl;
347 cout << "\n -- Start iterations -- \n";
348 }
349
350 timeOuterLoop -= MyWatch.WallTime();
351 for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {
352
353 highMem = (highMem > currentSize()) ? highMem : currentSize();
354
355 if ((outerIter == 1) || (reStart == true)) {
356
357 reStart = false;
358 localSize = blockSize;
359
360 if (nFound > 0) {
361
362 Epetra_MultiVector X2(View, X, blockSize-nFound, nFound);
363 Epetra_MultiVector MX2(View, MX, blockSize-nFound, nFound);
364 Epetra_MultiVector KX2(View, KX, blockSize-nFound, nFound);
365
366 // Apply the mass matrix to X
367 timeMassOp -= MyWatch.WallTime();
368 if (M)
369 M->Apply(X2, MX2);
370 timeMassOp += MyWatch.WallTime();
371 massOp += nFound;
372
373 if (knownEV > 0) {
374 // Orthonormalize X against the known eigenvectors with Gram-Schmidt
375 // Note: Use R as a temporary work space
376 Epetra_MultiVector copyQ(View, Q, 0, knownEV);
377 timeOrtho -= MyWatch.WallTime();
378 info = modalTool.massOrthonormalize(X, MX, M, copyQ, nFound, 0, R.Values());
379 timeOrtho += MyWatch.WallTime();
380 // Exit the code if the orthogonalization did not succeed
381 if (info < 0) {
382 info = -10;
383 delete[] work1;
384 delete[] work2;
385 if (vectWeight)
386 delete vectWeight;
387 return info;
388 }
389 }
390
391 // Apply the stiffness matrix to X
392 timeStifOp -= MyWatch.WallTime();
393 K->Apply(X2, KX2);
394 timeStifOp += MyWatch.WallTime();
395 stifOp += nFound;
396
397 } // if (nFound > 0)
398
399 } // if ((outerIter == 1) || (reStart == true))
400 else {
401
402 // Apply the preconditioner on the residuals
403 if (Prec != 0) {
404 timePrecOp -= MyWatch.WallTime();
405 Prec->ApplyInverse(R, H);
406 timePrecOp += MyWatch.WallTime();
407 precOp += blockSize;
408 }
409 else {
410 memcpy(H.Values(), R.Values(), xr*blockSize*sizeof(double));
411 }
412
413 timeSearchP -= MyWatch.WallTime();
414 // Define the new search direction
415 if (localSize == blockSize) {
416 P.Scale(-1.0, H);
417 localSize = twoBlocks;
418 } // if (localSize == blockSize)
419 else {
420 // Compute the projected stiffness matrix P^T K P
421 // Note: Use S as a workspace
422 // Use KK to store this matrix
423 modalTool.localProjection(blockSize, blockSize, xr, KP.Values(), xr, P.Values(), xr,
424 KK, blockSize, S);
425 // Factor the projected stiffness matrix P^T K P
426 callLAPACK.POTRF('U', blockSize, KK, blockSize, &info);
427 if (info != 0) {
428 if (info < 0) {
429 if (localVerbose > 0) {
430 cerr << " Iteration " << outerIter;
431 cerr << " - DPOTRF has a critical failure (" << info << ")" << endl;
432 }
433 info = - 20;
434 break;
435 }
436 // Restart the computation
437 if (localVerbose > 0) {
438 cout << " Iteration " << outerIter;
439 cout << " - Failure for DPOTRF";
440 cout << " - RESET search directions to residuals\n";
441 }
442 P.Scale(-1.0, H);
443 } // if (info != 0)
444 else {
445 // Compute the projected stiffness matrix P^T K H
446 // Note: Use S as a workspace
447 // Use MM to store this matrix
448 modalTool.localProjection(blockSize, blockSize, xr, KP.Values(), xr, H.Values(), xr,
449 MM, blockSize, S);
450 callLAPACK.POTRS('U', blockSize, blockSize, KK, blockSize, MM, blockSize, &info);
451 if (info < 0) {
452 if (localVerbose > 0) {
453 cerr << " Iteration " << outerIter;
454 cerr << " - DPOTRS has a critical failure (" << info << ")" << endl;
455 }
456 info = -20;
457 break;
458 }
459 // Define the new search directions
460 // Note: Use R as a workspace
461 memcpy(R.Values(), P.Values(), xr*blockSize*sizeof(double));
462 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, MM, blockSize,
463 0.0, P.Values(), xr);
464 callBLAS.AXPY(xr*blockSize, -1.0, H.Values(), P.Values());
465
466 // Check orthogonality of P with previous directions
467 // Note: Use KK and MM as workspaces
468 if (verbose > 2) {
469 callBLAS.GEMM('T', 'N', blockSize, blockSize, xr, 1.0, P.Values(), xr,
470 KP.Values(), xr, 0.0, MM, blockSize);
471 MyComm.SumAll(MM, KK, blockSize*blockSize);
472 if (localVerbose > 0) {
473 double dotMax = 0.0;
474 for (j = 0; j < blockSize*blockSize; ++j) {
475 dotMax = (fabs(KK[j]) > dotMax) ? fabs(KK[j]) : dotMax;
476 }
477 cout << endl;
478 cout << " K-Orthogonality check for search directions = " << dotMax << endl;
479 cout << endl;
480 }
481 } // if (verbose > 2)
482
483 } // if (info != 0)
484
485 } // if (localSize == blockSize)
486 timeSearchP += MyWatch.WallTime();
487
488 // Apply the mass matrix on P
489 timeMassOp -= MyWatch.WallTime();
490 if (M)
491 M->Apply(P, MP);
492 timeMassOp += MyWatch.WallTime();
493 massOp += blockSize;
494
495 if (knownEV > 0) {
496 // Orthogonalize P against the known eigenvectors
497 // Note: Use R as a temporary work space
498 Epetra_MultiVector copyQ(View, Q, 0, knownEV);
499 timeOrtho -= MyWatch.WallTime();
500 modalTool.massOrthonormalize(P, MP, M, copyQ, blockSize, 1, R.Values());
501 timeOrtho += MyWatch.WallTime();
502 }
503
504 // Apply the stiffness matrix to P
505 timeStifOp -= MyWatch.WallTime();
506 K->Apply(P, KP);
507 timeStifOp += MyWatch.WallTime();
508 stifOp += blockSize;
509
510 } // if ((outerIter == 1) || (reStart == true))
511
512 // Form "local" mass and stiffness matrices
513 // Note: Use S as a temporary workspace
514 timeLocalProj -= MyWatch.WallTime();
515 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KX.Values(), xr,
516 KK, localSize, S);
517 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MX.Values(), xr,
518 MM, localSize, S);
519 if (localSize > blockSize) {
520 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KP.Values(), xr,
521 KK + blockSize*localSize, localSize, S);
522 modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, KP.Values(), xr,
523 KK + blockSize*localSize + blockSize, localSize, S);
524 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MP.Values(), xr,
525 MM + blockSize*localSize, localSize, S);
526 modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, MP.Values(), xr,
527 MM + blockSize*localSize + blockSize, localSize, S);
528 } // if (localSize > blockSize)
529 timeLocalProj += MyWatch.WallTime();
530
531 // Perform a spectral decomposition
532 int nevLocal = localSize;
533 timeLocalSolve -= MyWatch.WallTime();
534 info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
535 S, localSize, theta, localVerbose,
536 (blockSize == 1) ? 1 : 0);
537 timeLocalSolve += MyWatch.WallTime();
538
539 if (info < 0) {
540 // Stop when spectral decomposition has a critical failure
541 break;
542 } // if (info < 0)
543
544 // Check for restarting
545 if ((theta[0] < 0.0) || (nevLocal < blockSize)) {
546 if (localVerbose > 0) {
547 cout << " Iteration " << outerIter;
548 cout << "- Failure for spectral decomposition - RESTART with new random search\n";
549 }
550 if (blockSize == 1) {
551 X.Random();
552 nFound = blockSize;
553 }
554 else {
555 Epetra_MultiVector Xinit(View, X, 1, blockSize-1);
556 Xinit.Random();
557 nFound = blockSize - 1;
558 } // if (blockSize == 1)
559 reStart = true;
560 numRestart += 1;
561 info = 0;
562 continue;
563 } // if ((theta[0] < 0.0) || (nevLocal < blockSize))
564
565 if ((localSize == twoBlocks) && (nevLocal == blockSize)) {
566 for (j = 0; j < nevLocal; ++j)
567 memcpy(S + j*blockSize, S + j*twoBlocks, blockSize*sizeof(double));
568 localSize = blockSize;
569 }
570
571 // Check the direction of eigenvectors
572 for (j = 0; j < nevLocal; ++j) {
573 double coeff = S[j + j*localSize];
574 if (coeff < 0.0)
575 callBLAS.SCAL(localSize, -1.0, S + j*localSize);
576 }
577
578 // Compute the residuals
579 timeResidual -= MyWatch.WallTime();
580 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KX.Values(), xr,
581 S, localSize, 0.0, R.Values(), xr);
582 if (localSize == twoBlocks) {
583 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
584 S + blockSize, localSize, 1.0, R.Values(), xr);
585 }
586 for (j = 0; j < blockSize; ++j)
587 callBLAS.SCAL(localSize, theta[j], S + j*localSize);
588 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MX.Values(), xr,
589 S, localSize, 1.0, R.Values(), xr);
590 if (localSize == twoBlocks) {
591 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MP.Values(), xr,
592 S + blockSize, localSize, 1.0, R.Values(), xr);
593 }
594 for (j = 0; j < blockSize; ++j)
595 callBLAS.SCAL(localSize, 1.0/theta[j], S + j*localSize);
596 timeResidual += MyWatch.WallTime();
597
598 // Compute the norms of the residuals
599 timeNorm -= MyWatch.WallTime();
600 if (vectWeight)
601 R.NormWeighted(*vectWeight, normR);
602 else
603 R.Norm2(normR);
604 // Scale the norms of residuals with the eigenvalues
605 // Count the converged eigenvectors
606 nFound = 0;
607 for (j = 0; j < blockSize; ++j) {
608 normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
609 if (normR[j] < tolEigenSolve)
610 nFound += 1;
611 }
612 timeNorm += MyWatch.WallTime();
613
614 // Store the residual history
615 if (localVerbose > 2) {
616 memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double));
617 historyCount += 1;
618 }
619
620 // Print information on current iteration
621 if (localVerbose > 0) {
622 cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
623 cout << knownEV + nFound << endl;
624 }
625
626 if (localVerbose > 1) {
627 cout << endl;
628 cout.precision(2);
629 cout.setf(ios::scientific, ios::floatfield);
630 for (i=0; i<blockSize; ++i) {
631 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
632 cout << " = " << normR[i] << endl;
633 }
634 cout << endl;
635 cout.precision(2);
636 for (i=0; i<blockSize; ++i) {
637 cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
638 cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
639 cout << " = " << theta[i] << endl;
640 }
641 cout << endl;
642 }
643
644 if (nFound == 0) {
645 // Update the spaces
646 // Note: Use H as a temporary work space
647 timeLocalUpdate -= MyWatch.WallTime();
648 memcpy(H.Values(), X.Values(), xr*blockSize*sizeof(double));
649 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
650 0.0, X.Values(), xr);
651 memcpy(H.Values(), KX.Values(), xr*blockSize*sizeof(double));
652 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
653 0.0, KX.Values(), xr);
654 if (M) {
655 memcpy(H.Values(), MX.Values(), xr*blockSize*sizeof(double));
656 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
657 0.0, MX.Values(), xr);
658 }
659 if (localSize == twoBlocks) {
660 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, P.Values(), xr,
661 S + blockSize, localSize, 1.0, X.Values(), xr);
662 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
663 S + blockSize, localSize, 1.0, KX.Values(), xr);
664 if (M) {
665 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, MP.Values(), xr,
666 S + blockSize, localSize, 1.0, MX.Values(), xr);
667 }
668 } // if (localSize == twoBlocks)
669 timeLocalUpdate += MyWatch.WallTime();
670 // When required, monitor some orthogonalities
671 if (verbose > 2) {
672 if (knownEV == 0) {
673 accuracyCheck(&X, &MX, &R, 0, (localSize>blockSize) ? &P : 0);
674 }
675 else {
676 Epetra_MultiVector copyQ(View, Q, 0, knownEV);
677 accuracyCheck(&X, &MX, &R, &copyQ, (localSize>blockSize) ? &P : 0);
678 }
679 } // if (verbose > 2)
680 continue;
681 } // if (nFound == 0)
682
683 // Order the Ritz eigenvectors by putting the converged vectors at the beginning
684 int firstIndex = blockSize;
685 for (j = 0; j < blockSize; ++j) {
686 if (normR[j] >= tolEigenSolve) {
687 firstIndex = j;
688 break;
689 }
690 } // for (j = 0; j < blockSize; ++j)
691 while (firstIndex < nFound) {
692 for (j = firstIndex; j < blockSize; ++j) {
693 if (normR[j] < tolEigenSolve) {
694 // Swap the j-th and firstIndex-th position
695 callFortran.SWAP(localSize, S + j*localSize, 1, S + firstIndex*localSize, 1);
696 callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
697 callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
698 break;
699 }
700 } // for (j = firstIndex; j < blockSize; ++j)
701 for (j = 0; j < blockSize; ++j) {
702 if (normR[j] >= tolEigenSolve) {
703 firstIndex = j;
704 break;
705 }
706 } // for (j = 0; j < blockSize; ++j)
707 } // while (firstIndex < nFound)
708
709 // Copy the converged eigenvalues
710 memcpy(lambda + knownEV, theta, nFound*sizeof(double));
711
712 // Convergence test
713 if (knownEV + nFound >= numEigen) {
714 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
715 S, localSize, 0.0, R.Values(), xr);
716 if (localSize > blockSize) {
717 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
718 S + blockSize, localSize, 1.0, R.Values(), xr);
719 }
720 memcpy(Q.Values() + knownEV*xr, R.Values(), nFound*xr*sizeof(double));
721 knownEV += nFound;
722 if (localVerbose == 1) {
723 cout << endl;
724 cout.precision(2);
725 cout.setf(ios::scientific, ios::floatfield);
726 for (i=0; i<blockSize; ++i) {
727 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
728 cout << " = " << normR[i] << endl;
729 }
730 cout << endl;
731 }
732 break;
733 }
734
735 // Store the converged eigenvalues and eigenvectors
736 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
737 S, localSize, 0.0, Q.Values() + knownEV*xr, xr);
738 if (localSize == twoBlocks) {
739 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
740 S + blockSize, localSize, 1.0, Q.Values() + knownEV*xr, xr);
741 }
742 knownEV += nFound;
743
744 // Define the restarting vectors
745 timeRestart -= MyWatch.WallTime();
746 int leftOver = (nevLocal < blockSize + nFound) ? nevLocal - nFound : blockSize;
747 double *Snew = S + nFound*localSize;
748 memcpy(H.Values(), X.Values(), blockSize*xr*sizeof(double));
749 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
750 Snew, localSize, 0.0, X.Values(), xr);
751 memcpy(H.Values(), KX.Values(), blockSize*xr*sizeof(double));
752 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
753 Snew, localSize, 0.0, KX.Values(), xr);
754 if (M) {
755 memcpy(H.Values(), MX.Values(), blockSize*xr*sizeof(double));
756 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
757 Snew, localSize, 0.0, MX.Values(), xr);
758 }
759 if (localSize == twoBlocks) {
760 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, P.Values(), xr,
761 Snew+blockSize, localSize, 1.0, X.Values(), xr);
762 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, KP.Values(), xr,
763 Snew+blockSize, localSize, 1.0, KX.Values(), xr);
764 if (M) {
765 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, MP.Values(), xr,
766 Snew+blockSize, localSize, 1.0, MX.Values(), xr);
767 }
768 } // if (localSize == twoBlocks)
769 if (nevLocal < blockSize + nFound) {
770 // Put new random vectors at the end of the block
771 Epetra_MultiVector Xtmp(View, X, leftOver, blockSize - leftOver);
772 Xtmp.Random();
773 }
774 else {
775 nFound = 0;
776 } // if (nevLocal < blockSize + nFound)
777 reStart = true;
778 timeRestart += MyWatch.WallTime();
779
780 } // for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter)
781 timeOuterLoop += MyWatch.WallTime();
782 highMem = (highMem > currentSize()) ? highMem : currentSize();
783
784 // Clean memory
785 delete[] work1;
786 delete[] work2;
787 if (vectWeight)
788 delete vectWeight;
789
790 // Sort the eigenpairs
791 timePostProce -= MyWatch.WallTime();
792 if ((info == 0) && (knownEV > 0)) {
793 mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
794 }
795 timePostProce += MyWatch.WallTime();
796
797 return (info == 0) ? knownEV : info;
798
799}
800
801
802void BRQMIN::accuracyCheck(const Epetra_MultiVector *X, const Epetra_MultiVector *MX,
803 const Epetra_MultiVector *R, const Epetra_MultiVector *Q,
804 const Epetra_MultiVector *P) const {
805
806 cout.precision(2);
807 cout.setf(ios::scientific, ios::floatfield);
808 double tmp;
809
810 int myPid = MyComm.MyPID();
811
812 if (X) {
813 if (M) {
814 if (MX) {
815 tmp = myVerify.errorEquality(X, MX, M);
816 if (myPid == 0)
817 cout << " >> Relative difference between MX and M*X = " << tmp << endl;
818 }
819 tmp = myVerify.errorOrthonormality(X, M);
820 if (myPid == 0)
821 cout << " >> Error in X^T M X - I = " << tmp << endl;
822 }
823 else {
824 tmp = myVerify.errorOrthonormality(X, 0);
825 if (myPid == 0)
826 cout << " >> Error in X^T X - I = " << tmp << endl;
827 }
828 }
829
830 if ((R) && (X)) {
831 tmp = myVerify.errorOrthogonality(X, R);
832 if (myPid == 0)
833 cout << " >> Orthogonality X^T R up to " << tmp << endl;
834 }
835
836 if (Q == 0)
837 return;
838
839 if (M) {
840 tmp = myVerify.errorOrthonormality(Q, M);
841 if (myPid == 0)
842 cout << " >> Error in Q^T M Q - I = " << tmp << endl;
843 if (X) {
844 tmp = myVerify.errorOrthogonality(Q, X, M);
845 if (myPid == 0)
846 cout << " >> Orthogonality Q^T M X up to " << tmp << endl;
847 }
848 if (P) {
849 tmp = myVerify.errorOrthogonality(Q, P, M);
850 if (myPid == 0)
851 cout << " >> Orthogonality Q^T M P up to " << tmp << endl;
852 }
853 }
854 else {
855 tmp = myVerify.errorOrthonormality(Q, 0);
856 if (myPid == 0)
857 cout << " >> Error in Q^T Q - I = " << tmp << endl;
858 if (X) {
859 tmp = myVerify.errorOrthogonality(Q, X, 0);
860 if (myPid == 0)
861 cout << " >> Orthogonality Q^T X up to " << tmp << endl;
862 }
863 if (P) {
864 tmp = myVerify.errorOrthogonality(Q, P, 0);
865 if (myPid == 0)
866 cout << " >> Orthogonality Q^T P up to " << tmp << endl;
867 }
868 }
869
870}
871
872
873void BRQMIN::initializeCounters() {
874
875 historyCount = 0;
876 if (resHistory) {
877 delete[] resHistory;
878 resHistory = 0;
879 }
880
881 memRequested = 0.0;
882 highMem = 0.0;
883
884 massOp = 0;
885 numRestart = 0;
886 outerIter = 0;
887 precOp = 0;
888 residual = 0;
889 stifOp = 0;
890
891 timeLocalProj = 0.0;
892 timeLocalSolve = 0.0;
893 timeLocalUpdate = 0.0;
894 timeMassOp = 0.0;
895 timeNorm = 0.0;
896 timeOrtho = 0.0;
897 timeOuterLoop = 0.0;
898 timePostProce = 0.0;
899 timePrecOp = 0.0;
900 timeResidual = 0.0;
901 timeRestart = 0.0;
902 timeSearchP = 0.0;
903 timeStifOp = 0.0;
904
905
906}
907
908
909void BRQMIN::algorithmInfo() const {
910
911 int myPid = MyComm.MyPID();
912
913 if (myPid ==0) {
914 cout << " Algorithm: BRQMIN with Cholesky-based local eigensolver\n";
915 cout << " Block Size: " << blockSize << endl;
916 }
917
918}
919
920
921void BRQMIN::historyInfo() const {
922
923 if (resHistory) {
924 int j;
925 cout << " Block Size: " << blockSize << endl;
926 cout << endl;
927 cout << " Residuals\n";
928 cout << endl;
929 cout.precision(4);
930 cout.setf(ios::scientific, ios::floatfield);
931 for (j = 0; j < historyCount; ++j) {
932 int ii;
933 for (ii = 0; ii < blockSize; ++ii)
934 cout << resHistory[blockSize*j + ii] << endl;
935 }
936 cout << endl;
937 }
938
939}
940
941
942void BRQMIN::memoryInfo() const {
943
944 int myPid = MyComm.MyPID();
945
946 double maxHighMem = 0.0;
947 double tmp = highMem;
948 MyComm.MaxAll(&tmp, &maxHighMem, 1);
949
950 double maxMemRequested = 0.0;
951 tmp = memRequested;
952 MyComm.MaxAll(&tmp, &maxMemRequested, 1);
953
954 if (myPid == 0) {
955 cout.precision(2);
956 cout.setf(ios::fixed, ios::floatfield);
957 cout << " Memory requested per processor by the eigensolver = (EST) ";
958 cout.width(6);
959 cout << maxMemRequested << " MB " << endl;
960 cout << endl;
961 cout << " High water mark in eigensolver = (EST) ";
962 cout.width(6);
963 cout << maxHighMem << " MB " << endl;
964 cout << endl;
965 }
966
967}
968
969
970void BRQMIN::operationInfo() const {
971
972 int myPid = MyComm.MyPID();
973
974 if (myPid == 0) {
975 cout << " --- Operations ---\n\n";
976 cout << " Total number of mass matrix multiplications = ";
977 cout.width(9);
978 cout << massOp + modalTool.getNumProj_MassMult() + modalTool.getNumNorm_MassMult() << endl;
979 cout << " Mass multiplications in eigensolver = ";
980 cout.width(9);
981 cout << massOp << endl;
982 cout << " Mass multiplications for orthogonalization = ";
983 cout.width(9);
984 cout << modalTool.getNumProj_MassMult() << endl;
985 cout << " Mass multiplications for normalization = ";
986 cout.width(9);
987 cout << modalTool.getNumNorm_MassMult() << endl;
988 cout << " Total number of stiffness matrix operations = ";
989 cout.width(9);
990 cout << stifOp << endl;
991 cout << " Total number of preconditioner applications = ";
992 cout.width(9);
993 cout << precOp << endl;
994 cout << " Total number of computed eigen-residuals = ";
995 cout.width(9);
996 cout << residual << endl;
997 cout << "\n";
998 cout << " Total number of outer iterations = ";
999 cout.width(9);
1000 cout << outerIter << endl;
1001 cout << " Number of restarts = ";
1002 cout.width(9);
1003 cout << numRestart << endl;
1004 cout << "\n";
1005 }
1006
1007}
1008
1009
1010void BRQMIN::timeInfo() const {
1011
1012 int myPid = MyComm.MyPID();
1013
1014 if (myPid == 0) {
1015 cout << " --- Timings ---\n\n";
1016 cout.setf(ios::fixed, ios::floatfield);
1017 cout.precision(2);
1018 cout << " Total time for outer loop = ";
1019 cout.width(9);
1020 cout << timeOuterLoop << " s" << endl;
1021 cout << " Time for mass matrix operations = ";
1022 cout.width(9);
1023 cout << timeMassOp << " s ";
1024 cout.width(5);
1025 cout << 100*timeMassOp/timeOuterLoop << " %\n";
1026 cout << " Time for stiffness matrix operations = ";
1027 cout.width(9);
1028 cout << timeStifOp << " s ";
1029 cout.width(5);
1030 cout << 100*timeStifOp/timeOuterLoop << " %\n";
1031 cout << " Time for preconditioner applications = ";
1032 cout.width(9);
1033 cout << timePrecOp << " s ";
1034 cout.width(5);
1035 cout << 100*timePrecOp/timeOuterLoop << " %\n";
1036 cout << " Time for defining search directions = ";
1037 cout.width(9);
1038 cout << timeSearchP << " s ";
1039 cout.width(5);
1040 cout << 100*timeSearchP/timeOuterLoop << " %\n";
1041 cout << " Time for orthogonalizations = ";
1042 cout.width(9);
1043 cout << timeOrtho << " s ";
1044 cout.width(5);
1045 cout << 100*timeOrtho/timeOuterLoop << " %\n";
1046 cout << " Projection step : ";
1047 cout.width(9);
1048 cout << modalTool.getTimeProj() << " s\n";
1049 cout << " Q^T mult. :: ";
1050 cout.width(9);
1051 cout << modalTool.getTimeProj_QtMult() << " s\n";
1052 cout << " Q mult. :: ";
1053 cout.width(9);
1054 cout << modalTool.getTimeProj_QMult() << " s\n";
1055 cout << " Mass mult. :: ";
1056 cout.width(9);
1057 cout << modalTool.getTimeProj_MassMult() << " s\n";
1058 cout << " Normalization step : ";
1059 cout.width(9);
1060 cout << modalTool.getTimeNorm() << " s\n";
1061 cout << " Mass mult. :: ";
1062 cout.width(9);
1063 cout << modalTool.getTimeNorm_MassMult() << " s\n";
1064 cout << " Time for local projection = ";
1065 cout.width(9);
1066 cout << timeLocalProj << " s ";
1067 cout.width(5);
1068 cout << 100*timeLocalProj/timeOuterLoop << " %\n";
1069 cout << " Time for local eigensolve = ";
1070 cout.width(9);
1071 cout << timeLocalSolve << " s ";
1072 cout.width(5);
1073 cout << 100*timeLocalSolve/timeOuterLoop << " %\n";
1074 cout << " Time for local update = ";
1075 cout.width(9);
1076 cout << timeLocalUpdate << " s ";
1077 cout.width(5);
1078 cout << 100*timeLocalUpdate/timeOuterLoop << " %\n";
1079 cout << " Time for residual computations = ";
1080 cout.width(9);
1081 cout << timeResidual << " s ";
1082 cout.width(5);
1083 cout << 100*timeResidual/timeOuterLoop << " %\n";
1084 cout << " Time for residuals norm computations = ";
1085 cout.width(9);
1086 cout << timeNorm << " s ";
1087 cout.width(5);
1088 cout << 100*timeNorm/timeOuterLoop << " %\n";
1089 cout << " Time for restarting space definition = ";
1090 cout.width(9);
1091 cout << timeRestart << " s ";
1092 cout.width(5);
1093 cout << 100*timeRestart/timeOuterLoop << " %\n";
1094 cout << "\n";
1095 cout << " Total time for post-processing = ";
1096 cout.width(9);
1097 cout << timePostProce << " s\n";
1098 cout << endl;
1099 } // if (myPid == 0)
1100
1101}
1102
1103