Anasazi Version of the Day
Loading...
Searching...
No Matches
BlockDACG.cpp
1// @HEADER
2// *****************************************************************************
3// Anasazi: Block Eigensolvers Package
4//
5// Copyright 2004 NTESS and the Anasazi contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10// This software is a result of the research described in the report
11//
12// "A comparison of algorithms for modal analysis in the absence
13// of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
14// Sandia National Laboratories, Technical report SAND2003-1028J.
15//
16// It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
17// framework ( http://trilinos.org/ ).
18
19#include "BlockDACG.h"
20
21
22BlockDACG::BlockDACG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
23 const Epetra_Operator *PP, int _blk,
24 double _tol, int _maxIter, int _verb) :
25 myVerify(_Comm),
26 callBLAS(),
27 callFortran(),
28 modalTool(_Comm),
29 mySort(),
30 MyComm(_Comm),
31 K(KK),
32 M(0),
33 Prec(PP),
34 MyWatch(_Comm),
35 tolEigenSolve(_tol),
36 maxIterEigenSolve(_maxIter),
37 blockSize(_blk),
38 normWeight(0),
39 verbose(_verb),
40 historyCount(0),
41 resHistory(0),
42 memRequested(0.0),
43 highMem(0.0),
44 massOp(0),
45 numRestart(0),
46 outerIter(0),
47 precOp(0),
48 residual(0),
49 stifOp(0),
50 timeLocalProj(0.0),
51 timeLocalSolve(0.0),
52 timeLocalUpdate(0.0),
53 timeMassOp(0.0),
54 timeNorm(0.0),
55 timeOrtho(0.0),
56 timeOuterLoop(0.0),
57 timePostProce(0.0),
58 timePrecOp(0.0),
59 timeResidual(0.0),
60 timeRestart(0.0),
61 timeSearchP(0.0),
62 timeStifOp(0.0)
63 {
64
65}
66
67
68BlockDACG::BlockDACG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
69 const Epetra_Operator *MM, const Epetra_Operator *PP, int _blk,
70 double _tol, int _maxIter, int _verb,
71 double *_weight) :
72 myVerify(_Comm),
73 callBLAS(),
74 callFortran(),
75 modalTool(_Comm),
76 mySort(),
77 MyComm(_Comm),
78 K(KK),
79 M(MM),
80 Prec(PP),
81 MyWatch(_Comm),
82 tolEigenSolve(_tol),
83 maxIterEigenSolve(_maxIter),
84 blockSize(_blk),
85 normWeight(_weight),
86 verbose(_verb),
87 historyCount(0),
88 resHistory(0),
89 memRequested(0.0),
90 highMem(0.0),
91 massOp(0),
92 numRestart(0),
93 outerIter(0),
94 precOp(0),
95 residual(0),
96 stifOp(0),
97 timeLocalProj(0.0),
98 timeLocalSolve(0.0),
99 timeLocalUpdate(0.0),
100 timeMassOp(0.0),
101 timeNorm(0.0),
102 timeOrtho(0.0),
103 timeOuterLoop(0.0),
104 timePostProce(0.0),
105 timePrecOp(0.0),
106 timeResidual(0.0),
107 timeRestart(0.0),
108 timeSearchP(0.0),
109 timeStifOp(0.0)
110 {
111
112}
113
114
115BlockDACG::~BlockDACG() {
116
117 if (resHistory) {
118 delete[] resHistory;
119 resHistory = 0;
120 }
121
122}
123
124
125int BlockDACG::solve(int numEigen, Epetra_MultiVector &Q, double *lambda) {
126
127 return BlockDACG::reSolve(numEigen, Q, lambda);
128}
129
130
131int BlockDACG::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {
132
133 // Computes the smallest eigenvalues and the corresponding eigenvectors
134 // of the generalized eigenvalue problem
135 //
136 // K X = M X Lambda
137 //
138 // using a Block Deflation Accelerated Conjugate Gradient algorithm.
139 //
140 // Note that if M is not specified, then K X = X Lambda is solved.
141 //
142 // Ref: P. Arbenz & R. Lehoucq, "A comparison of algorithms for modal analysis in the
143 // absence of a sparse direct method", SNL, Technical Report SAND2003-1028J
144 // With the notations of this report, the coefficient beta is defined as
145 // diag( H^T_{k} G_{k} ) / diag( H^T_{k-1} G_{k-1} )
146 //
147 // Input variables:
148 //
149 // numEigen (integer) = Number of eigenmodes requested
150 //
151 // Q (Epetra_MultiVector) = Converged eigenvectors
152 // The number of columns of Q must be equal to numEigen + blockSize.
153 // The rows of Q are distributed across processors.
154 // At exit, the first numEigen columns contain the eigenvectors requested.
155 //
156 // lambda (array of doubles) = Converged eigenvalues
157 // At input, it must be of size numEigen + blockSize.
158 // At exit, the first numEigen locations contain the eigenvalues requested.
159 //
160 // startingEV (integer) = Number of existing converged eigenmodes
161 //
162 // Return information on status of computation
163 //
164 // info >= 0 >> Number of converged eigenpairs at the end of computation
165 //
166 // // Failure due to input arguments
167 //
168 // info = - 1 >> The stiffness matrix K has not been specified.
169 // info = - 2 >> The maps for the matrix K and the matrix M differ.
170 // info = - 3 >> The maps for the matrix K and the preconditioner P differ.
171 // info = - 4 >> The maps for the vectors and the matrix K differ.
172 // info = - 5 >> Q is too small for the number of eigenvalues requested.
173 // info = - 6 >> Q is too small for the computation parameters.
174 //
175 // info = - 10 >> Failure during the mass orthonormalization
176 //
177 // info = - 20 >> Error in LAPACK during the local eigensolve
178 //
179 // info = - 30 >> MEMORY
180 //
181
182 // Check the input parameters
183
184 if (numEigen <= startingEV) {
185 return startingEV;
186 }
187
188 int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, numEigen + blockSize);
189 if (info < 0)
190 return info;
191
192 int myPid = MyComm.MyPID();
193
194 // Get the weight for approximating the M-inverse norm
195 Epetra_Vector *vectWeight = 0;
196 if (normWeight) {
197 vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
198 }
199
200 int knownEV = startingEV;
201 int localVerbose = verbose*(myPid==0);
202
203 // Define local block vectors
204 //
205 // MX = Working vectors (storing M*X if M is specified, else pointing to X)
206 // KX = Working vectors (storing K*X)
207 //
208 // R = Residuals
209 //
210 // H = Preconditioned residuals
211 //
212 // P = Search directions
213 // MP = Working vectors (storing M*P if M is specified, else pointing to P)
214 // KP = Working vectors (storing K*P)
215
216 int xr = Q.MyLength();
217 Epetra_MultiVector X(View, Q, numEigen, blockSize);
218 X.Random();
219
220 int tmp;
221 tmp = (M == 0) ? 5*blockSize*xr : 7*blockSize*xr;
222
223 double *work1 = new (nothrow) double[tmp];
224 if (work1 == 0) {
225 if (vectWeight)
226 delete vectWeight;
227 info = -30;
228 return info;
229 }
230 memRequested += sizeof(double)*tmp/(1024.0*1024.0);
231
232 highMem = (highMem > currentSize()) ? highMem : currentSize();
233
234 double *tmpD = work1;
235
236 Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize);
237 tmpD = tmpD + xr*blockSize;
238
239 Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize);
240 tmpD = (M) ? tmpD + xr*blockSize : tmpD;
241
242 Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);
243 tmpD = tmpD + xr*blockSize;
244
245 Epetra_MultiVector H(View, Q.Map(), tmpD, xr, blockSize);
246 tmpD = tmpD + xr*blockSize;
247
248 Epetra_MultiVector P(View, Q.Map(), tmpD, xr, blockSize);
249 tmpD = tmpD + xr*blockSize;
250
251 Epetra_MultiVector KP(View, Q.Map(), tmpD, xr, blockSize);
252 tmpD = tmpD + xr*blockSize;
253
254 Epetra_MultiVector MP(View, Q.Map(), (M) ? tmpD : P.Values(), xr, blockSize);
255
256 // Define arrays
257 //
258 // theta = Store the local eigenvalues (size: 2*blockSize)
259 // normR = Store the norm of residuals (size: blockSize)
260 //
261 // oldHtR = Store the previous H_i^T*R_i (size: blockSize)
262 // currentHtR = Store the current H_i^T*R_i (size: blockSize)
263 //
264 // MM = Local mass matrix (size: 2*blockSize x 2*blockSize)
265 // KK = Local stiffness matrix (size: 2*blockSize x 2*blockSize)
266 //
267 // S = Local eigenvectors (size: 2*blockSize x 2*blockSize)
268
269 int lwork2;
270 lwork2 = 5*blockSize + 12*blockSize*blockSize;
271 double *work2 = new (nothrow) double[lwork2];
272 if (work2 == 0) {
273 if (vectWeight)
274 delete vectWeight;
275 delete[] work1;
276 info = -30;
277 return info;
278 }
279
280 highMem = (highMem > currentSize()) ? highMem : currentSize();
281
282 tmpD = work2;
283
284 double *theta = tmpD;
285 tmpD = tmpD + 2*blockSize;
286
287 double *normR = tmpD;
288 tmpD = tmpD + blockSize;
289
290 double *oldHtR = tmpD;
291 tmpD = tmpD + blockSize;
292
293 double *currentHtR = tmpD;
294 tmpD = tmpD + blockSize;
295 memset(currentHtR, 0, blockSize*sizeof(double));
296
297 double *MM = tmpD;
298 tmpD = tmpD + 4*blockSize*blockSize;
299
300 double *KK = tmpD;
301 tmpD = tmpD + 4*blockSize*blockSize;
302
303 double *S = tmpD;
304
305 memRequested += sizeof(double)*lwork2/(1024.0*1024.0);
306
307 // Define an array to store the residuals history
308 if (localVerbose > 2) {
309 resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
310 if (resHistory == 0) {
311 if (vectWeight)
312 delete vectWeight;
313 delete[] work1;
314 delete[] work2;
315 info = -30;
316 return info;
317 }
318 historyCount = 0;
319 }
320
321 // Miscellaneous definitions
322
323 bool reStart = false;
324 numRestart = 0;
325
326 int localSize;
327 int twoBlocks = 2*blockSize;
328 int nFound = blockSize;
329 int i, j;
330
331 if (localVerbose > 0) {
332 cout << endl;
333 cout << " *|* Problem: ";
334 if (M)
335 cout << "K*Q = M*Q D ";
336 else
337 cout << "K*Q = Q D ";
338 if (Prec)
339 cout << " with preconditioner";
340 cout << endl;
341 cout << " *|* Algorithm = DACG (block version)" << endl;
342 cout << " *|* Size of blocks = " << blockSize << endl;
343 cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
344 cout.precision(2);
345 cout.setf(ios::scientific, ios::floatfield);
346 cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
347 cout << " *|* Norm used for convergence: ";
348 if (normWeight)
349 cout << "weighted L2-norm with user-provided weights" << endl;
350 else
351 cout << "L^2-norm" << endl;
352 if (startingEV > 0)
353 cout << " *|* Input converged eigenvectors = " << startingEV << endl;
354 cout << "\n -- Start iterations -- \n";
355 }
356
357 timeOuterLoop -= MyWatch.WallTime();
358 for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {
359
360 highMem = (highMem > currentSize()) ? highMem : currentSize();
361
362 if ((outerIter == 1) || (reStart == true)) {
363
364 reStart = false;
365 localSize = blockSize;
366
367 if (nFound > 0) {
368
369 Epetra_MultiVector X2(View, X, blockSize-nFound, nFound);
370 Epetra_MultiVector MX2(View, MX, blockSize-nFound, nFound);
371 Epetra_MultiVector KX2(View, KX, blockSize-nFound, nFound);
372
373 // Apply the mass matrix to X
374 timeMassOp -= MyWatch.WallTime();
375 if (M)
376 M->Apply(X2, MX2);
377 timeMassOp += MyWatch.WallTime();
378 massOp += nFound;
379
380 if (knownEV > 0) {
381 // Orthonormalize X against the known eigenvectors with Gram-Schmidt
382 // Note: Use R as a temporary work space
383 Epetra_MultiVector copyQ(View, Q, 0, knownEV);
384 timeOrtho -= MyWatch.WallTime();
385 info = modalTool.massOrthonormalize(X, MX, M, copyQ, nFound, 0, R.Values());
386 timeOrtho += MyWatch.WallTime();
387 // Exit the code if the orthogonalization did not succeed
388 if (info < 0) {
389 info = -10;
390 delete[] work1;
391 delete[] work2;
392 if (vectWeight)
393 delete vectWeight;
394 return info;
395 }
396 }
397
398 // Apply the stiffness matrix to X
399 timeStifOp -= MyWatch.WallTime();
400 K->Apply(X2, KX2);
401 timeStifOp += MyWatch.WallTime();
402 stifOp += nFound;
403
404 } // if (nFound > 0)
405
406 } // if ((outerIter == 1) || (reStart == true))
407 else {
408
409 // Apply the preconditioner on the residuals
410 if (Prec != 0) {
411 timePrecOp -= MyWatch.WallTime();
412 Prec->ApplyInverse(R, H);
413 timePrecOp += MyWatch.WallTime();
414 precOp += blockSize;
415 }
416 else {
417 memcpy(H.Values(), R.Values(), xr*blockSize*sizeof(double));
418 }
419
420 // Compute the product H^T*R
421 timeSearchP -= MyWatch.WallTime();
422 memcpy(oldHtR, currentHtR, blockSize*sizeof(double));
423 H.Dot(R, currentHtR);
424 // Define the new search directions
425 if (localSize == blockSize) {
426 P.Scale(-1.0, H);
427 localSize = twoBlocks;
428 } // if (localSize == blockSize)
429 else {
430 bool hasZeroDot = false;
431 for (j = 0; j < blockSize; ++j) {
432 if (oldHtR[j] == 0.0) {
433 hasZeroDot = true;
434 break;
435 }
436 callBLAS.SCAL(xr, currentHtR[j]/oldHtR[j], P.Values() + j*xr);
437 }
438 if (hasZeroDot == true) {
439 // Restart the computation when there is a null dot product
440 if (localVerbose > 0) {
441 cout << endl;
442 cout << " !! Null dot product -- Restart the search space !!\n";
443 cout << endl;
444 }
445 if (blockSize == 1) {
446 X.Random();
447 nFound = blockSize;
448 }
449 else {
450 Epetra_MultiVector Xinit(View, X, j, blockSize-j);
451 Xinit.Random();
452 nFound = blockSize - j;
453 } // if (blockSize == 1)
454 reStart = true;
455 numRestart += 1;
456 info = 0;
457 continue;
458 }
459 callBLAS.AXPY(xr*blockSize, -1.0, H.Values(), P.Values());
460 } // if (localSize == blockSize)
461 timeSearchP += MyWatch.WallTime();
462
463 // Apply the mass matrix on P
464 timeMassOp -= MyWatch.WallTime();
465 if (M)
466 M->Apply(P, MP);
467 timeMassOp += MyWatch.WallTime();
468 massOp += blockSize;
469
470 if (knownEV > 0) {
471 // Orthogonalize P against the known eigenvectors
472 // Note: Use R as a temporary work space
473 Epetra_MultiVector copyQ(View, Q, 0, knownEV);
474 timeOrtho -= MyWatch.WallTime();
475 modalTool.massOrthonormalize(P, MP, M, copyQ, blockSize, 1, R.Values());
476 timeOrtho += MyWatch.WallTime();
477 }
478
479 // Apply the stiffness matrix to P
480 timeStifOp -= MyWatch.WallTime();
481 K->Apply(P, KP);
482 timeStifOp += MyWatch.WallTime();
483 stifOp += blockSize;
484
485 } // if ((outerIter == 1) || (reStart == true))
486
487 // Form "local" mass and stiffness matrices
488 // Note: Use S as a temporary workspace
489 timeLocalProj -= MyWatch.WallTime();
490 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KX.Values(), xr,
491 KK, localSize, S);
492 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MX.Values(), xr,
493 MM, localSize, S);
494 if (localSize > blockSize) {
495 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KP.Values(), xr,
496 KK + blockSize*localSize, localSize, S);
497 modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, KP.Values(), xr,
498 KK + blockSize*localSize + blockSize, localSize, S);
499 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MP.Values(), xr,
500 MM + blockSize*localSize, localSize, S);
501 modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, MP.Values(), xr,
502 MM + blockSize*localSize + blockSize, localSize, S);
503 } // if (localSize > blockSize)
504 timeLocalProj += MyWatch.WallTime();
505
506 // Perform a spectral decomposition
507 timeLocalSolve -= MyWatch.WallTime();
508 int nevLocal = localSize;
509 info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
510 S, localSize, theta, localVerbose,
511 (blockSize == 1) ? 1: 0);
512 timeLocalSolve += MyWatch.WallTime();
513
514 if (info < 0) {
515 // Stop when spectral decomposition has a critical failure
516 break;
517 }
518
519 // Check for restarting
520 if ((theta[0] < 0.0) || (nevLocal < blockSize)) {
521 if (localVerbose > 0) {
522 cout << " Iteration " << outerIter;
523 cout << "- Failure for spectral decomposition - RESTART with new random search\n";
524 }
525 if (blockSize == 1) {
526 X.Random();
527 nFound = blockSize;
528 }
529 else {
530 Epetra_MultiVector Xinit(View, X, 1, blockSize-1);
531 Xinit.Random();
532 nFound = blockSize - 1;
533 } // if (blockSize == 1)
534 reStart = true;
535 numRestart += 1;
536 info = 0;
537 continue;
538 } // if ((theta[0] < 0.0) || (nevLocal < blockSize))
539
540 if ((localSize == twoBlocks) && (nevLocal == blockSize)) {
541 for (j = 0; j < nevLocal; ++j)
542 memcpy(S + j*blockSize, S + j*twoBlocks, blockSize*sizeof(double));
543 localSize = blockSize;
544 }
545
546 // Check the direction of eigenvectors
547 // Note: This sign check is important for convergence
548 for (j = 0; j < nevLocal; ++j) {
549 double coeff = S[j + j*localSize];
550 if (coeff < 0.0)
551 callBLAS.SCAL(localSize, -1.0, S + j*localSize);
552 }
553
554 // Compute the residuals
555 timeResidual -= MyWatch.WallTime();
556 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KX.Values(), xr,
557 S, localSize, 0.0, R.Values(), xr);
558 if (localSize == twoBlocks) {
559 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
560 S + blockSize, localSize, 1.0, R.Values(), xr);
561 }
562 for (j = 0; j < blockSize; ++j)
563 callBLAS.SCAL(localSize, theta[j], S + j*localSize);
564 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MX.Values(), xr,
565 S, localSize, 1.0, R.Values(), xr);
566 if (localSize == twoBlocks) {
567 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MP.Values(), xr,
568 S + blockSize, localSize, 1.0, R.Values(), xr);
569 }
570 for (j = 0; j < blockSize; ++j)
571 callBLAS.SCAL(localSize, 1.0/theta[j], S + j*localSize);
572 timeResidual += MyWatch.WallTime();
573
574 // Compute the norms of the residuals
575 timeNorm -= MyWatch.WallTime();
576 if (vectWeight)
577 R.NormWeighted(*vectWeight, normR);
578 else
579 R.Norm2(normR);
580 // Scale the norms of residuals with the eigenvalues
581 // Count the converged eigenvectors
582 nFound = 0;
583 for (j = 0; j < blockSize; ++j) {
584 normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
585 if (normR[j] < tolEigenSolve)
586 nFound += 1;
587 }
588 timeNorm += MyWatch.WallTime();
589
590 // Store the residual history
591 if (localVerbose > 2) {
592 memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double));
593 historyCount += 1;
594 }
595
596 // Print information on current iteration
597 if (localVerbose > 0) {
598 cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
599 cout << knownEV + nFound << endl;
600 }
601
602 if (localVerbose > 1) {
603 cout << endl;
604 cout.precision(2);
605 cout.setf(ios::scientific, ios::floatfield);
606 for (i=0; i<blockSize; ++i) {
607 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
608 cout << " = " << normR[i] << endl;
609 }
610 cout << endl;
611 cout.precision(2);
612 for (i=0; i<blockSize; ++i) {
613 cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
614 cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
615 cout << " = " << theta[i] << endl;
616 }
617 cout << endl;
618 }
619
620 if (nFound == 0) {
621 // Update the spaces
622 // Note: Use H as a temporary work space
623 timeLocalUpdate -= MyWatch.WallTime();
624 memcpy(H.Values(), X.Values(), xr*blockSize*sizeof(double));
625 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
626 0.0, X.Values(), xr);
627 memcpy(H.Values(), KX.Values(), xr*blockSize*sizeof(double));
628 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
629 0.0, KX.Values(), xr);
630 if (M) {
631 memcpy(H.Values(), MX.Values(), xr*blockSize*sizeof(double));
632 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
633 0.0, MX.Values(), xr);
634 }
635 if (localSize == twoBlocks) {
636 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, P.Values(), xr,
637 S + blockSize, localSize, 1.0, X.Values(), xr);
638 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
639 S + blockSize, localSize, 1.0, KX.Values(), xr);
640 if (M) {
641 callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, MP.Values(), xr,
642 S + blockSize, localSize, 1.0, MX.Values(), xr);
643 }
644 } // if (localSize == twoBlocks)
645 timeLocalUpdate += MyWatch.WallTime();
646 // When required, monitor some orthogonalities
647 if (verbose > 2) {
648 if (knownEV == 0) {
649 accuracyCheck(&X, &MX, &R, 0, (localSize>blockSize) ? &P : 0);
650 }
651 else {
652 Epetra_MultiVector copyQ(View, Q, 0, knownEV);
653 accuracyCheck(&X, &MX, &R, &copyQ, (localSize>blockSize) ? &P : 0);
654 }
655 } // if (verbose > 2)
656 continue;
657 } // if (nFound == 0)
658
659 // Order the Ritz eigenvectors by putting the converged vectors at the beginning
660 int firstIndex = blockSize;
661 for (j = 0; j < blockSize; ++j) {
662 if (normR[j] >= tolEigenSolve) {
663 firstIndex = j;
664 break;
665 }
666 } // for (j = 0; j < blockSize; ++j)
667 while (firstIndex < nFound) {
668 for (j = firstIndex; j < blockSize; ++j) {
669 if (normR[j] < tolEigenSolve) {
670 // Swap the j-th and firstIndex-th position
671 callFortran.SWAP(localSize, S + j*localSize, 1, S + firstIndex*localSize, 1);
672 callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
673 callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
674 break;
675 }
676 } // for (j = firstIndex; j < blockSize; ++j)
677 for (j = 0; j < blockSize; ++j) {
678 if (normR[j] >= tolEigenSolve) {
679 firstIndex = j;
680 break;
681 }
682 } // for (j = 0; j < blockSize; ++j)
683 } // while (firstIndex < nFound)
684
685 // Copy the converged eigenvalues
686 memcpy(lambda + knownEV, theta, nFound*sizeof(double));
687
688 // Convergence test
689 if (knownEV + nFound >= numEigen) {
690 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
691 S, localSize, 0.0, R.Values(), xr);
692 if (localSize > blockSize) {
693 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
694 S + blockSize, localSize, 1.0, R.Values(), xr);
695 }
696 memcpy(Q.Values() + knownEV*xr, R.Values(), nFound*xr*sizeof(double));
697 knownEV += nFound;
698 if (localVerbose == 1) {
699 cout << endl;
700 cout.precision(2);
701 cout.setf(ios::scientific, ios::floatfield);
702 for (i=0; i<blockSize; ++i) {
703 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
704 cout << " = " << normR[i] << endl;
705 }
706 cout << endl;
707 }
708 break;
709 }
710
711 // Store the converged eigenvalues and eigenvectors
712 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
713 S, localSize, 0.0, Q.Values() + knownEV*xr, xr);
714 if (localSize == twoBlocks) {
715 callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
716 S + blockSize, localSize, 1.0, Q.Values() + knownEV*xr, xr);
717 }
718 knownEV += nFound;
719
720 // Define the restarting vectors
721 timeRestart -= MyWatch.WallTime();
722 int leftOver = (nevLocal < blockSize + nFound) ? nevLocal - nFound : blockSize;
723 double *Snew = S + nFound*localSize;
724 memcpy(H.Values(), X.Values(), blockSize*xr*sizeof(double));
725 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
726 Snew, localSize, 0.0, X.Values(), xr);
727 memcpy(H.Values(), KX.Values(), blockSize*xr*sizeof(double));
728 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
729 Snew, localSize, 0.0, KX.Values(), xr);
730 if (M) {
731 memcpy(H.Values(), MX.Values(), blockSize*xr*sizeof(double));
732 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
733 Snew, localSize, 0.0, MX.Values(), xr);
734 }
735 if (localSize == twoBlocks) {
736 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, P.Values(), xr,
737 Snew+blockSize, localSize, 1.0, X.Values(), xr);
738 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, KP.Values(), xr,
739 Snew+blockSize, localSize, 1.0, KX.Values(), xr);
740 if (M) {
741 callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, MP.Values(), xr,
742 Snew+blockSize, localSize, 1.0, MX.Values(), xr);
743 }
744 } // if (localSize == twoBlocks)
745 if (nevLocal < blockSize + nFound) {
746 // Put new random vectors at the end of the block
747 Epetra_MultiVector Xtmp(View, X, leftOver, blockSize - leftOver);
748 Xtmp.Random();
749 }
750 else {
751 nFound = 0;
752 } // if (nevLocal < blockSize + nFound)
753 reStart = true;
754 timeRestart += MyWatch.WallTime();
755
756 } // for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter)
757 timeOuterLoop += MyWatch.WallTime();
758 highMem = (highMem > currentSize()) ? highMem : currentSize();
759
760 // Clean memory
761 delete[] work1;
762 delete[] work2;
763 if (vectWeight)
764 delete vectWeight;
765
766 // Sort the eigenpairs
767 timePostProce -= MyWatch.WallTime();
768 if ((info == 0) && (knownEV > 0)) {
769 mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
770 }
771 timePostProce += MyWatch.WallTime();
772
773 return (info == 0) ? knownEV : info;
774
775}
776
777
778void BlockDACG::accuracyCheck(const Epetra_MultiVector *X, const Epetra_MultiVector *MX,
779 const Epetra_MultiVector *R, const Epetra_MultiVector *Q,
780 const Epetra_MultiVector *P) const {
781
782 cout.precision(2);
783 cout.setf(ios::scientific, ios::floatfield);
784 double tmp;
785
786 int myPid = MyComm.MyPID();
787
788 if (X) {
789 if (M) {
790 if (MX) {
791 tmp = myVerify.errorEquality(X, MX, M);
792 if (myPid == 0)
793 cout << " >> Difference between MX and M*X = " << tmp << endl;
794 }
795 tmp = myVerify.errorOrthonormality(X, M);
796 if (myPid == 0)
797 cout << " >> Error in X^T M X - I = " << tmp << endl;
798 }
799 else {
800 tmp = myVerify.errorOrthonormality(X, 0);
801 if (myPid == 0)
802 cout << " >> Error in X^T X - I = " << tmp << endl;
803 }
804 }
805
806 if ((R) && (X)) {
807 tmp = myVerify.errorOrthogonality(X, R);
808 if (myPid == 0)
809 cout << " >> Orthogonality X^T R up to " << tmp << endl;
810 }
811
812 if (Q == 0)
813 return;
814
815 if (M) {
816 tmp = myVerify.errorOrthonormality(Q, M);
817 if (myPid == 0)
818 cout << " >> Error in Q^T M Q - I = " << tmp << endl;
819 if (X) {
820 tmp = myVerify.errorOrthogonality(Q, X, M);
821 if (myPid == 0)
822 cout << " >> Orthogonality Q^T M X up to " << tmp << endl;
823 }
824 if (P) {
825 tmp = myVerify.errorOrthogonality(Q, P, M);
826 if (myPid == 0)
827 cout << " >> Orthogonality Q^T M P up to " << tmp << endl;
828 }
829 }
830 else {
831 tmp = myVerify.errorOrthonormality(Q, 0);
832 if (myPid == 0)
833 cout << " >> Error in Q^T Q - I = " << tmp << endl;
834 if (X) {
835 tmp = myVerify.errorOrthogonality(Q, X, 0);
836 if (myPid == 0)
837 cout << " >> Orthogonality Q^T X up to " << tmp << endl;
838 }
839 if (P) {
840 tmp = myVerify.errorOrthogonality(Q, P, 0);
841 if (myPid == 0)
842 cout << " >> Orthogonality Q^T P up to " << tmp << endl;
843 }
844 }
845
846}
847
848
849void BlockDACG::initializeCounters() {
850
851 historyCount = 0;
852 if (resHistory) {
853 delete[] resHistory;
854 resHistory = 0;
855 }
856
857 memRequested = 0.0;
858 highMem = 0.0;
859
860 massOp = 0;
861 numRestart = 0;
862 outerIter = 0;
863 precOp = 0;
864 residual = 0;
865 stifOp = 0;
866
867 timeLocalProj = 0.0;
868 timeLocalSolve = 0.0;
869 timeLocalUpdate = 0.0;
870 timeMassOp = 0.0;
871 timeNorm = 0.0;
872 timeOrtho = 0.0;
873 timeOuterLoop = 0.0;
874 timePostProce = 0.0;
875 timePrecOp = 0.0;
876 timeResidual = 0.0;
877 timeRestart = 0.0;
878 timeSearchP = 0.0;
879 timeStifOp = 0.0;
880
881
882}
883
884
885void BlockDACG::algorithmInfo() const {
886
887 int myPid = MyComm.MyPID();
888
889 if (myPid ==0) {
890 cout << " Algorithm: DACG (block version) with Cholesky-based local eigensolver\n";
891 cout << " Block Size: " << blockSize << endl;
892 }
893
894}
895
896
897void BlockDACG::historyInfo() const {
898
899 if (resHistory) {
900 int j;
901 cout << " Block Size: " << blockSize << endl;
902 cout << endl;
903 cout << " Residuals\n";
904 cout << endl;
905 cout.precision(4);
906 cout.setf(ios::scientific, ios::floatfield);
907 for (j = 0; j < historyCount; ++j) {
908 int ii;
909 for (ii = 0; ii < blockSize; ++ii)
910 cout << resHistory[blockSize*j + ii] << endl;
911 }
912 cout << endl;
913 }
914
915}
916
917
918void BlockDACG::memoryInfo() const {
919
920 int myPid = MyComm.MyPID();
921
922 double maxHighMem = 0.0;
923 double tmp = highMem;
924 MyComm.MaxAll(&tmp, &maxHighMem, 1);
925
926 double maxMemRequested = 0.0;
927 tmp = memRequested;
928 MyComm.MaxAll(&tmp, &maxMemRequested, 1);
929
930 if (myPid == 0) {
931 cout.precision(2);
932 cout.setf(ios::fixed, ios::floatfield);
933 cout << " Memory requested per processor by the eigensolver = (EST) ";
934 cout.width(6);
935 cout << maxMemRequested << " MB " << endl;
936 cout << endl;
937 cout << " High water mark in eigensolver = (EST) ";
938 cout.width(6);
939 cout << maxHighMem << " MB " << endl;
940 cout << endl;
941 }
942
943}
944
945
946void BlockDACG::operationInfo() const {
947
948 int myPid = MyComm.MyPID();
949
950 if (myPid == 0) {
951 cout << " --- Operations ---\n\n";
952 cout << " Total number of mass matrix multiplications = ";
953 cout.width(9);
954 cout << massOp + modalTool.getNumProj_MassMult() + modalTool.getNumNorm_MassMult() << endl;
955 cout << " Mass multiplications in eigensolver = ";
956 cout.width(9);
957 cout << massOp << endl;
958 cout << " Mass multiplications for orthogonalization = ";
959 cout.width(9);
960 cout << modalTool.getNumProj_MassMult() << endl;
961 cout << " Mass multiplications for normalization = ";
962 cout.width(9);
963 cout << modalTool.getNumNorm_MassMult() << endl;
964 cout << " Total number of stiffness matrix operations = ";
965 cout.width(9);
966 cout << stifOp << endl;
967 cout << " Total number of preconditioner applications = ";
968 cout.width(9);
969 cout << precOp << endl;
970 cout << " Total number of computed eigen-residuals = ";
971 cout.width(9);
972 cout << residual << endl;
973 cout << "\n";
974 cout << " Total number of outer iterations = ";
975 cout.width(9);
976 cout << outerIter << endl;
977 cout << " Number of restarts = ";
978 cout.width(9);
979 cout << numRestart << endl;
980 cout << "\n";
981 }
982
983}
984
985
986void BlockDACG::timeInfo() const {
987
988 int myPid = MyComm.MyPID();
989
990 if (myPid == 0) {
991 cout << " --- Timings ---\n\n";
992 cout.setf(ios::fixed, ios::floatfield);
993 cout.precision(2);
994 cout << " Total time for outer loop = ";
995 cout.width(9);
996 cout << timeOuterLoop << " s" << endl;
997 cout << " Time for mass matrix operations = ";
998 cout.width(9);
999 cout << timeMassOp << " s ";
1000 cout.width(5);
1001 cout << 100*timeMassOp/timeOuterLoop << " %\n";
1002 cout << " Time for stiffness matrix operations = ";
1003 cout.width(9);
1004 cout << timeStifOp << " s ";
1005 cout.width(5);
1006 cout << 100*timeStifOp/timeOuterLoop << " %\n";
1007 cout << " Time for preconditioner applications = ";
1008 cout.width(9);
1009 cout << timePrecOp << " s ";
1010 cout.width(5);
1011 cout << 100*timePrecOp/timeOuterLoop << " %\n";
1012 cout << " Time for defining search directions = ";
1013 cout.width(9);
1014 cout << timeSearchP << " s ";
1015 cout.width(5);
1016 cout << 100*timeSearchP/timeOuterLoop << " %\n";
1017 cout << " Time for orthogonalizations = ";
1018 cout.width(9);
1019 cout << timeOrtho << " s ";
1020 cout.width(5);
1021 cout << 100*timeOrtho/timeOuterLoop << " %\n";
1022 cout << " Projection step : ";
1023 cout.width(9);
1024 cout << modalTool.getTimeProj() << " s\n";
1025 cout << " Q^T mult. :: ";
1026 cout.width(9);
1027 cout << modalTool.getTimeProj_QtMult() << " s\n";
1028 cout << " Q mult. :: ";
1029 cout.width(9);
1030 cout << modalTool.getTimeProj_QMult() << " s\n";
1031 cout << " Mass mult. :: ";
1032 cout.width(9);
1033 cout << modalTool.getTimeProj_MassMult() << " s\n";
1034 cout << " Normalization step : ";
1035 cout.width(9);
1036 cout << modalTool.getTimeNorm() << " s\n";
1037 cout << " Mass mult. :: ";
1038 cout.width(9);
1039 cout << modalTool.getTimeNorm_MassMult() << " s\n";
1040 cout << " Time for local projection = ";
1041 cout.width(9);
1042 cout << timeLocalProj << " s ";
1043 cout.width(5);
1044 cout << 100*timeLocalProj/timeOuterLoop << " %\n";
1045 cout << " Time for local eigensolve = ";
1046 cout.width(9);
1047 cout << timeLocalSolve << " s ";
1048 cout.width(5);
1049 cout << 100*timeLocalSolve/timeOuterLoop << " %\n";
1050 cout << " Time for local update = ";
1051 cout.width(9);
1052 cout << timeLocalUpdate << " s ";
1053 cout.width(5);
1054 cout << 100*timeLocalUpdate/timeOuterLoop << " %\n";
1055 cout << " Time for residual computations = ";
1056 cout.width(9);
1057 cout << timeResidual << " s ";
1058 cout.width(5);
1059 cout << 100*timeResidual/timeOuterLoop << " %\n";
1060 cout << " Time for residuals norm computations = ";
1061 cout.width(9);
1062 cout << timeNorm << " s ";
1063 cout.width(5);
1064 cout << 100*timeNorm/timeOuterLoop << " %\n";
1065 cout << " Time for restarting space definition = ";
1066 cout.width(9);
1067 cout << timeRestart << " s ";
1068 cout.width(5);
1069 cout << 100*timeRestart/timeOuterLoop << " %\n";
1070 cout << "\n";
1071 cout << " Total time for post-processing = ";
1072 cout.width(9);
1073 cout << timePostProce << " s\n";
1074 cout << endl;
1075 } // if (myPid == 0)
1076
1077}
1078
1079