Anasazi Version of the Day
Loading...
Searching...
No Matches
Davidson.cpp
1// @HEADER
2// *****************************************************************************
3// Anasazi: Block Eigensolvers Package
4//
5// Copyright 2004 NTESS and the Anasazi contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10//**************************************************************************
11//
12// NOTICE
13//
14// This software is a result of the research described in the cout
15//
16// " A comparison of algorithms for modal analysis in the absence
17// of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
18// Sandia National Laboratories, Technical cout SAND2003-1028J.
19//
20// It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
21// framework ( http://software.sandia.gov/trilinos/ ).
22//
23// The distribution of this software follows also the rules defined in Trilinos.
24// This notice shall be marked on any reproduction of this software, in whole or
25// in part.
26//
27// This program is distributed in the hope that it will be useful, but
28// WITHOUT ANY WARRANTY; without even the implied warranty of
29// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
30//
31// Code Authors: U. Hetmaniuk (ulhetma@sandia.gov), R. Lehoucq (rblehou@sandia.gov)
32//
33//**************************************************************************
34
35#include "Davidson.h"
36
37
38Davidson::Davidson(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
39 const Epetra_Operator *PP, int _blk, int _numBlk,
40 double _tol, int _maxIter, int _verb) :
41 myVerify(_Comm),
42 callBLAS(),
43 callFortran(),
44 modalTool(_Comm),
45 mySort(),
46 MyComm(_Comm),
47 K(KK),
48 M(0),
49 Prec(PP),
50 MyWatch(_Comm),
51 tolEigenSolve(_tol),
52 maxIterEigenSolve(_maxIter),
53 blockSize(_blk),
54 numBlock(_numBlk),
55 normWeight(0),
56 verbose(_verb),
57 historyCount(0),
58 resHistory(0),
59 maxSpaceSize(0),
60 sumSpaceSize(0),
61 spaceSizeHistory(0),
62 memRequested(0.0),
63 highMem(0.0),
64 massOp(0),
65 numRestart(0),
66 outerIter(0),
67 precOp(0),
68 residual(0),
69 stifOp(0),
70 timeLocalProj(0.0),
71 timeLocalSolve(0.0),
72 timeLocalUpdate(0.0),
73 timeMassOp(0.0),
74 timeNorm(0.0),
75 timeOrtho(0.0),
76 timeOuterLoop(0.0),
77 timePostProce(0.0),
78 timePrecOp(0.0),
79 timeResidual(0.0),
80 timeRestart(0.0),
81 timeStifOp(0.0)
82 {
83
84}
85
86
87Davidson::Davidson(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
88 const Epetra_Operator *MM, const Epetra_Operator *PP, int _blk, int _numBlk,
89 double _tol, int _maxIter, int _verb, double *_weight) :
90 myVerify(_Comm),
91 callBLAS(),
92 callFortran(),
93 modalTool(_Comm),
94 mySort(),
95 MyComm(_Comm),
96 K(KK),
97 M(MM),
98 Prec(PP),
99 MyWatch(_Comm),
100 tolEigenSolve(_tol),
101 maxIterEigenSolve(_maxIter),
102 blockSize(_blk),
103 numBlock(_numBlk),
104 normWeight(_weight),
105 verbose(_verb),
106 historyCount(0),
107 resHistory(0),
108 maxSpaceSize(0),
109 sumSpaceSize(0),
110 spaceSizeHistory(0),
111 memRequested(0.0),
112 highMem(0.0),
113 massOp(0),
114 numRestart(0),
115 outerIter(0),
116 precOp(0),
117 residual(0),
118 stifOp(0),
119 timeLocalProj(0.0),
120 timeLocalSolve(0.0),
121 timeLocalUpdate(0.0),
122 timeMassOp(0.0),
123 timeNorm(0.0),
124 timeOrtho(0.0),
125 timeOuterLoop(0.0),
126 timePostProce(0.0),
127 timePrecOp(0.0),
128 timeResidual(0.0),
129 timeRestart(0.0),
130 timeStifOp(0.0)
131 {
132
133}
134
135
136Davidson::~Davidson() {
137
138 if (resHistory) {
139 delete[] resHistory;
140 resHistory = 0;
141 }
142
143 if (spaceSizeHistory) {
144 delete[] spaceSizeHistory;
145 spaceSizeHistory = 0;
146 }
147
148}
149
150
151int Davidson::solve(int numEigen, Epetra_MultiVector &Q, double *lambda) {
152
153 return Davidson::reSolve(numEigen, Q, lambda);
154
155}
156
157
158int Davidson::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {
159
160 // Computes the smallest eigenvalues and the corresponding eigenvectors
161 // of the generalized eigenvalue problem
162 //
163 // K X = M X Lambda
164 //
165 // using a generalized Davidson algorithm
166 //
167 // Note that if M is not specified, then K X = X Lambda is solved.
168 //
169 // Input variables:
170 //
171 // numEigen (integer) = Number of eigenmodes requested
172 //
173 // Q (Epetra_MultiVector) = Converged eigenvectors
174 // The number of columns of Q must be at least numEigen + blockSize.
175 // The rows of Q are distributed across processors.
176 // At exit, the first numEigen columns contain the eigenvectors requested.
177 //
178 // lambda (array of doubles) = Converged eigenvalues
179 // At input, it must be of size numEigen + blockSize.
180 // At exit, the first numEigen locations contain the eigenvalues requested.
181 //
182 // startingEV (integer) = Number of existing converged eigenvectors
183 // We assume that the user has check the eigenvectors and
184 // their M-orthonormality.
185 //
186 // Return information on status of computation
187 //
188 // info >= 0 >> Number of converged eigenpairs at the end of computation
189 //
190 // // Failure due to input arguments
191 //
192 // info = - 1 >> The stiffness matrix K has not been specified.
193 // info = - 2 >> The maps for the matrix K and the matrix M differ.
194 // info = - 3 >> The maps for the matrix K and the preconditioner P differ.
195 // info = - 4 >> The maps for the vectors and the matrix K differ.
196 // info = - 5 >> Q is too small for the number of eigenvalues requested.
197 // info = - 6 >> Q is too small for the computation parameters.
198 //
199 // info = - 8 >> The number of blocks is too small for the number of eigenvalues.
200 //
201 // info = - 10 >> Failure during the mass orthonormalization
202 //
203 // info = - 30 >> MEMORY
204 //
205
206 // Check the input parameters
207
208 if (numEigen <= startingEV) {
209 return startingEV;
210 }
211
212 int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, minimumSpaceDimension(numEigen));
213 if (info < 0)
214 return info;
215
216 int myPid = MyComm.MyPID();
217
218 if (numBlock*blockSize < numEigen) {
219 if (myPid == 0) {
220 cerr << endl;
221 cerr << " !!! The space dimension (# of blocks x size of blocks) must be greater than ";
222 cerr << " the number of eigenvalues !!!\n";
223 cerr << " Number of blocks = " << numBlock << endl;
224 cerr << " Size of blocks = " << blockSize << endl;
225 cerr << " Number of eigenvalues = " << numEigen << endl;
226 cerr << endl;
227 }
228 return -8;
229 }
230
231 // Get the weight for approximating the M-inverse norm
232 Epetra_Vector *vectWeight = 0;
233 if (normWeight) {
234 vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
235 }
236
237 int knownEV = startingEV;
238 int localVerbose = verbose*(myPid==0);
239
240 // Define local block vectors
241 //
242 // MX = Working vectors (storing M*X if M is specified, else pointing to X)
243 // KX = Working vectors (storing K*X)
244 //
245 // R = Residuals
246
247 int xr = Q.MyLength();
248 int dimSearch = blockSize*numBlock;
249
250 Epetra_MultiVector X(View, Q, 0, dimSearch + blockSize);
251 if (knownEV > 0) {
252 Epetra_MultiVector copyX(View, Q, knownEV, blockSize);
253 copyX.Random();
254 }
255 else {
256 X.Random();
257 }
258
259 int tmp;
260 tmp = (M == 0) ? 2*blockSize*xr : 3*blockSize*xr;
261
262 double *work1 = new (nothrow) double[tmp];
263 if (work1 == 0) {
264 if (vectWeight)
265 delete vectWeight;
266 info = -30;
267 return info;
268 }
269 memRequested += sizeof(double)*tmp/(1024.0*1024.0);
270
271 highMem = (highMem > currentSize()) ? highMem : currentSize();
272
273 double *tmpD = work1;
274
275 Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize);
276 tmpD = tmpD + xr*blockSize;
277
278 Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize);
279 tmpD = (M) ? tmpD + xr*blockSize : tmpD;
280
281 Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);
282
283 // Define arrays
284 //
285 // theta = Store the local eigenvalues (size: dimSearch)
286 // normR = Store the norm of residuals (size: blockSize)
287 //
288 // KK = Local stiffness matrix (size: dimSearch x dimSearch)
289 //
290 // S = Local eigenvectors (size: dimSearch x dimSearch)
291 //
292 // tmpKK = Local workspace (size: blockSize x blockSize)
293
294 int lwork2 = blockSize + dimSearch + 2*dimSearch*dimSearch + blockSize*blockSize;
295 double *work2 = new (nothrow) double[lwork2];
296 if (work2 == 0) {
297 if (vectWeight)
298 delete vectWeight;
299 delete[] work1;
300 info = -30;
301 return info;
302 }
303
304 memRequested += sizeof(double)*lwork2/(1024.0*1024.0);
305 highMem = (highMem > currentSize()) ? highMem : currentSize();
306
307 tmpD = work2;
308
309 double *theta = tmpD;
310 tmpD = tmpD + dimSearch;
311
312 double *normR = tmpD;
313 tmpD = tmpD + blockSize;
314
315 double *KK = tmpD;
316 tmpD = tmpD + dimSearch*dimSearch;
317 memset(KK, 0, dimSearch*dimSearch*sizeof(double));
318
319 double *S = tmpD;
320 tmpD = tmpD + dimSearch*dimSearch;
321
322 double *tmpKK = tmpD;
323
324 // Define an array to store the residuals history
325 if (localVerbose > 2) {
326 resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
327 spaceSizeHistory = new (nothrow) int[maxIterEigenSolve];
328 if ((resHistory == 0) || (spaceSizeHistory == 0)) {
329 if (vectWeight)
330 delete vectWeight;
331 delete[] work1;
332 delete[] work2;
333 info = -30;
334 return info;
335 }
336 historyCount = 0;
337 }
338
339 // Miscellaneous definitions
340
341 bool reStart = false;
342 numRestart = 0;
343
344 bool criticalExit = false;
345
346 int bStart = 0;
347 int offSet = 0;
348 numBlock = (dimSearch/blockSize) - (knownEV/blockSize);
349
350 int nFound = blockSize;
351 int i, j;
352
353 if (localVerbose > 0) {
354 cout << endl;
355 cout << " *|* Problem: ";
356 if (M)
357 cout << "K*Q = M*Q D ";
358 else
359 cout << "K*Q = Q D ";
360 if (Prec)
361 cout << " with preconditioner";
362 cout << endl;
363 cout << " *|* Algorithm = Davidson algorithm (block version)" << endl;
364 cout << " *|* Size of blocks = " << blockSize << endl;
365 cout << " *|* Largest size of search space = " << numBlock*blockSize << endl;
366 cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
367 cout.precision(2);
368 cout.setf(ios::scientific, ios::floatfield);
369 cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
370 cout << " *|* Norm used for convergence: ";
371 if (vectWeight)
372 cout << "weighted L2-norm with user-provided weights" << endl;
373 else
374 cout << "L^2-norm" << endl;
375 if (startingEV > 0)
376 cout << " *|* Input converged eigenvectors = " << startingEV << endl;
377 cout << "\n -- Start iterations -- \n";
378 }
379
380 int maxBlock = (dimSearch/blockSize) - (knownEV/blockSize);
381
382 timeOuterLoop -= MyWatch.WallTime();
383 outerIter = 0;
384 while (outerIter <= maxIterEigenSolve) {
385
386 highMem = (highMem > currentSize()) ? highMem : currentSize();
387
388 int nb;
389 for (nb = bStart; nb < maxBlock; ++nb) {
390
391 outerIter += 1;
392 if (outerIter > maxIterEigenSolve)
393 break;
394
395 int localSize = nb*blockSize;
396
397 Epetra_MultiVector Xcurrent(View, X, localSize + knownEV, blockSize);
398
399 timeMassOp -= MyWatch.WallTime();
400 if (M)
401 M->Apply(Xcurrent, MX);
402 timeMassOp += MyWatch.WallTime();
403 massOp += blockSize;
404
405 // Orthonormalize X against the known eigenvectors and the previous vectors
406 // Note: Use R as a temporary work space
407 timeOrtho -= MyWatch.WallTime();
408 if (nb == bStart) {
409 if (nFound > 0) {
410 if (knownEV == 0) {
411 info = modalTool.massOrthonormalize(Xcurrent, MX, M, Q, nFound, 2, R.Values());
412 }
413 else {
414 Epetra_MultiVector copyQ(View, X, 0, knownEV + localSize);
415 info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, nFound, 0, R.Values());
416 }
417 }
418 nFound = 0;
419 }
420 else {
421 Epetra_MultiVector copyQ(View, X, 0, knownEV + localSize);
422 info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, blockSize, 0, R.Values());
423 }
424 timeOrtho += MyWatch.WallTime();
425
426 // Exit the code when the number of vectors exceeds the space dimension
427 if (info < 0) {
428 delete[] work1;
429 delete[] work2;
430 if (vectWeight)
431 delete vectWeight;
432 return -10;
433 }
434
435 timeStifOp -= MyWatch.WallTime();
436 K->Apply(Xcurrent, KX);
437 timeStifOp += MyWatch.WallTime();
438 stifOp += blockSize;
439
440 // Check the orthogonality properties of X
441 if (verbose > 2) {
442 if (knownEV + localSize == 0)
443 accuracyCheck(&Xcurrent, &MX, 0);
444 else {
445 Epetra_MultiVector copyQ(View, X, 0, knownEV + localSize);
446 accuracyCheck(&Xcurrent, &MX, &copyQ);
447 }
448 if (localVerbose > 0)
449 cout << endl;
450 } // if (verbose > 2)
451
452 // Define the local stiffness matrix
453 // Note: S is used as a workspace
454 timeLocalProj -= MyWatch.WallTime();
455 for (j = 0; j <= nb; ++j) {
456 callBLAS.GEMM('T', 'N', blockSize, blockSize, xr,
457 1.0, X.Values()+(knownEV+j*blockSize)*xr, xr, KX.Values(), xr,
458 0.0, tmpKK, blockSize);
459 MyComm.SumAll(tmpKK, S, blockSize*blockSize);
460 int iC;
461 for (iC = 0; iC < blockSize; ++iC) {
462 double *Kpointer = KK + localSize*dimSearch + j*blockSize + iC*dimSearch;
463 memcpy(Kpointer, S + iC*blockSize, blockSize*sizeof(double));
464 }
465 }
466 timeLocalProj += MyWatch.WallTime();
467
468 // Perform a spectral decomposition
469 timeLocalSolve -= MyWatch.WallTime();
470 int nevLocal = localSize + blockSize;
471 info = modalTool.directSolver(localSize+blockSize, KK, dimSearch, 0, 0,
472 nevLocal, S, dimSearch, theta, localVerbose, 10);
473 timeLocalSolve += MyWatch.WallTime();
474
475 if (info != 0) {
476 // Stop as spectral decomposition has a critical failure
477 if (info < 0) {
478 criticalExit = true;
479 break;
480 }
481 // Restart as spectral decomposition failed
482 if (localVerbose > 0) {
483 cout << " Iteration " << outerIter;
484 cout << "- Failure for spectral decomposition - RESTART with new random search\n";
485 }
486 reStart = true;
487 numRestart += 1;
488 timeRestart -= MyWatch.WallTime();
489 Epetra_MultiVector Xinit(View, X, knownEV, blockSize);
490 Xinit.Random();
491 timeRestart += MyWatch.WallTime();
492 nFound = blockSize;
493 bStart = 0;
494 break;
495 } // if (info != 0)
496
497 // Update the search space
498 // Note: Use KX as a workspace
499 timeLocalUpdate -= MyWatch.WallTime();
500 callBLAS.GEMM('N', 'N', xr, blockSize, localSize+blockSize, 1.0, X.Values()+knownEV*xr, xr,
501 S, dimSearch, 0.0, KX.Values(), xr);
502 timeLocalUpdate += MyWatch.WallTime();
503
504 // Apply the mass matrix for the next block
505 timeMassOp -= MyWatch.WallTime();
506 if (M)
507 M->Apply(KX, MX);
508 timeMassOp += MyWatch.WallTime();
509 massOp += blockSize;
510
511 // Apply the stiffness matrix for the next block
512 timeStifOp -= MyWatch.WallTime();
513 K->Apply(KX, R);
514 timeStifOp += MyWatch.WallTime();
515 stifOp += blockSize;
516
517 // Form the residuals
518 timeResidual -= MyWatch.WallTime();
519 if (M) {
520 for (j = 0; j < blockSize; ++j) {
521 callBLAS.AXPY(xr, -theta[j], MX.Values() + j*xr, R.Values() + j*xr);
522 }
523 }
524 else {
525 // Note KX contains the updated block
526 for (j = 0; j < blockSize; ++j) {
527 callBLAS.AXPY(xr, -theta[j], KX.Values() + j*xr, R.Values() + j*xr);
528 }
529 }
530 timeResidual += MyWatch.WallTime();
531 residual += blockSize;
532
533 // Compute the norm of residuals
534 timeNorm -= MyWatch.WallTime();
535 if (vectWeight) {
536 R.NormWeighted(*vectWeight, normR);
537 }
538 else {
539 R.Norm2(normR);
540 }
541 // Scale the norms of residuals with the eigenvalues
542 // Count the number of converged eigenvectors
543 nFound = 0;
544 for (j = 0; j < blockSize; ++j) {
545 normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
546 if (normR[j] < tolEigenSolve)
547 nFound += 1;
548 } // for (j = 0; j < blockSize; ++j)
549 timeNorm += MyWatch.WallTime();
550
551 // Store the residual history
552 if (localVerbose > 2) {
553 memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double));
554 spaceSizeHistory[historyCount] = localSize + blockSize;
555 historyCount += 1;
556 }
557 maxSpaceSize = (maxSpaceSize > localSize+blockSize) ? maxSpaceSize : localSize+blockSize;
558 sumSpaceSize += localSize + blockSize;
559
560 // Print information on current iteration
561 if (localVerbose > 0) {
562 cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
563 cout << knownEV + nFound << endl;
564 } // if (localVerbose > 0)
565
566 if (localVerbose > 1) {
567 cout << endl;
568 cout.precision(2);
569 cout.setf(ios::scientific, ios::floatfield);
570 for (i=0; i<blockSize; ++i) {
571 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
572 cout << " = " << normR[i] << endl;
573 }
574 cout << endl;
575 cout.precision(2);
576 for (i=0; i<nevLocal; ++i) {
577 cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
578 cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
579 cout << " = " << theta[i] << endl;
580 }
581 cout << endl;
582 }
583
584 // Exit the loop to treat the converged eigenvectors
585 if (nFound > 0) {
586 nb += 1;
587 offSet = 0;
588 break;
589 }
590
591 // Apply the preconditioner on the residuals
592 // Note: Use KX as a workspace
593 if (maxBlock == 1) {
594 if (Prec) {
595 timePrecOp -= MyWatch.WallTime();
596 Prec->ApplyInverse(R, Xcurrent);
597 timePrecOp += MyWatch.WallTime();
598 precOp += blockSize;
599 }
600 else {
601 memcpy(Xcurrent.Values(), R.Values(), blockSize*xr*sizeof(double));
602 }
603 timeRestart -= MyWatch.WallTime();
604 Xcurrent.Update(1.0, KX, -1.0);
605 timeRestart += MyWatch.WallTime();
606 break;
607 } // if (maxBlock == 1)
608
609 if (nb == maxBlock - 1) {
610 nb += 1;
611 break;
612 }
613
614 Epetra_MultiVector Xnext(View, X, knownEV+localSize+blockSize, blockSize);
615 if (Prec) {
616 timePrecOp -= MyWatch.WallTime();
617 Prec->ApplyInverse(R, Xnext);
618 timePrecOp += MyWatch.WallTime();
619 precOp += blockSize;
620 }
621 else {
622 memcpy(Xnext.Values(), R.Values(), blockSize*xr*sizeof(double));
623 }
624
625 } // for (nb = bStart; nb < maxBlock; ++nb)
626
627 if (outerIter > maxIterEigenSolve)
628 break;
629
630 if (reStart == true) {
631 reStart = false;
632 continue;
633 }
634
635 if (criticalExit == true)
636 break;
637
638 // Store the final converged eigenvectors
639 if (knownEV + nFound >= numEigen) {
640 for (j = 0; j < blockSize; ++j) {
641 if (normR[j] < tolEigenSolve) {
642 memcpy(X.Values() + knownEV*xr, KX.Values() + j*xr, xr*sizeof(double));
643 lambda[knownEV] = theta[j];
644 knownEV += 1;
645 }
646 }
647 if (localVerbose == 1) {
648 cout << endl;
649 cout.precision(2);
650 cout.setf(ios::scientific, ios::floatfield);
651 for (i=0; i<blockSize; ++i) {
652 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
653 cout << " = " << normR[i] << endl;
654 }
655 cout << endl;
656 }
657 break;
658 } // if (knownEV + nFound >= numEigen)
659
660 // Treat the particular case of 1 block
661 if (maxBlock == 1) {
662 if (nFound > 0) {
663 double *Xpointer = X.Values() + (knownEV+nFound)*xr;
664 nFound = 0;
665 for (j = 0; j < blockSize; ++j) {
666 if (normR[j] < tolEigenSolve) {
667 memcpy(X.Values() + knownEV*xr, KX.Values() + j*xr, xr*sizeof(double));
668 lambda[knownEV] = theta[j];
669 knownEV += 1;
670 nFound += 1;
671 }
672 else {
673 memcpy(Xpointer + (j-nFound)*xr, KX.Values() + j*xr, xr*sizeof(double));
674 }
675 }
676 Epetra_MultiVector Xnext(View, X, knownEV + blockSize - nFound, nFound);
677 Xnext.Random();
678 }
679 else {
680 nFound = blockSize;
681 }
682 continue;
683 }
684
685 // Define the restarting block when maxBlock > 1
686 if (nFound > 0) {
687 int firstIndex = blockSize;
688 for (j = 0; j < blockSize; ++j) {
689 if (normR[j] >= tolEigenSolve) {
690 firstIndex = j;
691 break;
692 }
693 } // for (j = 0; j < blockSize; ++j)
694 while (firstIndex < nFound) {
695 for (j = firstIndex; j < blockSize; ++j) {
696 if (normR[j] < tolEigenSolve) {
697 // Swap the j-th and firstIndex-th position
698 callFortran.SWAP(nb*blockSize, S + j*dimSearch, 1, S + firstIndex*dimSearch, 1);
699 callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
700 callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
701 break;
702 }
703 } // for (j = firstIndex; j < blockSize; ++j)
704 for (j = 0; j < blockSize; ++j) {
705 if (normR[j] >= tolEigenSolve) {
706 firstIndex = j;
707 break;
708 }
709 } // for (j = 0; j < blockSize; ++j)
710 } // while (firstIndex < nFound)
711
712 // Copy the converged eigenvalues
713 memcpy(lambda + knownEV, theta, nFound*sizeof(double));
714
715 } // if (nFound > 0)
716
717 // Define the restarting size
718 bStart = ((nb - offSet) > 2) ? (nb - offSet)/2 : 0;
719
720 // Define the restarting space and local stiffness
721 timeRestart -= MyWatch.WallTime();
722 memset(KK, 0, nb*blockSize*dimSearch*sizeof(double));
723 for (j = 0; j < bStart*blockSize; ++j) {
724 KK[j + j*dimSearch] = theta[j + nFound];
725 }
726 // Form the restarting space
727 int oldCol = nb*blockSize;
728 int newCol = nFound + (bStart+1)*blockSize;
729 newCol = (newCol > oldCol) ? oldCol : newCol;
730 callFortran.GEQRF(oldCol, newCol, S, dimSearch, theta, R.Values(), xr*blockSize, &info);
731 callFortran.ORMQR('R', 'N', xr, oldCol, newCol, S, dimSearch, theta,
732 X.Values()+knownEV*xr, xr, R.Values(), blockSize*xr, &info);
733 timeRestart += MyWatch.WallTime();
734
735 if (nFound == 0)
736 offSet += 1;
737
738 knownEV += nFound;
739 maxBlock = (dimSearch/blockSize) - (knownEV/blockSize);
740
741 // Put random vectors if the Rayleigh Ritz vectors are not enough
742 newCol = nFound + (bStart+1)*blockSize;
743 if (newCol > oldCol) {
744 Epetra_MultiVector Xnext(View, X, knownEV+blockSize-nFound, nFound);
745 Xnext.Random();
746 continue;
747 }
748
749 nFound = 0;
750
751 } // while (outerIter <= maxIterEigenSolve)
752 timeOuterLoop += MyWatch.WallTime();
753 highMem = (highMem > currentSize()) ? highMem : currentSize();
754
755 // Clean memory
756 delete[] work1;
757 delete[] work2;
758 if (vectWeight)
759 delete vectWeight;
760
761 // Sort the eigenpairs
762 timePostProce -= MyWatch.WallTime();
763 if ((info == 0) && (knownEV > 0)) {
764 mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
765 }
766 timePostProce += MyWatch.WallTime();
767
768 return (info == 0) ? knownEV : info;
769
770}
771
772
773void Davidson::accuracyCheck(const Epetra_MultiVector *X, const Epetra_MultiVector *MX,
774 const Epetra_MultiVector *Q) const {
775
776 cout.precision(2);
777 cout.setf(ios::scientific, ios::floatfield);
778 double tmp;
779
780 int myPid = MyComm.MyPID();
781
782 if (X) {
783 if (M) {
784 if (MX) {
785 tmp = myVerify.errorEquality(X, MX, M);
786 if (myPid == 0)
787 cout << " >> Difference between MX and M*X = " << tmp << endl;
788 }
789 tmp = myVerify.errorOrthonormality(X, M);
790 if (myPid == 0)
791 cout << " >> Error in X^T M X - I = " << tmp << endl;
792 }
793 else {
794 tmp = myVerify.errorOrthonormality(X, 0);
795 if (myPid == 0)
796 cout << " >> Error in X^T X - I = " << tmp << endl;
797 }
798 }
799
800 if (Q == 0)
801 return;
802
803 if (M) {
804 tmp = myVerify.errorOrthonormality(Q, M);
805 if (myPid == 0)
806 cout << " >> Error in Q^T M Q - I = " << tmp << endl;
807 if (X) {
808 tmp = myVerify.errorOrthogonality(Q, X, M);
809 if (myPid == 0)
810 cout << " >> Orthogonality Q^T M X up to " << tmp << endl;
811 }
812 }
813 else {
814 tmp = myVerify.errorOrthonormality(Q, 0);
815 if (myPid == 0)
816 cout << " >> Error in Q^T Q - I = " << tmp << endl;
817 if (X) {
818 tmp = myVerify.errorOrthogonality(Q, X, 0);
819 if (myPid == 0)
820 cout << " >> Orthogonality Q^T X up to " << tmp << endl;
821 }
822 }
823
824}
825
826
827int Davidson::minimumSpaceDimension(int nev) const {
828
829 int myPid = MyComm.MyPID();
830
831 if ((myPid == 0) && (numBlock*blockSize < nev)) {
832 cerr << endl;
833 cerr << " !!! The space dimension (# of blocks x size of blocks) must be greater than ";
834 cerr << " the number of eigenvalues !!!\n";
835 cerr << " Number of blocks = " << numBlock << endl;
836 cerr << " Size of blocks = " << blockSize << endl;
837 cerr << " Number of eigenvalues = " << nev << endl;
838 cerr << endl;
839 return -1;
840 }
841
842 return nev + blockSize;
843
844}
845
846
847void Davidson::initializeCounters() {
848
849 historyCount = 0;
850 if (resHistory) {
851 delete[] resHistory;
852 resHistory = 0;
853 }
854
855 maxSpaceSize = 0;
856 sumSpaceSize = 0;
857 if (spaceSizeHistory) {
858 delete[] spaceSizeHistory;
859 spaceSizeHistory = 0;
860 }
861
862 memRequested = 0.0;
863 highMem = 0.0;
864
865 massOp = 0;
866 numRestart = 0;
867 outerIter = 0;
868 precOp = 0;
869 residual = 0;
870 stifOp = 0;
871
872 timeLocalProj = 0.0;
873 timeLocalSolve = 0.0;
874 timeLocalUpdate = 0.0;
875 timeMassOp = 0.0;
876 timeNorm = 0.0;
877 timeOrtho = 0.0;
878 timeOuterLoop = 0.0;
879 timePostProce = 0.0;
880 timePrecOp = 0.0;
881 timeResidual = 0.0;
882 timeRestart = 0.0;
883 timeStifOp = 0.0;
884
885}
886
887
888void Davidson::algorithmInfo() const {
889
890 int myPid = MyComm.MyPID();
891
892 if (myPid ==0) {
893 cout << " Algorithm: Davidson algorithm (block version)\n";
894 cout << " Block Size: " << blockSize << endl;
895 cout << " Number of Blocks kept: " << numBlock << endl;
896 }
897
898}
899
900
901void Davidson::historyInfo() const {
902
903 if (resHistory) {
904 int j;
905 cout << " Block Size: " << blockSize << endl;
906 cout << endl;
907 cout << " Residuals Search Space Dim.\n";
908 cout << endl;
909 cout.precision(4);
910 cout.setf(ios::scientific, ios::floatfield);
911 for (j = 0; j < historyCount; ++j) {
912 int ii;
913 for (ii = 0; ii < blockSize; ++ii) {
914 cout << resHistory[blockSize*j + ii] << " ";
915 cout.width(4);
916 cout << spaceSizeHistory[j] << endl;
917 }
918 }
919 cout << endl;
920 }
921
922}
923
924
925void Davidson::memoryInfo() const {
926
927 int myPid = MyComm.MyPID();
928
929 double maxHighMem = 0.0;
930 double tmp = highMem;
931 MyComm.MaxAll(&tmp, &maxHighMem, 1);
932
933 double maxMemRequested = 0.0;
934 tmp = memRequested;
935 MyComm.MaxAll(&tmp, &maxMemRequested, 1);
936
937 if (myPid == 0) {
938 cout.precision(2);
939 cout.setf(ios::fixed, ios::floatfield);
940 cout << " Memory requested per processor by the eigensolver = (EST) ";
941 cout.width(6);
942 cout << maxMemRequested << " MB " << endl;
943 cout << endl;
944 cout << " High water mark in eigensolver = (EST) ";
945 cout.width(6);
946 cout << maxHighMem << " MB " << endl;
947 cout << endl;
948 }
949
950}
951
952
953void Davidson::operationInfo() const {
954
955 int myPid = MyComm.MyPID();
956
957 if (myPid == 0) {
958 cout << " --- Operations ---\n\n";
959 cout << " Total number of mass matrix multiplications = ";
960 cout.width(9);
961 cout << massOp + modalTool.getNumProj_MassMult() + modalTool.getNumNorm_MassMult() << endl;
962 cout << " Mass multiplications in solver loop = ";
963 cout.width(9);
964 cout << massOp << endl;
965 cout << " Mass multiplications for orthogonalization = ";
966 cout.width(9);
967 cout << modalTool.getNumProj_MassMult() << endl;
968 cout << " Mass multiplications for normalization = ";
969 cout.width(9);
970 cout << modalTool.getNumNorm_MassMult() << endl;
971 cout << " Total number of stiffness matrix operations = ";
972 cout.width(9);
973 cout << stifOp << endl;
974 cout << " Total number of preconditioner applications = ";
975 cout.width(9);
976 cout << precOp << endl;
977 cout << " Total number of computed eigen-residuals = ";
978 cout.width(9);
979 cout << residual << endl;
980 cout << "\n";
981 cout << " Total number of outer iterations = ";
982 cout.width(9);
983 cout << outerIter << endl;
984 cout << " Number of restarts = ";
985 cout.width(9);
986 cout << numRestart << endl;
987 cout << endl;
988 cout << " Maximum size of search space = ";
989 cout.width(9);
990 cout << maxSpaceSize << endl;
991 cout << " Average size of search space = ";
992 cout.setf(ios::fixed, ios::floatfield);
993 cout.precision(2);
994 cout.width(9);
995 cout << ((double) sumSpaceSize)/((double) outerIter) << endl;
996 cout << endl;
997 }
998
999}
1000
1001
1002void Davidson::timeInfo() const {
1003
1004 int myPid = MyComm.MyPID();
1005
1006 if (myPid == 0) {
1007 cout << " --- Timings ---\n\n";
1008 cout.setf(ios::fixed, ios::floatfield);
1009 cout.precision(2);
1010 cout << " Total time for outer loop = ";
1011 cout.width(9);
1012 cout << timeOuterLoop << " s" << endl;
1013 cout << " Time for mass matrix operations = ";
1014 cout.width(9);
1015 cout << timeMassOp << " s ";
1016 cout.width(5);
1017 cout << 100*timeMassOp/timeOuterLoop << " %\n";
1018 cout << " Time for stiffness matrix operations = ";
1019 cout.width(9);
1020 cout << timeStifOp << " s ";
1021 cout.width(5);
1022 cout << 100*timeStifOp/timeOuterLoop << " %\n";
1023 cout << " Time for preconditioner applications = ";
1024 cout.width(9);
1025 cout << timePrecOp << " s ";
1026 cout.width(5);
1027 cout << 100*timePrecOp/timeOuterLoop << " %\n";
1028 cout << " Time for orthogonalizations = ";
1029 cout.width(9);
1030 cout << timeOrtho << " s ";
1031 cout.width(5);
1032 cout << 100*timeOrtho/timeOuterLoop << " %\n";
1033 cout << " Projection step : ";
1034 cout.width(9);
1035 cout << modalTool.getTimeProj() << " s\n";
1036// cout << " Q^T mult. :: ";
1037// cout.width(9);
1038// cout << modalTool.getTimeProj_QtMult() << " s\n";
1039// cout << " Q mult. :: ";
1040// cout.width(9);
1041// cout << modalTool.getTimeProj_QMult() << " s\n";
1042// cout << " Mass mult. :: ";
1043// cout.width(9);
1044// cout << modalTool.getTimeProj_MassMult() << " s\n";
1045 cout << " Normalization step : ";
1046 cout.width(9);
1047 cout << modalTool.getTimeNorm() << " s\n";
1048// cout << " Mass mult. :: ";
1049// cout.width(9);
1050// cout << modalTool.getTimeNorm_MassMult() << " s\n";
1051 cout << " Time for local projection = ";
1052 cout.width(9);
1053 cout << timeLocalProj << " s ";
1054 cout.width(5);
1055 cout << 100*timeLocalProj/timeOuterLoop << " %\n";
1056 cout << " Time for local eigensolve = ";
1057 cout.width(9);
1058 cout << timeLocalSolve << " s ";
1059 cout.width(5);
1060 cout << 100*timeLocalSolve/timeOuterLoop << " %\n";
1061 cout << " Time for local update = ";
1062 cout.width(9);
1063 cout << timeLocalUpdate << " s ";
1064 cout.width(5);
1065 cout << 100*timeLocalUpdate/timeOuterLoop << " %\n";
1066 cout << " Time for residual computations = ";
1067 cout.width(9);
1068 cout << timeResidual << " s ";
1069 cout.width(5);
1070 cout << 100*timeResidual/timeOuterLoop << " %\n";
1071 cout << " Time for residuals norm computations = ";
1072 cout.width(9);
1073 cout << timeNorm << " s ";
1074 cout.width(5);
1075 cout << 100*timeNorm/timeOuterLoop << " %\n";
1076 cout << " Time for restarting space definition = ";
1077 cout.width(9);
1078 cout << timeRestart << " s ";
1079 cout.width(5);
1080 cout << 100*timeRestart/timeOuterLoop << " %\n";
1081 cout << "\n";
1082 cout << " Total time for post-processing = ";
1083 cout.width(9);
1084 cout << timePostProce << " s\n";
1085 cout << endl;
1086 } // if (myPid == 0)
1087
1088}
1089
1090