Anasazi Version of the Day
Loading...
Searching...
No Matches
JDPCG.cpp
1// @HEADER
2// *****************************************************************************
3// Anasazi: Block Eigensolvers Package
4//
5// Copyright 2004 NTESS and the Anasazi contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10//**************************************************************************
11//
12// NOTICE
13//
14// This software is a result of the research described in the cout
15//
16// " A comparison of algorithms for modal analysis in the absence
17// of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
18// Sandia National Laboratories, Technical cout SAND2003-1028J.
19//
20// It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
21// framework ( http://software.sandia.gov/trilinos/ ).
22//
23// The distribution of this software follows also the rules defined in Trilinos.
24// This notice shall be marked on any reproduction of this software, in whole or
25// in part.
26//
27// This program is distributed in the hope that it will be useful, but
28// WITHOUT ANY WARRANTY; without even the implied warranty of
29// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
30//
31// Code Authors: U. Hetmaniuk (ulhetma@sandia.gov), R. Lehoucq (rblehou@sandia.gov)
32//
33//**************************************************************************
34
35#include "JDPCG.h"
36#include <stdexcept>
37#include <Teuchos_Assert.hpp>
38
39
40JDPCG::JDPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
41 const Epetra_Operator *MM, const Epetra_Operator *PP, int _blk, int _numBlk,
42 double _tol, int _maxIterES, int _maxIterLS, int _verb, double *_weight) :
43 myVerify(_Comm),
44 callBLAS(),
45 callFortran(),
46 callLAPACK(),
47 modalTool(_Comm),
48 mySort(),
49 MyComm(_Comm),
50 K(KK),
51 M(MM),
52 Prec(PP),
53 MyWatch(_Comm),
54 tolEigenSolve(_tol),
55 maxIterEigenSolve(_maxIterES),
56 maxIterLinearSolve(_maxIterLS),
57 blockSize(_blk),
58 numBlock(_numBlk),
59 normWeight(_weight),
60 verbose(_verb),
61 historyCount(0),
62 resHistory(0),
63 maxSpaceSize(0),
64 sumSpaceSize(0),
65 spaceSizeHistory(0),
66 maxIterPCG(0),
67 sumIterPCG(0),
68 iterPCGHistory(0),
69 memRequested(0.0),
70 highMem(0.0),
71 massOp(0),
72 numCorrectionPrec(0),
73 numCorrectionSolve(0),
74 numPCGmassOp(0),
75 numPCGstifOp(0),
76 numRestart(0),
77 outerIter(0),
78 precOp(0),
79 residual(0),
80 stifOp(0),
81 timeBuildQtMPMQ(0.0),
82 timeCorrectionPrec(0.0),
83 timeCorrectionSolve(0.0),
84 timeLocalProj(0.0),
85 timeLocalSolve(0.0),
86 timeLocalUpdate(0.0),
87 timeMassOp(0.0),
88 timeNorm(0.0),
89 timeOrtho(0.0),
90 timeOuterLoop(0.0),
91 timePCGEigCheck(0.0),
92 timePCGLoop(0.0),
93 timePCGOpMult(0.0),
94 timePCGPrec(0.0),
95 timePostProce(0.0),
96 timePrecOp(0.0),
97 timeResidual(0.0),
98 timeRestart(0.0),
99 timeStifOp(0.0)
100 {
101
102}
103
104
105JDPCG::~JDPCG() {
106
107 if (resHistory) {
108 delete[] resHistory;
109 resHistory = 0;
110 }
111
112 if (spaceSizeHistory) {
113 delete[] spaceSizeHistory;
114 spaceSizeHistory = 0;
115 }
116
117 if (iterPCGHistory) {
118 delete[] iterPCGHistory;
119 iterPCGHistory = 0;
120 }
121
122}
123
124
125int JDPCG::jacobiPreconditioner(const Epetra_MultiVector &B, Epetra_MultiVector &PrecB,
126 const Epetra_MultiVector *U, const Epetra_MultiVector *V,
127 double *invQtMPMQ, int ldQtMPMQ, double *PMQ, double *work, double *WS) {
128
129 // This routine applies the "projected" preconditioner to the vectors B and stores
130 // the result in the vectors PrecB.
131 //
132 // PrecB = (I - P^{-1}MQ ( Q^tMP^{-1}MQ )^{-1} Q^t M) P^{-1} * B
133 //
134 // where P is the preconditioner.
135 //
136 // B = Input vectors
137 // PrecB = Preconditioned vectors
138 // V = converged eigenvectors
139 // U = tentative eigenvectors to be corrected.
140 //
141 // Note: Q = [V, U]
142 //
143 // invQtMPMQ = Factor form of the matrix Q^t*M*P^{-1}*M*Q (from POTRF)
144 // ldQtMPMQ = Leading dimension for invQtMPMQ
145 //
146 // PMQ = Array to store the column vectors P^{-1}*M*Q
147 // Assumption: PMQ has the same distribution than B over the processors
148 //
149 // work = Workspace array of size 2 * (# of columns in Q) * (# of columns in B)
150 //
151 // WS = Workspace array of size (# of rows in B) * (# of columns in B)
152
153 int info = 0;
154
155 int bC = B.NumVectors();
156 int bR = B.MyLength();
157
158 // Compute P^{-1} B
159 timeCorrectionPrec -= MyWatch.WallTime();
160 if (Prec) {
161 Prec->ApplyInverse(B, PrecB);
162 }
163 else {
164 memcpy(PrecB.Values(), B.Values(), bR*bC*sizeof(double));
165 }
166 timeCorrectionPrec += MyWatch.WallTime();
167
168 int uC = U->NumVectors();
169 int vC = (V) ? V->NumVectors() : 0;
170
171 int sizeQ = uC + vC;
172
173 // Compute Q^t M P^{-t} B
174 callBLAS.GEMM('T', 'N', sizeQ, bC, bR, 1.0, PMQ, bR, B.Values(), bR,
175 0.0, work + sizeQ*bC, sizeQ);
176 MyComm.SumAll(work + sizeQ*bC, work, sizeQ*bC);
177
178 memcpy(work + sizeQ*bC, work, sizeQ*bC*sizeof(double));
179
180 // Compute ( Q^t M P^{-1} M Q )^{-1} Q^t M P^{-t} B
181 callLAPACK.POTRS('U', sizeQ, bC, invQtMPMQ, ldQtMPMQ, work, sizeQ, &info);
182
183 // Subtract P^{-1} M Q ( Q^t M P^{-1} M Q )^{-1} Q^t M P^{-t} B from P^{-t} B
184 callBLAS.GEMM('N', 'N', bR, bC, sizeQ, -1.0, PMQ, bR, work, sizeQ, 1.0, PrecB.Values(), bR);
185
186 Epetra_MultiVector MPB(View, B.Map(), WS, bR, bC);
187 M->Apply(PrecB, MPB);
188
189 // Compute Q^t M PrecB
190 callBLAS.GEMM('T', 'N', uC, bC, bR, 1.0, U->Values(), bR, MPB.Values(), bR,
191 0.0, work + vC + sizeQ*bC, sizeQ);
192 if (V) {
193 callBLAS.GEMM('T', 'N', vC, bC, bR, 1.0, V->Values(), bR, MPB.Values(), bR,
194 0.0, work + sizeQ*bC, sizeQ);
195 }
196 MyComm.SumAll(work + sizeQ*bC, work, sizeQ*bC);
197
198 // Test for second projection
199 double *Mnorm = new double[bC];
200 MPB.Dot(PrecB, Mnorm);
201
202 bool doSecond = false;
203 for (int j = 0; j < bC; ++j) {
204 double tolOrtho = 1.0e-28*Mnorm[j];
205 for (int i = 0; i < sizeQ; ++i) {
206 double tmp = work[i + j*sizeQ];
207 if (tmp*tmp > tolOrtho) {
208 doSecond = true;
209 break;
210 }
211 }
212 if (doSecond == true) {
213 // Compute ( Q^t M P^{-1} M Q )^{-1} Q^t M PrecB
214 callLAPACK.POTRS('U', sizeQ, bC, invQtMPMQ, ldQtMPMQ, work, sizeQ, &info);
215 // Subtract P^{-1} M Q ( Q^t M P^{-1} M Q )^{-1} Q^t M PrecB from PrecB
216 callBLAS.GEMM('N', 'N', bR, bC, sizeQ, -1.0, PMQ, bR, work, sizeQ, 1.0, PrecB.Values(), bR);
217 break;
218 }
219 }
220 delete[] Mnorm;
221
222 numCorrectionPrec += bC;
223
224 return info;
225
226}
227
228
229int JDPCG::jacobiPCG(Epetra_MultiVector &Rlin, Epetra_MultiVector &Y,
230 const Epetra_MultiVector *U, const Epetra_MultiVector *V,
231 double eta, double tolCG, int iterMax,
232 double *invQtMPMQ, int ldQtMPMQ, double *PMQ,
233 double *workPrec, double *workPCG,
234 const Epetra_Vector *vectWeight, const Epetra_MultiVector *orthoVec) {
235
236 // This routine applies a block PCG algorithm to solve the equation
237 //
238 // (I - MQ*Q^t) * ( K - eta * M ) * (I - Q*Q^t*M) Y = X
239 //
240 // with (I - Q*Q^t*M) * Y = Y
241 // with Q = [V, U]
242 // where the preconditioner is given by
243 //
244 // (I - MQ*Q^t) * Prec^{-1} * (I - Q*Q^t*M)
245 //
246 // Rlin = Input vectors
247 // Y = Solution vectors
248 // V = converged eigenvectors
249 // U = tentative eigenvectors to be corrected.
250 //
251 // eta = shift for the linear operator
252 //
253 // tolCG = Tolerance required for convergence
254 // iterMax = Maximum number of iterations allowed
255 //
256 // invQtMPMQ = Factor form of the matrix Q^t*M*P^{-1}*M*Q (from POTRF)
257 // ldQtMPMQ = Leading dimension for invQtMPMQ
258 //
259 // PMQ = Array to store the column vectors P^{-1}*M*Q
260 // Assumption: PMQ has the same distribution than B over the processors
261 //
262 // workPrec = Workspace array of size 2 * (# of columns in Q) * (# of columns in X)
263 // This workspace is exclusively used to apply the preconditioner
264 //
265 // workPCG = Workspace array for the variables in the PCG algorithm
266 // Its size must allow the definition of
267 // - 5 blocks of (# of columns in X) vectors distributed as X across the processors
268 // - 4 arrays of length (# of columns in X)
269 // - 3 square matrices of size (# of columns in X)
270 //
271 // vectWeight = Weights for the L^2 norm to compute to check the eigenresiduals
272 //
273 // orthoVec = Space where CG computations are orthogonal to.
274
275 int xrow = Y.MyLength();
276 int xcol = Y.NumVectors();
277
278 int info = 0;
279 int localVerbose = verbose*(MyComm.MyPID() == 0);
280
281 double *pointer = workPCG;
282
283 // Arrays associated with the solution to the linear system
284
285 // Array to store the matrix PtKP
286 double *PtKP = pointer;
287 pointer = pointer + xcol*xcol;
288
289 // Array to store coefficient matrices
290 double *coeff = pointer;
291 pointer = pointer + xcol*xcol;
292
293 // Workspace array
294 double *workD = pointer;
295 pointer = pointer + xcol*xcol;
296
297 // Array to store the eigenvalues of P^t K P
298 double *da = pointer;
299 pointer = pointer + xcol;
300
301 // Array to store the norms of right hand sides
302 double *initNorm = pointer;
303 pointer = pointer + xcol;
304
305 // Array to store the norms of current residuals
306 double *resNorm = pointer;
307 pointer = pointer + xcol;
308
309 // Array to store the preconditioned residuals
310 double *valZ = pointer;
311 pointer = pointer + xrow*xcol;
312 Epetra_MultiVector Z(View, Y.Map(), valZ, xrow, xcol);
313
314 // Array to store the search directions
315 double *valP = pointer;
316 pointer = pointer + xrow*xcol;
317 Epetra_MultiVector P(View, Y.Map(), valP, xrow, xcol);
318
319 // Array to store the image of the search directions
320 double *valKP = pointer;
321 pointer = pointer + xrow*xcol;
322 Epetra_MultiVector KP(View, Y.Map(), valKP, xrow, xcol);
323
324 // Arrays associated to the corrected eigenvectors
325 // Array to store the projected stiffness matrix
326 double *UtKU = pointer;
327 pointer = pointer + xcol*xcol;
328
329 // Array to store the corrected eigenvalues
330 double *theta = pointer;
331 pointer = pointer + xcol;
332
333 // Array to store the norms of eigen-residuals for corrected vectors
334 double *resEig = pointer;
335 pointer = pointer + xcol;
336
337 // Array to store the norms of previous eigen-residuals for corrected vectors
338 double *oldEig = pointer;
339 pointer = pointer + xcol;
340
341 // Array to store the stiffness-image of the corrected eigenvectors
342 double *valKU = pointer;
343 pointer = pointer + xrow*xcol;
344 Epetra_MultiVector KU(View, Y.Map(), valKU, xrow, xcol);
345
346 // Array to store the mass-image of the corrected eigenvectors
347 Epetra_MultiVector MU(View, Y.Map(), (M) ? pointer : Z.Values(), xrow, xcol);
348
349 // Set the initial guess to zero
350 Y.PutScalar(0.0);
351
352 int ii;
353 int iter;
354 int nFound;
355
356 // Define the size of the "orthogonal" space [V, U]
357 // Note: We assume U is non zero and thus sizeQ is non zero.
358 int sizeQ = 0;
359 sizeQ += (U) ? U->NumVectors() : 0;
360 sizeQ += (V) ? V->NumVectors() : 0;
361
362 bool isNegative = false;
363
364 Rlin.Norm2(initNorm);
365
366 if (localVerbose > 3) {
367 cout << endl;
368 cout.precision(4);
369 cout.setf(ios::scientific, ios::floatfield);
370 for (ii = 0; ii < xcol; ++ii) {
371 cout << " ... Initial Residual Norm " << ii << " = " << initNorm[ii] << endl;
372 }
373 cout << endl;
374 }
375
376 // Iteration loop
377 timePCGLoop -= MyWatch.WallTime();
378 for (iter = 1; iter <= iterMax; ++iter) {
379
380 // Apply the preconditioner
381 timePCGPrec -= MyWatch.WallTime();
382 jacobiPreconditioner(Rlin, Z, U, V, invQtMPMQ, ldQtMPMQ, PMQ, workPrec, MU.Values());
383 timePCGPrec += MyWatch.WallTime();
384
385 if (orthoVec) {
386 // Note: Use MU as workspace
387 if (M)
388 M->Apply(Z, MU);
389 modalTool.massOrthonormalize(Z, MU, M, *orthoVec, blockSize, 1);
390 }
391
392 // Define the new search directions
393 if (iter == 1) {
394 P = Z;
395 }
396 else {
397 // Compute P^t K Z
398 callBLAS.GEMM('T', 'N', xcol, xcol, xrow, 1.0, KP.Values(), xrow, Z.Values(), xrow,
399 0.0, workD, xcol);
400 MyComm.SumAll(workD, coeff, xcol*xcol);
401
402 // Compute the coefficient (P^t K P)^{-1} P^t K Z
403 callBLAS.GEMM('T', 'N', xcol, xcol, xcol, 1.0, PtKP, xcol, coeff, xcol,
404 0.0, workD, xcol);
405 for (ii = 0; ii < xcol; ++ii)
406 callFortran.SCAL_INCX(xcol, da[ii], workD + ii, xcol);
407 callBLAS.GEMM('N', 'N', xcol, xcol, xcol, 1.0, PtKP, xcol, workD, xcol,
408 0.0, coeff, xcol);
409
410 // Update the search directions
411 // Note: Use KP as a workspace
412 memcpy(KP.Values(), P.Values(), xrow*xcol*sizeof(double));
413 callBLAS.GEMM('N', 'N', xrow, xcol, xcol, 1.0, KP.Values(), xrow, coeff, xcol,
414 0.0, P.Values(), xrow);
415
416 P.Update(1.0, Z, -1.0);
417
418 } // if (iter == 1)
419
420 timePCGOpMult -= MyWatch.WallTime();
421 K->Apply(P, KP);
422 numPCGstifOp += xcol;
423 if (eta != 0.0) {
424 // Apply the mass matrix
425 // Note: Use Z as a workspace
426 if (M) {
427 M->Apply(P, Z);
428 callBLAS.AXPY(xrow*xcol, -eta, Z.Values(), KP.Values());
429 }
430 else {
431 callBLAS.AXPY(xrow*xcol, -eta, P.Values(), KP.Values());
432 }
433 numPCGmassOp += xcol;
434 }
435 timePCGOpMult += MyWatch.WallTime();
436
437 // Compute P^t K P
438 callBLAS.GEMM('T', 'N', xcol, xcol, xrow, 1.0, P.Values(), xrow, KP.Values(), xrow,
439 0.0, workD, xcol);
440 MyComm.SumAll(workD, PtKP, xcol*xcol);
441
442 // Eigenvalue decomposition of P^t K P
443 int nev = xcol;
444 info = modalTool.directSolver(xcol, PtKP, xcol, 0, 0, nev, PtKP, xcol, da, 0, 10);
445
446 if (info != 0) {
447 // Break the loop as spectral decomposition failed
448 info = - iterMax - 1;
449 sumIterPCG += iter;
450 maxIterPCG = (iter > maxIterPCG) ? iter : maxIterPCG;
451 return info;
452 } // if (info != 0)
453
454 // Compute the pseudo-inverse of the eigenvalues
455 for (ii = 0; ii < xcol; ++ii) {
456 if (da[ii] < 0.0) {
457 isNegative = true;
458 break;
459 }
460 else {
461 da[ii] = (da[ii] == 0.0) ? 0.0 : 1.0/da[ii];
462 }
463 } // for (ii = 0; ii < xcol; ++ii)
464
465 if (isNegative == true) {
466 if (localVerbose > 0) {
467 cout << endl;
468 cout.precision(4);
469 cout.setf(ios::scientific, ios::floatfield);
470 cout << " !! Negative eigenvalue in block PCG (" << da[ii] << ") !!\n";
471 cout << endl;
472 }
473 info = - iter;
474 sumIterPCG += iter;
475 maxIterPCG = (iter > maxIterPCG) ? iter : maxIterPCG;
476 return info;
477 }
478
479 // Compute P^t R
480 callBLAS.GEMM('T', 'N', xcol, xcol, xrow, 1.0, P.Values(), xrow, Rlin.Values(), xrow,
481 0.0, workD, xcol);
482 MyComm.SumAll(workD, coeff, xcol*xcol);
483
484 // Compute the coefficient (P^t K P)^{-1} P^t R
485 callBLAS.GEMM('T', 'N', xcol, xcol, xcol, 1.0, PtKP, xcol, coeff, xcol,
486 0.0, workD, xcol);
487 for (ii = 0; ii < xcol; ++ii)
488 callFortran.SCAL_INCX(xcol, da[ii], workD + ii, xcol);
489 callBLAS.GEMM('N', 'N', xcol, xcol, xcol, 1.0, PtKP, xcol, workD, xcol,
490 0.0, coeff, xcol);
491
492 // Update the solutions of the linear system
493 callBLAS.GEMM('N', 'N', xrow, xcol, xcol, 1.0, P.Values(), xrow, coeff, xcol,
494 1.0, Y.Values(), xrow);
495
496 // Update the residuals for the linear system
497 callBLAS.GEMM('N', 'N', xrow, xcol, xcol, -1.0, KP.Values(), xrow, coeff, xcol,
498 1.0, Rlin.Values(), xrow);
499
500 // Check convergence
501 Rlin.Norm2(resNorm);
502 nFound = 0;
503 for (ii = 0; ii < xcol; ++ii) {
504 if (resNorm[ii] <= tolCG*initNorm[ii])
505 nFound += 1;
506 }
507
508 if (localVerbose > 3) {
509 cout << endl;
510 for (ii = 0; ii < xcol; ++ii) {
511 cout << " ... ";
512 cout.width(5);
513 cout << ii << " ... Residual = ";
514 cout.precision(4);
515 cout.setf(ios::scientific, ios::floatfield);
516 cout << resNorm[ii] << " ... Right Hand Side = " << initNorm[ii] << endl;
517 }
518 cout << endl;
519 }
520
521 if (nFound == xcol) {
522 info = iter;
523 break;
524 }
525
526 timePCGEigCheck -= MyWatch.WallTime();
527
528 // Check the residuals for the corrected eigenvectors
529 // Note: Use Z as workspace to store the corrected vectors
530 Z.Update(1.0, *U, -1.0, Y, 0.0);
531
532 // Compute U^t K U
533 K->Apply(Z, KU);
534 numPCGstifOp += xcol;
535 callBLAS.GEMM('T', 'N', xcol, xcol, xrow, 1.0, Z.Values(), xrow, KU.Values(), xrow,
536 0.0, workD, xcol);
537 MyComm.SumAll(workD, UtKU, xcol*xcol);
538
539 // Compute U^t M U
540 // Note: Use coeff as storage space
541 if (M) {
542 M->Apply(Z, MU);
543 numPCGmassOp += xcol;
544 callBLAS.GEMM('T', 'N', xcol, xcol, xrow, 1.0, Z.Values(), xrow, MU.Values(), xrow,
545 0.0, workD, xcol);
546 MyComm.SumAll(workD, coeff, xcol*xcol);
547 }
548 else {
549 callBLAS.GEMM('T', 'N', xcol, xcol, xrow, 1.0, Z.Values(), xrow, Z.Values(), xrow,
550 0.0, workD, xcol);
551 MyComm.SumAll(workD, coeff, xcol*xcol);
552 }
553
554 nev = xcol;
555 info = modalTool.directSolver(xcol, UtKU, xcol, coeff, xcol, nev, workD, xcol, theta, 0,
556 (blockSize == 1) ? 1 : 0);
557
558 if ((info < 0) || (nev < xcol)) {
559 // Break the loop as spectral decomposition failed
560 info = - iterMax - 1;
561 sumIterPCG += iter;
562 maxIterPCG = (iter > maxIterPCG) ? iter : maxIterPCG;
563 return info;
564 } // if ((info < 0) || (nev < xcol))
565
566 // Compute the eigenresiduals for the corrected vectors
567 // Note: Use Z as workspace to store the residuals
568 if (M) {
569 callBLAS.GEMM('N', 'N', xrow, xcol, xcol, 1.0, MU.Values(), xrow, workD, xcol,
570 0.0, Z.Values(), xrow);
571 }
572 for (ii = 0; ii < xcol; ++ii)
573 callBLAS.SCAL(xrow, theta[ii], Z.Values() + ii*xrow);
574 callBLAS.GEMM('N', 'N', xrow, xcol, xcol, 1.0, KU.Values(), xrow, workD, xcol,
575 -1.0, Z.Values(), xrow);
576
577 if (vectWeight)
578 Z.NormWeighted(*vectWeight, resEig);
579 else
580 Z.Norm2(resEig);
581
582 timePCGEigCheck += MyWatch.WallTime();
583
584 if (iter > 1) {
585 // Scale the norms of residuals with the eigenvalues
586 // Count the number of converged eigenvectors
587 nFound = 0;
588 int nGrow = 0;
589 for (ii = 0; ii < xcol; ++ii) {
590 nFound = (resEig[ii] < tolEigenSolve*theta[ii]) ? nFound + 1 : nFound;
591 nGrow = (resEig[ii] > oldEig[ii]) ? nGrow + 1 : nGrow;
592 } // for (ii = 0; ii < xcol; ++ii)
593 if ((nFound > 0) || (nGrow > 0)) {
594 info = iter;
595 break;
596 }
597 } // if (iter > 1)
598
599 memcpy(oldEig, resEig, xcol*sizeof(double));
600
601 } // for (iter = 1; iter <= maxIter; ++iter)
602 timePCGLoop += MyWatch.WallTime();
603
604 sumIterPCG += iter;
605 maxIterPCG = (iter > maxIterPCG) ? iter : maxIterPCG;
606
607 return info;
608
609}
610
611
612int JDPCG::solve(int numEigen, Epetra_MultiVector &Q, double *lambda) {
613
614 return JDPCG::reSolve(numEigen, Q, lambda);
615
616}
617
618
619int JDPCG::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {
620
621 // Computes the smallest eigenvalues and the corresponding eigenvectors
622 // of the generalized eigenvalue problem
623 //
624 // K X = M X Lambda
625 //
626 // using a Jacobi-Davidson algorithm with PCG (Block version of Notay's algorithm).
627 //
628 // Reference: "Combination of Jacobi-Davidson and conjugate gradients for the partial
629 // symmetric eigenproblem", Y. Notay, Numer. Linear Algebra Appl. (2002), 9:21-44.
630 //
631 // Input variables:
632 //
633 // numEigen (integer) = Number of eigenmodes requested
634 //
635 // Q (Epetra_MultiVector) = Converged eigenvectors
636 // The number of columns of Q must be at least numEigen + blockSize.
637 // The rows of Q are distributed across processors.
638 // At exit, the first numEigen columns contain the eigenvectors requested.
639 //
640 // lambda (array of doubles) = Converged eigenvalues
641 // At input, it must be of size numEigen + blockSize.
642 // At exit, the first numEigen locations contain the eigenvalues requested.
643 //
644 // startingEV (integer) = Number of existing converged eigenvectors
645 // We assume that the user has check the eigenvectors and
646 // their M-orthonormality.
647 //
648 // Return information on status of computation
649 //
650 // info >= 0 >> Number of converged eigenpairs at the end of computation
651 //
652 // // Failure due to input arguments
653 //
654 // info = - 1 >> The stiffness matrix K has not been specified.
655 // info = - 2 >> The maps for the matrix K and the matrix M differ.
656 // info = - 3 >> The maps for the matrix K and the preconditioner P differ.
657 // info = - 4 >> The maps for the vectors and the matrix K differ.
658 // info = - 5 >> Q is too small for the number of eigenvalues requested.
659 // info = - 6 >> Q is too small for the computation parameters.
660 //
661 // info = - 7 >> The mass matrix M has not been specified.
662 // info = - 8 >> The number of blocks is too small for the number of eigenvalues.
663 //
664 // info = - 10 >> Failure during the mass orthonormalization
665 //
666 // info = - 30 >> MEMORY
667 //
668
669 // Check the input parameters
670
671 if (numEigen <= startingEV) {
672 return startingEV;
673 }
674
675 int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, minimumSpaceDimension(numEigen));
676 if (info < 0)
677 return info;
678
679 int myPid = MyComm.MyPID();
680
681 if (M == 0) {
682 if (myPid == 0) {
683 cerr << endl;
684 cerr << " !!! The Epetra_Operator object for the mass matrix is not specified !!!" << endl;
685 cerr << endl;
686 }
687 return -7;
688 }
689
690 if (numBlock*blockSize < numEigen) {
691 if (myPid == 0) {
692 cerr << endl;
693 cerr << " !!! The space dimension (# of blocks x size of blocks) must be greater than ";
694 cerr << " the number of eigenvalues !!!\n";
695 cerr << " Number of blocks = " << numBlock << endl;
696 cerr << " Size of blocks = " << blockSize << endl;
697 cerr << " Number of eigenvalues = " << numEigen << endl;
698 cerr << endl;
699 }
700 return -8;
701 }
702
703 // Get the weight for approximating the M-inverse norm
704 Epetra_Vector *vectWeight = 0;
705 if (normWeight) {
706 vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
707 }
708
709 int knownEV = startingEV;
710 int localVerbose = verbose*(myPid==0);
711
712 // Define local block vectors
713 //
714 // PMQ = Preconditioned M-times converged eigenvectors + one working block
715 //
716 // KX = Working vectors (storing K times one block)
717 //
718 // R = Working vectors (storing residuals for one block)
719 //
720 // MX = Working vectors (storing M times one block)
721 //
722
723 int xr = Q.MyLength();
724 int dimSearch = blockSize*numBlock;
725
726 Epetra_MultiVector X(View, Q, 0, dimSearch + blockSize);
727 if (knownEV > 0) {
728 Epetra_MultiVector copyX(View, Q, knownEV, blockSize);
729 copyX.Random();
730 }
731 else {
732 X.Random();
733 }
734
735 int lwork = 0;
736 lwork += (numEigen-startingEV + blockSize)*xr + 2*blockSize*xr;
737 lwork += (M) ? blockSize*xr : 0;
738
739 // Workspace for PCG
740 lwork += (maxIterLinearSolve > 0) ? 4*xr*blockSize + 6*blockSize + 4*blockSize*blockSize : 0;
741
742 // Workspace for JDCG
743 lwork += blockSize + dimSearch + 2*dimSearch*dimSearch;
744 lwork += 2*(numEigen-startingEV+blockSize)*(numEigen-startingEV+blockSize);
745 lwork += 2*(numEigen-startingEV+blockSize)*blockSize;
746
747 double *work = new (nothrow) double[lwork];
748 if (work == 0) {
749 if (vectWeight)
750 delete vectWeight;
751 info = -30;
752 return info;
753 }
754 memRequested += sizeof(double)*lwork/(1024.0*1024.0);
755
756 highMem = (highMem > currentSize()) ? highMem : currentSize();
757
758 double *tmpD = work;
759
760 Epetra_MultiVector PMQ(View, Q.Map(), tmpD, xr, numEigen-startingEV+blockSize);
761 tmpD = tmpD + (numEigen-startingEV+blockSize)*xr;
762
763 Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);
764 tmpD = tmpD + blockSize*xr;
765
766 Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize);
767 tmpD = tmpD + blockSize*xr;
768
769 Epetra_MultiVector MX(View, Q.Map(), tmpD, xr, blockSize);
770 tmpD = tmpD + blockSize*xr;
771
772 // Note: Use MX as workspace in PCG iterations
773 double *workPCG = 0;
774 if (maxIterLinearSolve > 0) {
775 workPCG = tmpD - blockSize*xr;
776 tmpD = tmpD + 4*xr*blockSize + 6*blockSize + 4*blockSize*blockSize;
777 }
778
779 // theta = Store the local eigenvalues (size: dimSearch)
780 //
781 // normR = Store the norm of residuals (size: blockSize)
782 //
783 // KK = Local stiffness matrix (size: dimSearch x dimSearch)
784 //
785 // S = Local eigenvectors (size: dimSearch x dimSearch)
786 //
787 // QtMPMQ = Projected "preconditioner" (size: (numEigen-startingEV+blockSize) ^ 2)
788 //
789 // invQtMPMQ = Inverse of QtMPMQ (size: (numEigen-startingEV+blockSize) ^ 2)
790 //
791 // tmpArray = Temporary workspace (size: 2*(numEigen-startingEV+blockSize) x blockSize)
792
793 double *theta = tmpD;
794 tmpD = tmpD + dimSearch;
795
796 double *normR = tmpD;
797 tmpD = tmpD + blockSize;
798
799 double *KK = tmpD;
800 tmpD = tmpD + dimSearch*dimSearch;
801 memset(KK, 0, dimSearch*dimSearch*sizeof(double));
802
803 double *S = tmpD;
804 tmpD = tmpD + dimSearch*dimSearch;
805
806 double *QtMPMQ = tmpD;
807 tmpD = tmpD + (numEigen-startingEV+blockSize)*(numEigen-startingEV+blockSize);
808 int ldQtMPMQ = numEigen - startingEV + blockSize;
809
810 double *invQtMPMQ = tmpD;
811 tmpD = tmpD + (numEigen-startingEV+blockSize)*(numEigen-startingEV+blockSize);
812
813 double *tmpArray = tmpD;
814
815 // Define an array to store the residuals history
816 if (localVerbose > 2) {
817 resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
818 spaceSizeHistory = new (nothrow) int[maxIterEigenSolve];
819 iterPCGHistory = new (nothrow) int[maxIterEigenSolve];
820 if ((resHistory == 0) || (spaceSizeHistory == 0) || (iterPCGHistory == 0)) {
821 if (vectWeight)
822 delete vectWeight;
823 delete[] work;
824 info = -30;
825 return info;
826 }
827 historyCount = 0;
828 }
829
830 // Miscellaneous definitions
831 bool reStart = false;
832 bool criticalExit = false;
833
834 int i, j;
835 int nFound = blockSize;
836 int bStart = 0;
837 int offSet = 0;
838 numRestart = 0;
839
840 double tau = 0.0;
841 double eta = tau;
842 double sqrtTol = sqrt(tolEigenSolve);
843 double coefDecay = 0.5;
844 double tolPCG = 1.0;
845
846 if (localVerbose > 0) {
847 cout << endl;
848 cout << " *|* Problem: ";
849 if (M)
850 cout << "K*Q = M*Q D ";
851 else
852 cout << "K*Q = Q D ";
853 if (Prec)
854 cout << " with preconditioner";
855 cout << endl;
856 cout << " *|* Algorithm = Jacobi-Davidson algorithm with PCG (block version)\n";
857 cout << " *|* Size of blocks = " << blockSize << endl;
858 cout << " *|* Largest size of search space = " << numBlock*blockSize << endl;
859 cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
860 cout.precision(2);
861 cout.setf(ios::scientific, ios::floatfield);
862 cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
863 cout << " *|* Norm used for convergence: ";
864 if (vectWeight)
865 cout << "weighted L2-norm with user-provided weights" << endl;
866 else
867 cout << "L^2-norm" << endl;
868 if (startingEV > 0)
869 cout << " *|* Input converged eigenvectors = " << startingEV << endl;
870 cout << "\n -- Start iterations -- \n";
871 }
872
873 int maxBlock = (dimSearch/blockSize) - (knownEV/blockSize);
874
875 timeOuterLoop -= MyWatch.WallTime();
876 outerIter = 0;
877 while (outerIter <= maxIterEigenSolve) {
878
879 highMem = (highMem > currentSize()) ? highMem : currentSize();
880
881 Epetra_MultiVector PMX(View, PMQ, knownEV - startingEV, blockSize);
882
883 int nb;
884 for (nb = bStart; nb < maxBlock; ++nb) {
885
886 outerIter += 1;
887 if (outerIter > maxIterEigenSolve)
888 break;
889
890 int localSize = nb*blockSize;
891
892 Epetra_MultiVector Xcurrent(View, X, localSize + knownEV, blockSize);
893
894 timeMassOp -= MyWatch.WallTime();
895 if (M)
896 M->Apply(Xcurrent, MX);
897 timeMassOp += MyWatch.WallTime();
898 massOp += blockSize;
899
900 // Orthonormalize X against the known eigenvectors and the previous vectors
901 // Note: Use R as a temporary work space
902 timeOrtho -= MyWatch.WallTime();
903 if (nb == bStart) {
904 if (nFound > 0) {
905 if (knownEV == 0) {
906 info = modalTool.massOrthonormalize(Xcurrent, MX, M, Q, nFound, 2, R.Values());
907 }
908 else {
909 if (localSize == 0) {
910 Epetra_MultiVector copyQ(View, X, 0, knownEV);
911 info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, nFound, 0, R.Values());
912 }
913 else {
914 Epetra_MultiVector copyQ(View, X, knownEV, localSize);
915 info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, nFound, 0, R.Values());
916 } // if (localSize == 0)
917 } // if (knownEV == 0)
918 } // if (nFound > 0)
919 nFound = 0;
920 }
921 else {
922 Epetra_MultiVector copyQ(View, X, knownEV, localSize);
923 info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, blockSize, 0, R.Values());
924 }
925 timeOrtho += MyWatch.WallTime();
926
927 // Exit the code when the number of vectors exceeds the space dimension
928 if (info < 0) {
929 info = -10;
930 if (vectWeight)
931 delete vectWeight;
932 delete[] work;
933 if (workPCG)
934 delete[] workPCG;
935 return info;
936 }
937
938 timeStifOp -= MyWatch.WallTime();
939 K->Apply(Xcurrent, KX);
940 timeStifOp += MyWatch.WallTime();
941 stifOp += blockSize;
942
943 if (verbose > 3) {
944 if (knownEV + localSize == 0)
945 accuracyCheck(&Xcurrent, &MX, 0);
946 else {
947 Epetra_MultiVector copyQ(View, X, 0, knownEV + localSize);
948 accuracyCheck(&Xcurrent, &MX, &copyQ);
949 }
950 if (localVerbose > 0)
951 cout << endl;
952 } // if (verbose > 3)
953
954 // Define the local stiffness matrix
955 timeLocalProj -= MyWatch.WallTime();
956 for (j = 0; j <= nb; ++j) {
957 callBLAS.GEMM('T', 'N', blockSize, blockSize, xr,
958 1.0, X.Values()+(knownEV+j*blockSize)*xr, xr, KX.Values(), xr,
959 0.0, tmpArray + blockSize*blockSize, blockSize);
960 MyComm.SumAll(tmpArray + blockSize*blockSize, tmpArray, blockSize*blockSize);
961 int iC;
962 for (iC = 0; iC < blockSize; ++iC) {
963 double *Kpointer = KK + localSize*dimSearch + j*blockSize + iC*dimSearch;
964 memcpy(Kpointer, tmpArray + iC*blockSize, blockSize*sizeof(double));
965 } // for (iC = 0; iC < blockSize; ++iC)
966 } // for (j = 0; j <= nb; ++j)
967 timeLocalProj += MyWatch.WallTime();
968
969 // Perform a spectral decomposition
970 timeLocalSolve -= MyWatch.WallTime();
971 int nevLocal = localSize + blockSize;
972 info = modalTool.directSolver(localSize + blockSize, KK, dimSearch, 0, 0,
973 nevLocal, S, dimSearch, theta, localVerbose, 10);
974 timeLocalSolve += MyWatch.WallTime();
975
976 if ((info != 0) && (theta[0] < 0.0)) {
977 // Stop as spectral decomposition has a critical failure
978 if (info < 0) {
979 criticalExit = true;
980 break;
981 }
982 // Restart as spectral decomposition failed
983 if (localVerbose > 0) {
984 cout << " Iteration " << outerIter;
985 cout << "- Failure for spectral decomposition - RESTART with new random search\n";
986 }
987 reStart = true;
988 numRestart += 1;
989 timeRestart -= MyWatch.WallTime();
990 Epetra_MultiVector Xinit(View, X, knownEV, blockSize);
991 Xinit.Random();
992 timeRestart += MyWatch.WallTime();
993 nFound = blockSize;
994 bStart = 0;
995 break;
996 } // if (info != 0)
997
998 // Start the definition of the new block
999 // Note: Use KX as a workspace to store the updated directions
1000 timeLocalUpdate -= MyWatch.WallTime();
1001 callBLAS.GEMM('N', 'N', xr, blockSize, localSize+blockSize, 1.0, X.Values()+knownEV*xr, xr,
1002 S, dimSearch, 0.0, KX.Values(), xr);
1003 timeLocalUpdate += MyWatch.WallTime();
1004
1005 // Apply the mass matrix on the new block
1006 timeMassOp -= MyWatch.WallTime();
1007 if (M)
1008 M->Apply(KX, MX);
1009 timeMassOp += MyWatch.WallTime();
1010 massOp += blockSize;
1011
1012 // Apply the stiffness matrix on the new block
1013 timeStifOp -= MyWatch.WallTime();
1014 K->Apply(KX, R);
1015 timeStifOp += MyWatch.WallTime();
1016 stifOp += blockSize;
1017
1018 // Form the residuals
1019 timeResidual -= MyWatch.WallTime();
1020 for (j = 0; j < blockSize; ++j) {
1021 callBLAS.AXPY(xr, -theta[j], MX.Values()+j*xr, R.Values()+j*xr);
1022 }
1023 timeResidual += MyWatch.WallTime();
1024 residual += blockSize;
1025
1026 // Compute the norm of residuals
1027 timeNorm -= MyWatch.WallTime();
1028 if (vectWeight)
1029 R.NormWeighted(*vectWeight, normR);
1030 else
1031 R.Norm2(normR);
1032 // Scale the norms of residuals with the eigenvalues
1033 // Count the number of converged eigenvectors
1034 nFound = 0;
1035 for (j = 0; j < blockSize; ++j) {
1036 normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
1037 if (normR[j] < tolEigenSolve) {
1038 nFound += 1;
1039 }
1040 } // for (j = 0; j < blockSize; ++j)
1041 timeNorm += MyWatch.WallTime();
1042
1043 maxSpaceSize = (maxSpaceSize > localSize+blockSize) ? maxSpaceSize : localSize+blockSize;
1044 sumSpaceSize += localSize + blockSize;
1045
1046 // Print information on current iteration
1047 if (localVerbose > 0) {
1048 cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
1049 cout << knownEV + nFound << endl;
1050 }
1051
1052 if (localVerbose > 2) {
1053 memcpy(resHistory + blockSize*historyCount, normR, blockSize*sizeof(double));
1054 spaceSizeHistory[historyCount] = localSize + blockSize;
1055 }
1056
1057 if (localVerbose > 1) {
1058 cout << endl;
1059 cout.precision(2);
1060 cout.setf(ios::scientific, ios::floatfield);
1061 for (i=0; i<blockSize; ++i) {
1062 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
1063 cout << " = " << normR[i] << endl;
1064 }
1065 cout << endl;
1066 cout.precision(2);
1067 for (i=0; i<localSize; ++i) {
1068 cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
1069 cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
1070 cout << " = " << theta[i] << endl;
1071 }
1072 cout << endl;
1073 }
1074
1075 // Exit the loop when some vectors have converged
1076 if (nFound > 0) {
1077 tolPCG = 1.0;
1078 offSet = 0;
1079 nb += 1;
1080 if (localVerbose > 2) {
1081 iterPCGHistory[historyCount] = 0;
1082 historyCount += 1;
1083 }
1084 break;
1085 }
1086
1087 // Exit the loop when all positions are filled
1088 if ((maxBlock > 1) && (nb == maxBlock - 1)) {
1089 if (localVerbose > 2) {
1090 iterPCGHistory[historyCount] = 0;
1091 historyCount += 1;
1092 }
1093 continue;
1094 }
1095
1096 // Apply the preconditioner on the new direction
1097 if (Prec) {
1098 timePrecOp -= MyWatch.WallTime();
1099 Prec->ApplyInverse(MX, PMX);
1100 timePrecOp += MyWatch.WallTime();
1101 precOp += blockSize;
1102 } // if (Prec)
1103 else {
1104 memcpy(PMX.Values(), MX.Values(), xr*blockSize*sizeof(double));
1105 } // if (Prec)
1106
1107 // Update the upper triangular part of the matrix QtMPMQ
1108 // Note: Use tmpArray as a workspace
1109 timeBuildQtMPMQ -= MyWatch.WallTime();
1110 int qLength = knownEV - startingEV + blockSize;
1111 callBLAS.GEMM('T', 'N', qLength, blockSize, xr, 1.0, PMQ.Values(), xr, MX.Values(), xr,
1112 0.0, tmpArray + qLength*blockSize, qLength);
1113 MyComm.SumAll(tmpArray + qLength*blockSize, tmpArray, qLength*blockSize);
1114 for (j = 0; j < blockSize; ++j) {
1115 memcpy(QtMPMQ + (knownEV-startingEV+j)*ldQtMPMQ, tmpArray + j*qLength,
1116 qLength*sizeof(double));
1117 }
1118 timeBuildQtMPMQ += MyWatch.WallTime();
1119
1120 // Factor the matrix QtMPMQ
1121 for (j = 0; j < qLength; ++j)
1122 memcpy(invQtMPMQ + j*ldQtMPMQ, QtMPMQ + j*ldQtMPMQ, (j+1)*sizeof(double));
1123 callLAPACK.POTRF('U', qLength, invQtMPMQ, ldQtMPMQ, &info);
1124
1125 // Treat the error messages for Cholesky factorization
1126 if (info != 0) {
1127 // mfh 14 Jan 2011: INFO < 0 is definitely a logic error.
1128 TEUCHOS_TEST_FOR_EXCEPTION(info < 0, std::logic_error, "Argument number "
1129 << -info << " of LAPACK's DPOTRF routine had "
1130 "an illegal value.");
1131 // Restart as factorization failed
1132 if (localVerbose > 0) {
1133 cout << " Iteration " << outerIter;
1134 cout << " - Failure for local factorization (" << info << "/" << qLength << ")";
1135 cout << " - RESTART with new random search";
1136 cout << endl;
1137 }
1138 reStart = true;
1139 numRestart += 1;
1140 timeRestart -= MyWatch.WallTime();
1141 Epetra_MultiVector Xinit(View, X, knownEV, blockSize);
1142 Xinit.Random();
1143 timeRestart += MyWatch.WallTime();
1144 nFound = blockSize;
1145 bStart = 0;
1146 if (localVerbose > 2) {
1147 iterPCGHistory[historyCount] = 0;
1148 historyCount += 1;
1149 }
1150 break;
1151 }
1152
1153 // Correction equation
1154 // Note that the new working block U is stored in KX,
1155 // while MX contains M*U and PMX contains P^{-1}*M*U
1156 Epetra_MultiVector Xnext(View, X, (maxBlock == 1) ? knownEV
1157 : knownEV+localSize+blockSize, blockSize);
1158 if (normR[0] < sqrtTol)
1159 eta = theta[0];
1160 else
1161 eta = tau;
1162
1163 if (verbose > 3) {
1164 double maxDotQ = 0.0;
1165 if (knownEV > 0) {
1166 Epetra_MultiVector copyQ(View, X, 0, knownEV);
1167 maxDotQ = myVerify.errorOrthogonality(&copyQ, &R, 0);
1168 }
1169 double maxDotU = myVerify.errorOrthogonality(&KX, &R, 0);
1170 if (myPid == 0) {
1171 cout << " >> Orthogonality for the right hand side of the correction equation = ";
1172 double tmp = (maxDotU > maxDotQ) ? maxDotU : maxDotQ;
1173 cout.precision(4);
1174 cout.setf(ios::scientific, ios::floatfield);
1175 cout << tmp << endl << endl;
1176 }
1177 }
1178
1179 timeCorrectionSolve -= MyWatch.WallTime();
1180 if (startingEV > 0) {
1181 Epetra_MultiVector startQ(View, X, 0, startingEV);
1182 if (knownEV > startingEV) {
1183 Epetra_MultiVector copyQ(View, X, startingEV, knownEV - startingEV);
1184 info = jacobiPCG(R, Xnext, &KX, &copyQ, eta, tolPCG, maxIterLinearSolve, invQtMPMQ,
1185 ldQtMPMQ, PMQ.Values(), tmpArray, workPCG, vectWeight, &startQ);
1186 }
1187 else {
1188 info = jacobiPCG(R, Xnext, &KX, 0, eta, tolPCG, maxIterLinearSolve, invQtMPMQ,
1189 ldQtMPMQ, PMQ.Values(), tmpArray, workPCG, vectWeight, &startQ);
1190 }
1191 }
1192 else {
1193 if (knownEV > 0) {
1194 Epetra_MultiVector copyQ(View, X, 0, knownEV);
1195 info = jacobiPCG(R, Xnext, &KX, &copyQ, eta, tolPCG, maxIterLinearSolve, invQtMPMQ,
1196 ldQtMPMQ, PMQ.Values(), tmpArray, workPCG, vectWeight);
1197 }
1198 else {
1199 info = jacobiPCG(R, Xnext, &KX, 0, eta, tolPCG, maxIterLinearSolve, invQtMPMQ,
1200 ldQtMPMQ, PMQ.Values(), tmpArray, workPCG, vectWeight);
1201 }
1202 }
1203 timeCorrectionSolve += MyWatch.WallTime();
1204
1205 if (verbose > 3) {
1206 double maxDotQ = 0.0;
1207 if (knownEV > 0) {
1208 Epetra_MultiVector copyQ(View, X, 0, knownEV);
1209 maxDotQ = myVerify.errorOrthogonality(&copyQ, &Xnext, M);
1210 }
1211 double maxDotU = myVerify.errorOrthogonality(&KX, &Xnext, M);
1212 if (myPid == 0) {
1213 double tmp = (maxDotU > maxDotQ) ? maxDotU : maxDotQ;
1214 cout << " >> Orthogonality for the solution of the correction equation = ";
1215 cout.precision(4);
1216 cout.setf(ios::scientific, ios::floatfield);
1217 cout << tmp << endl << endl;
1218 }
1219 }
1220
1221 numCorrectionSolve += blockSize;
1222 if (info < 0) {
1223 if ((info == -1) || (info == -maxIterLinearSolve-1)) {
1224 if (localVerbose > 0) {
1225 cout << " Iteration " << outerIter;
1226 cout << " - Failure inside PCG";
1227 cout << " - RESTART with new random search";
1228 cout << endl;
1229 }
1230 if (localVerbose > 2) {
1231 iterPCGHistory[historyCount] = -1;
1232 historyCount += 1;
1233 }
1234 reStart = true;
1235 numRestart += 1;
1236 timeRestart -= MyWatch.WallTime();
1237 Epetra_MultiVector Xinit(View, X, knownEV, blockSize);
1238 Xinit.Random();
1239 timeRestart += MyWatch.WallTime();
1240 nFound = blockSize;
1241 bStart = 0;
1242 info = 0;
1243 break;
1244 } // if ((info == -1) || (info == -iterMax-1))
1245 else {
1246 if (localVerbose > 2) {
1247 iterPCGHistory[historyCount] = (info < 0) ? -info : info;
1248 historyCount += 1;
1249 }
1250 } // if ((info == -1) || (info == -iterMax-1))
1251 } // if (info < 0)
1252 else {
1253 if (localVerbose > 2) {
1254 iterPCGHistory[historyCount] = info;
1255 historyCount += 1;
1256 }
1257 } // if (info < 0)
1258 info = 0;
1259
1260 tolPCG *= coefDecay;
1261
1262 if (maxBlock == 1) {
1263 Xcurrent.Update(1.0, KX, -1.0);
1264 }
1265
1266 } // for (nb = bstart; nb < maxBlock; ++nb)
1267
1268 if (outerIter > maxIterEigenSolve)
1269 break;
1270
1271 if (reStart == true) {
1272 reStart = false;
1273 continue;
1274 }
1275
1276 if (criticalExit == true)
1277 break;
1278
1279 // Store the final converged eigenvectors
1280 if (knownEV + nFound >= numEigen) {
1281 for (j = 0; j < blockSize; ++j) {
1282 if (normR[j] < tolEigenSolve) {
1283 memcpy(X.Values() + knownEV*xr, KX.Values() + j*xr, xr*sizeof(double));
1284 lambda[knownEV] = theta[j];
1285 knownEV += 1;
1286 }
1287 }
1288 if (localVerbose == 1) {
1289 cout << endl;
1290 cout.precision(2);
1291 cout.setf(ios::scientific, ios::floatfield);
1292 for (i=0; i<blockSize; ++i) {
1293 cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
1294 cout << " = " << normR[i] << endl;
1295 }
1296 cout << endl;
1297 }
1298 break;
1299 } // if (knownEV + nFound >= numEigen)
1300
1301 // Treat the particular case of one block
1302 if ((maxBlock == 1) && (nFound == 0)) {
1303 nFound = blockSize;
1304 continue;
1305 }
1306
1307 // Define the restarting space when there is only one block
1308 if (maxBlock == 1) {
1309 timeRestart -= MyWatch.WallTime();
1310 double *Xpointer = X.Values() + (knownEV+nFound)*xr;
1311 nFound = 0;
1312 for (j = 0; j < blockSize; ++j) {
1313 if (normR[j] < tolEigenSolve) {
1314 memcpy(X.Values() + knownEV*xr, KX.Values() + j*xr, xr*sizeof(double));
1315 lambda[knownEV] = theta[j];
1316 knownEV += 1;
1317 nFound += 1;
1318 }
1319 else {
1320 memcpy(Xpointer + (j-nFound)*xr, KX.Values() + j*xr, xr*sizeof(double));
1321 }
1322 }
1323 knownEV -= nFound;
1324 Epetra_MultiVector Xnext(View, X, knownEV + blockSize, nFound);
1325 Xnext.Random();
1326 timeRestart += MyWatch.WallTime();
1327 }
1328
1329 // Define the restarting block when there is more than one block
1330 int oldCol, newCol;
1331 if (maxBlock > 1) {
1332 timeRestart -= MyWatch.WallTime();
1333 int firstIndex = blockSize;
1334 for (j = 0; j < blockSize; ++j) {
1335 if (normR[j] >= tolEigenSolve) {
1336 firstIndex = j;
1337 break;
1338 }
1339 } // for (j = 0; j < blockSize; ++j)
1340 while (firstIndex < nFound) {
1341 for (j = firstIndex; j < blockSize; ++j) {
1342 if (normR[j] < tolEigenSolve) {
1343 // Swap the j-th and firstIndex-th position
1344 callFortran.SWAP(nb*blockSize, S + j*dimSearch, 1, S + firstIndex*dimSearch, 1);
1345 callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
1346 callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
1347 break;
1348 }
1349 } // for (j = firstIndex; j < blockSize; ++j)
1350 for (j = 0; j < blockSize; ++j) {
1351 if (normR[j] >= tolEigenSolve) {
1352 firstIndex = j;
1353 break;
1354 }
1355 } // for (j = 0; j < blockSize; ++j)
1356 } // while (firstIndex < nFound)
1357
1358 // Copy the converged eigenvalues
1359 memcpy(lambda + knownEV, theta, nFound*sizeof(double));
1360
1361 // Define the restarting size
1362 bStart = ((nb - offSet) > 2) ? (nb - offSet)/2 : 0;
1363
1364 // Define the restarting space and local stiffness
1365 memset(KK, 0, nb*blockSize*dimSearch*sizeof(double));
1366 for (j = 0; j < bStart*blockSize; ++j) {
1367 KK[j + j*dimSearch] = theta[j + nFound];
1368 }
1369 // Form the restarting space
1370 oldCol = nb*blockSize;
1371 newCol = nFound + (bStart+1)*blockSize;
1372 newCol = (newCol > oldCol) ? oldCol : newCol;
1373 callFortran.GEQRF(oldCol, newCol, S, dimSearch, theta, R.Values(), xr*blockSize, &info);
1374 callFortran.ORMQR('R', 'N', xr, oldCol, newCol, S, dimSearch, theta,
1375 X.Values()+knownEV*xr, xr, R.Values(), xr*blockSize, &info);
1376 // Put random vectors if the Rayleigh Ritz vectors are not enough
1377 newCol = nFound + (bStart+1)*blockSize;
1378 if (newCol > oldCol) {
1379 Epetra_MultiVector Xnext(View, X, knownEV+blockSize-nFound, nFound);
1380 Xnext.Random();
1381 }
1382 timeRestart += MyWatch.WallTime();
1383 } // if (maxBlock > 1)
1384
1385 if (nFound > 0) {
1386 // Update MQ
1387 Epetra_MultiVector Qconv(View, X, knownEV, nFound);
1388 Epetra_MultiVector MQconv(View, MX, 0, nFound);
1389 timeMassOp -= MyWatch.WallTime();
1390 if (M)
1391 M->Apply(Qconv, MQconv);
1392 timeMassOp += MyWatch.WallTime();
1393 massOp += nFound;
1394 // Update PMQ
1395 Epetra_MultiVector PMQconv(View, PMQ, knownEV-startingEV, nFound);
1396 if (Prec) {
1397 timePrecOp -= MyWatch.WallTime();
1398 Prec->ApplyInverse(MQconv, PMQconv);
1399 timePrecOp += MyWatch.WallTime();
1400 precOp += nFound;
1401 }
1402 else {
1403 memcpy(PMQconv.Values(), MQconv.Values(), xr*nFound*sizeof(double));
1404 }
1405 // Update QtMPMQ
1406 // Note: Use tmpArray as workspace
1407 int newEV = knownEV - startingEV;
1408 timeBuildQtMPMQ -= MyWatch.WallTime();
1409 callBLAS.GEMM('T', 'N', newEV + nFound, nFound, xr, 1.0, PMQ.Values(), xr,
1410 MX.Values(), xr, 0.0, QtMPMQ + newEV*ldQtMPMQ, newEV + nFound);
1411 MyComm.SumAll(QtMPMQ + newEV*ldQtMPMQ, tmpArray, (newEV + nFound)*nFound);
1412 for (j = 0; j < nFound; ++j) {
1413 memcpy(QtMPMQ + (newEV+j)*ldQtMPMQ, tmpArray + j*(newEV+nFound),
1414 (newEV+nFound)*sizeof(double));
1415 }
1416 timeBuildQtMPMQ += MyWatch.WallTime();
1417 } // if (nFound > 0)
1418 else {
1419 offSet += 1;
1420 } // if (nFound > 0)
1421
1422 knownEV += nFound;
1423 maxBlock = (dimSearch/blockSize) - (knownEV/blockSize);
1424
1425 // The value of nFound commands how many vectors will be orthogonalized.
1426 if ((maxBlock > 1) && (newCol <= oldCol))
1427 nFound = 0;
1428
1429 } // while (outerIter <= maxIterEigenSolve)
1430 timeOuterLoop += MyWatch.WallTime();
1431 highMem = (highMem > currentSize()) ? highMem : currentSize();
1432
1433 // Clean memory
1434 delete[] work;
1435 if (vectWeight)
1436 delete vectWeight;
1437
1438 // Sort the eigenpairs
1439 timePostProce -= MyWatch.WallTime();
1440 if ((info == 0) && (knownEV > 0)) {
1441 mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
1442 }
1443 timePostProce += MyWatch.WallTime();
1444
1445 return (info == 0) ? knownEV : info;
1446
1447}
1448
1449
1450void JDPCG::accuracyCheck(const Epetra_MultiVector *X, const Epetra_MultiVector *MX,
1451 const Epetra_MultiVector *Q) const {
1452
1453 cout.precision(2);
1454 cout.setf(ios::scientific, ios::floatfield);
1455 double tmp;
1456
1457 int myPid = MyComm.MyPID();
1458
1459 if (X) {
1460 if (M) {
1461 if (MX) {
1462 tmp = myVerify.errorEquality(X, MX, M);
1463 if (myPid == 0)
1464 cout << " >> Difference between MX and M*X = " << tmp << endl;
1465 }
1466 tmp = myVerify.errorOrthonormality(X, M);
1467 if (myPid == 0)
1468 cout << " >> Error in X^T M X - I = " << tmp << endl;
1469 }
1470 else {
1471 tmp = myVerify.errorOrthonormality(X, 0);
1472 if (myPid == 0)
1473 cout << " >> Error in X^T X - I = " << tmp << endl;
1474 }
1475 }
1476
1477 if (Q == 0)
1478 return;
1479
1480 if (M) {
1481 tmp = myVerify.errorOrthonormality(Q, M);
1482 if (myPid == 0)
1483 cout << " >> Error in Q^T M Q - I = " << tmp << endl;
1484 if (X) {
1485 tmp = myVerify.errorOrthogonality(Q, X, M);
1486 if (myPid == 0)
1487 cout << " >> Orthogonality Q^T M X up to " << tmp << endl;
1488 }
1489 }
1490 else {
1491 tmp = myVerify.errorOrthonormality(Q, 0);
1492 if (myPid == 0)
1493 cout << " >> Error in Q^T Q - I = " << tmp << endl;
1494 if (X) {
1495 tmp = myVerify.errorOrthogonality(Q, X, 0);
1496 if (myPid == 0)
1497 cout << " >> Orthogonality Q^T X up to " << tmp << endl;
1498 }
1499 }
1500
1501}
1502
1503
1504int JDPCG::minimumSpaceDimension(int nev) const {
1505
1506 int myPid = MyComm.MyPID();
1507
1508 if ((myPid == 0) && (numBlock*blockSize < nev)) {
1509 cerr << endl;
1510 cerr << " !!! The space dimension (# of blocks x size of blocks) must be greater than ";
1511 cerr << " the number of eigenvalues !!!\n";
1512 cerr << " Number of blocks = " << numBlock << endl;
1513 cerr << " Size of blocks = " << blockSize << endl;
1514 cerr << " Number of eigenvalues = " << nev << endl;
1515 cerr << endl;
1516 return -1;
1517 }
1518
1519 return nev + blockSize;
1520
1521}
1522
1523
1524void JDPCG::initializeCounters() {
1525
1526 historyCount = 0;
1527 if (resHistory) {
1528 delete[] resHistory;
1529 resHistory = 0;
1530 }
1531
1532 maxSpaceSize = 0;
1533 sumSpaceSize = 0;
1534 if (spaceSizeHistory) {
1535 delete[] spaceSizeHistory;
1536 spaceSizeHistory = 0;
1537 }
1538
1539 maxIterPCG = 0;
1540 sumIterPCG = 0;
1541 if (iterPCGHistory) {
1542 delete[] iterPCGHistory;
1543 iterPCGHistory = 0;
1544 }
1545
1546 memRequested = 0.0;
1547 highMem = 0.0;
1548
1549 massOp = 0;
1550 numCorrectionPrec = 0;
1551 numCorrectionSolve = 0;
1552 numPCGmassOp = 0;
1553 numPCGstifOp = 0;
1554 numRestart = 0;
1555 outerIter = 0;
1556 precOp = 0;
1557 residual = 0;
1558 stifOp = 0;
1559
1560 timeBuildQtMPMQ = 0.0;
1561 timeCorrectionPrec = 0.0;
1562 timeCorrectionSolve = 0.0;
1563 timeLocalProj = 0.0;
1564 timeLocalSolve = 0.0;
1565 timeLocalUpdate = 0.0;
1566 timeMassOp = 0.0;
1567 timeNorm = 0.0;
1568 timeOrtho = 0.0;
1569 timeOuterLoop = 0.0;
1570 timePCGEigCheck = 0.0;
1571 timePCGLoop = 0.0;
1572 timePCGOpMult = 0.0;
1573 timePCGPrec = 0.0;
1574 timePostProce = 0.0;
1575 timePrecOp = 0.0;
1576 timeResidual = 0.0;
1577 timeRestart = 0.0;
1578 timeStifOp = 0.0;
1579
1580}
1581
1582
1583void JDPCG::algorithmInfo() const {
1584
1585 int myPid = MyComm.MyPID();
1586
1587 if (myPid ==0) {
1588 cout << " Algorithm: Jacobi-Davidson algorithm with PCG (block version)\n";
1589 cout << " Block Size: " << blockSize << endl;
1590 cout << " Number of Blocks kept: " << numBlock << endl;
1591 }
1592
1593}
1594
1595
1596void JDPCG::historyInfo() const {
1597
1598 if (resHistory) {
1599 int j;
1600 cout << " Block Size: " << blockSize << endl;
1601 cout << endl;
1602 cout << " Residuals Search Space Dim. Inner Iter. \n";
1603 cout << endl;
1604 cout.precision(4);
1605 cout.setf(ios::scientific, ios::floatfield);
1606 for (j = 0; j < historyCount; ++j) {
1607 int ii;
1608 for (ii = 0; ii < blockSize; ++ii) {
1609 cout << resHistory[blockSize*j + ii] << " ";
1610 cout.width(4);
1611 cout << spaceSizeHistory[j] << " ";
1612 cout.width(4);
1613 cout << iterPCGHistory[j] << endl;
1614 }
1615 }
1616 cout << endl;
1617 }
1618
1619}
1620
1621
1622void JDPCG::memoryInfo() const {
1623
1624 int myPid = MyComm.MyPID();
1625
1626 double maxHighMem = 0.0;
1627 double tmp = highMem;
1628 MyComm.MaxAll(&tmp, &maxHighMem, 1);
1629
1630 double maxMemRequested = 0.0;
1631 tmp = memRequested;
1632 MyComm.MaxAll(&tmp, &maxMemRequested, 1);
1633
1634 if (myPid == 0) {
1635 cout.precision(2);
1636 cout.setf(ios::fixed, ios::floatfield);
1637 cout << " Memory requested per processor by the eigensolver = (EST) ";
1638 cout.width(6);
1639 cout << maxMemRequested << " MB " << endl;
1640 cout << endl;
1641 cout << " High water mark in eigensolver = (EST) ";
1642 cout.width(6);
1643 cout << maxHighMem << " MB " << endl;
1644 cout << endl;
1645 }
1646
1647}
1648
1649
1650void JDPCG::operationInfo() const {
1651
1652 int myPid = MyComm.MyPID();
1653
1654 if (myPid == 0) {
1655 cout << " --- Operations ---\n\n";
1656 cout << " Total number of mass matrix multiplications = ";
1657 cout.width(9);
1658 int tmp = massOp + modalTool.getNumProj_MassMult() + modalTool.getNumNorm_MassMult();
1659 tmp += numPCGmassOp;
1660 cout << tmp << endl;
1661 cout << " Mass multiplications in solver loop = ";
1662 cout.width(9);
1663 cout << massOp << endl;
1664 cout << " Mass multiplications for orthogonalization = ";
1665 cout.width(9);
1666 cout << modalTool.getNumProj_MassMult() << endl;
1667 cout << " Mass multiplications for normalization = ";
1668 cout.width(9);
1669 cout << modalTool.getNumNorm_MassMult() << endl;
1670 cout << " Mass multiplications in PCG loop = ";
1671 cout.width(9);
1672 cout << numPCGmassOp << endl;
1673 cout << endl;
1674 cout << " Total number of stiffness matrix operations = ";
1675 cout.width(9);
1676 cout << stifOp + numPCGstifOp << endl;
1677 cout << " Stiffness multiplications in solver loop = ";
1678 cout.width(9);
1679 cout << stifOp << endl;
1680 cout << " Stiffness multiplications in PCG loop = ";
1681 cout.width(9);
1682 cout << numPCGstifOp << endl;
1683 cout << endl;
1684 cout << " Total number of preconditioner applications = ";
1685 cout.width(9);
1686 cout << precOp << endl;
1687 cout << endl;
1688 cout << " Total number of PCG solve = ";
1689 cout.width(9);
1690 cout << numCorrectionSolve << endl;
1691 cout << " Number of projected precond. appl. : ";
1692 cout.width(9);
1693 cout << numCorrectionPrec << endl;
1694 cout.setf(ios::fixed, ios::floatfield);
1695 cout.precision(2);
1696 cout.width(9);
1697 cout << " Average number of iter. per solve : ";
1698 cout.width(9);
1699 cout << ((double) sumIterPCG)*blockSize/((double) numCorrectionSolve) << endl;
1700 cout << " Maximum number of iter. per solve : ";
1701 cout.width(9);
1702 cout << maxIterPCG << endl;
1703 cout << endl;
1704 cout << " Total number of computed eigen-residuals = ";
1705 cout.width(9);
1706 cout << residual << endl;
1707 cout << endl;
1708 cout << " Total number of outer iterations = ";
1709 cout.width(9);
1710 cout << outerIter << endl;
1711 cout << " Number of restarts = ";
1712 cout.width(9);
1713 cout << numRestart << endl;
1714 cout << endl;
1715 cout << " Maximum size of search space = ";
1716 cout.width(9);
1717 cout << maxSpaceSize << endl;
1718 cout << " Average size of search space = ";
1719 cout.setf(ios::fixed, ios::floatfield);
1720 cout.precision(2);
1721 cout.width(9);
1722 cout << ((double) sumSpaceSize)/((double) outerIter) << endl;
1723 cout << endl;
1724 }
1725
1726}
1727
1728
1729void JDPCG::timeInfo() const {
1730
1731 int myPid = MyComm.MyPID();
1732
1733 if (myPid == 0) {
1734 cout << " --- Timings ---\n\n";
1735 cout.setf(ios::fixed, ios::floatfield);
1736 cout.precision(2);
1737 cout << " Total time for outer loop = ";
1738 cout.width(9);
1739 cout << timeOuterLoop << " s" << endl;
1740 cout << " Time for mass matrix operations = ";
1741 cout.width(9);
1742 cout << timeMassOp << " s ";
1743 cout.width(5);
1744 cout << 100*timeMassOp/timeOuterLoop << " %\n";
1745 cout << " Time for stiffness matrix operations = ";
1746 cout.width(9);
1747 cout << timeStifOp << " s ";
1748 cout.width(5);
1749 cout << 100*timeStifOp/timeOuterLoop << " %\n";
1750 cout << " Time for preconditioner applications = ";
1751 cout.width(9);
1752 cout << timePrecOp << " s ";
1753 cout.width(5);
1754 cout << 100*timePrecOp/timeOuterLoop << " %\n";
1755 cout << " Time for orthogonalizations = ";
1756 cout.width(9);
1757 cout << timeOrtho << " s ";
1758 cout.width(5);
1759 cout << 100*timeOrtho/timeOuterLoop << " %\n";
1760 cout << " Projection step : ";
1761 cout.width(9);
1762 cout << modalTool.getTimeProj() << " s\n";
1763 cout << " Normalization step : ";
1764 cout.width(9);
1765 cout << modalTool.getTimeNorm() << " s\n";
1766 cout << " Time for local projection = ";
1767 cout.width(9);
1768 cout << timeLocalProj << " s ";
1769 cout.width(5);
1770 cout << 100*timeLocalProj/timeOuterLoop << " %\n";
1771 cout << " Time for local eigensolve = ";
1772 cout.width(9);
1773 cout << timeLocalSolve << " s ";
1774 cout.width(5);
1775 cout << 100*timeLocalSolve/timeOuterLoop << " %\n";
1776 cout << " Time for local update = ";
1777 cout.width(9);
1778 cout << timeLocalUpdate << " s ";
1779 cout.width(5);
1780 cout << 100*timeLocalUpdate/timeOuterLoop << " %\n";
1781 cout << " Time for building the matrix QtMP^{-1}MQ = ";
1782 cout.width(9);
1783 cout << timeBuildQtMPMQ << " s ";
1784 cout.width(5);
1785 cout << 100*timeBuildQtMPMQ/timeOuterLoop << " %\n";
1786 cout << " Time for solving the correction equation = ";
1787 cout.width(9);
1788 cout << timeCorrectionSolve << " s ";
1789 cout.width(5);
1790 cout << 100*timeCorrectionSolve/timeOuterLoop << " %\n";
1791 cout << " Mult. Preconditioner : ";
1792 cout.width(9);
1793 cout << timeCorrectionPrec << " s" << endl;
1794 cout << " Correction of Prec. : ";
1795 cout.width(9);
1796 cout << (timePCGPrec - timeCorrectionPrec) << " s" << endl;
1797 cout << " Shifted Matrix Mult. : ";
1798 cout.width(9);
1799 cout << timePCGOpMult << " s" << endl;
1800 cout << " Eigen-residuals checks : ";
1801 cout.width(9);
1802 cout << timePCGEigCheck << " s" << endl;
1803 cout << " Time for restarting space definition = ";
1804 cout.width(9);
1805 cout << timeRestart << " s ";
1806 cout.width(5);
1807 cout << 100*timeRestart/timeOuterLoop << " %\n";
1808 cout << " Time for residual computations = ";
1809 cout.width(9);
1810 cout << timeResidual << " s ";
1811 cout.width(5);
1812 cout << 100*timeResidual/timeOuterLoop << " %\n";
1813 cout << "\n";
1814 cout << " Total time for post-processing = ";
1815 cout.width(9);
1816 cout << timePostProce << " s\n";
1817 cout << endl;
1818 } // if (myPid == 0)
1819
1820}
1821
1822