Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Relaxation_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_RELAXATION_DEF_HPP
11#define IFPACK2_RELAXATION_DEF_HPP
12
13#include "Teuchos_StandardParameterEntryValidators.hpp"
14#include "Teuchos_TimeMonitor.hpp"
15#include "Tpetra_CrsMatrix.hpp"
16#include "Tpetra_BlockCrsMatrix.hpp"
17#include "Tpetra_BlockView.hpp"
18#include "Ifpack2_Utilities.hpp"
19#include "Ifpack2_Details_getCrsMatrix.hpp"
20#include "MatrixMarket_Tpetra.hpp"
21#include "Tpetra_Details_residual.hpp"
22#include <cstdlib>
23#include <memory>
24#include <sstream>
25#include "KokkosSparse_gauss_seidel.hpp"
26
27namespace {
28// Validate that a given int is nonnegative.
29class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
30 public:
31 // Constructor (does nothing).
32 NonnegativeIntValidator() {}
33
34 // ParameterEntryValidator wants this method.
35 Teuchos::ParameterEntryValidator::ValidStringsList validStringValues() const {
36 return Teuchos::null;
37 }
38
39 // Actually validate the parameter's value.
40 void
41 validate(const Teuchos::ParameterEntry& entry,
42 const std::string& paramName,
43 const std::string& sublistName) const {
44 using std::endl;
45 Teuchos::any anyVal = entry.getAny(true);
46 const std::string entryName = entry.getAny(false).typeName();
47
48 TEUCHOS_TEST_FOR_EXCEPTION(
49 anyVal.type() != typeid(int),
50 Teuchos::Exceptions::InvalidParameterType,
51 "Parameter \"" << paramName << "\" in sublist \"" << sublistName
52 << "\" has the wrong type." << endl
53 << "Parameter: " << paramName
54 << endl
55 << "Type specified: " << entryName << endl
56 << "Type required: int" << endl);
57
58 const int val = Teuchos::any_cast<int>(anyVal);
59 TEUCHOS_TEST_FOR_EXCEPTION(
60 val < 0, Teuchos::Exceptions::InvalidParameterValue,
61 "Parameter \"" << paramName << "\" in sublist \"" << sublistName
62 << "\" is negative." << endl
63 << "Parameter: " << paramName
64 << endl
65 << "Value specified: " << val << endl
66 << "Required range: [0, INT_MAX]" << endl);
67 }
68
69 // ParameterEntryValidator wants this method.
70 const std::string getXMLTypeName() const {
71 return "NonnegativeIntValidator";
72 }
73
74 // ParameterEntryValidator wants this method.
75 void
76 printDoc(const std::string& docString,
77 std::ostream& out) const {
78 Teuchos::StrUtils::printLines(out, "# ", docString);
79 out << "#\tValidator Used: " << std::endl;
80 out << "#\t\tNonnegativeIntValidator" << std::endl;
81 }
82};
83
84// A way to get a small positive number (eps() for floating-point
85// types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
86// define it (for example, for integer values).
87template <class Scalar, const bool isOrdinal = Teuchos::ScalarTraits<Scalar>::isOrdinal>
88class SmallTraits {
89 public:
90 // Return eps if Scalar is a floating-point type, else return 1.
91 static const Scalar eps();
92};
93
94// Partial specialization for when Scalar is not a floating-point type.
95template <class Scalar>
96class SmallTraits<Scalar, true> {
97 public:
98 static const Scalar eps() {
99 return Teuchos::ScalarTraits<Scalar>::one();
100 }
101};
102
103// Partial specialization for when Scalar is a floating-point type.
104template <class Scalar>
105class SmallTraits<Scalar, false> {
106 public:
107 static const Scalar eps() {
108 return Teuchos::ScalarTraits<Scalar>::eps();
109 }
110};
111
112// Work-around for GitHub Issue #5269.
113template <class Scalar,
114 const bool isComplex = Teuchos::ScalarTraits<Scalar>::isComplex>
115struct RealTraits {};
116
117template <class Scalar>
118struct RealTraits<Scalar, false> {
119 using val_type = Scalar;
120 using mag_type = Scalar;
121 static KOKKOS_INLINE_FUNCTION mag_type real(const val_type& z) {
122 return z;
123 }
124};
125
126template <class Scalar>
127struct RealTraits<Scalar, true> {
128 using val_type = Scalar;
129 using mag_type = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
130 static KOKKOS_INLINE_FUNCTION mag_type real(const val_type& z) {
131 return KokkosKernels::ArithTraits<val_type>::real(z);
132 }
133};
134
135template <class Scalar>
136KOKKOS_INLINE_FUNCTION typename RealTraits<Scalar>::mag_type
137getRealValue(const Scalar& z) {
138 return RealTraits<Scalar>::real(z);
139}
140
141} // namespace
142
143namespace Ifpack2 {
144
145template <class MatrixType>
146void Relaxation<MatrixType>::
147 updateCachedMultiVector(const Teuchos::RCP<const Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>>& map,
148 size_t numVecs) const {
149 // Allocate a multivector if the cached one isn't perfect. Checking
150 // for map pointer equality is much cheaper than Map::isSameAs.
151 if (cachedMV_.is_null() ||
152 map.get() != cachedMV_->getMap().get() ||
153 cachedMV_->getNumVectors() != numVecs) {
154 using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
155 global_ordinal_type, node_type>;
156 cachedMV_ = Teuchos::rcp(new MV(map, numVecs, false));
157 }
158}
159
160template <class MatrixType>
162 setMatrix(const Teuchos::RCP<const row_matrix_type>& A) {
163 if (A.getRawPtr() != A_.getRawPtr()) { // it's a different matrix
164 Importer_ = Teuchos::null;
165 pointImporter_ = Teuchos::null;
166 Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
167 isInitialized_ = false;
168 IsComputed_ = false;
169 diagOffsets_ = Kokkos::View<size_t*, device_type>();
170 savedDiagOffsets_ = false;
171 hasBlockCrsMatrix_ = false;
172 serialGaussSeidel_ = Teuchos::null;
173 if (!A.is_null()) {
174 IsParallel_ = (A->getRowMap()->getComm()->getSize() > 1);
175 }
176 A_ = A;
177 }
178}
179
180template <class MatrixType>
182 Relaxation(const Teuchos::RCP<const row_matrix_type>& A)
183 : A_(A)
184 , IsParallel_((A.is_null() || A->getRowMap().is_null() || A->getRowMap()->getComm().is_null()) ? false : // a reasonable default if there's no communicator
185 A->getRowMap()->getComm()->getSize() > 1) {
186 this->setObjectLabel("Ifpack2::Relaxation");
187}
188
189template <class MatrixType>
190Teuchos::RCP<const Teuchos::ParameterList>
192 using Teuchos::Array;
193 using Teuchos::ParameterList;
194 using Teuchos::parameterList;
195 using Teuchos::RCP;
196 using Teuchos::rcp;
197 using Teuchos::rcp_const_cast;
198 using Teuchos::rcp_implicit_cast;
199 using Teuchos::setStringToIntegralParameter;
200 typedef Teuchos::ParameterEntryValidator PEV;
201
202 if (validParams_.is_null()) {
203 RCP<ParameterList> pl = parameterList("Ifpack2::Relaxation");
204
205 // Set a validator that automatically converts from the valid
206 // string options to their enum values.
207 Array<std::string> precTypes(8);
208 precTypes[0] = "Jacobi";
209 precTypes[1] = "Gauss-Seidel";
210 precTypes[2] = "Symmetric Gauss-Seidel";
211 precTypes[3] = "MT Gauss-Seidel";
212 precTypes[4] = "MT Symmetric Gauss-Seidel";
213 precTypes[5] = "Richardson";
214 precTypes[6] = "Two-stage Gauss-Seidel";
215 precTypes[7] = "Two-stage Symmetric Gauss-Seidel";
216 Array<Details::RelaxationType> precTypeEnums(8);
217 precTypeEnums[0] = Details::JACOBI;
218 precTypeEnums[1] = Details::GS;
219 precTypeEnums[2] = Details::SGS;
220 precTypeEnums[3] = Details::MTGS;
221 precTypeEnums[4] = Details::MTSGS;
222 precTypeEnums[5] = Details::RICHARDSON;
223 precTypeEnums[6] = Details::GS2;
224 precTypeEnums[7] = Details::SGS2;
225 const std::string defaultPrecType("Jacobi");
226 setStringToIntegralParameter<Details::RelaxationType>("relaxation: type",
227 defaultPrecType, "Relaxation method", precTypes(), precTypeEnums(),
228 pl.getRawPtr());
229
230 const int numSweeps = 1;
231 RCP<PEV> numSweepsValidator =
232 rcp_implicit_cast<PEV>(rcp(new NonnegativeIntValidator));
233 pl->set("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
234 rcp_const_cast<const PEV>(numSweepsValidator));
235
236 // number of 'local' outer sweeps for two-stage GS
237 const int numOuterSweeps = 1;
238 RCP<PEV> numOuterSweepsValidator =
239 rcp_implicit_cast<PEV>(rcp(new NonnegativeIntValidator));
240 pl->set("relaxation: outer sweeps", numOuterSweeps, "Number of outer local relaxation sweeps for two-stage GS",
241 rcp_const_cast<const PEV>(numOuterSweepsValidator));
242 // number of 'local' inner sweeps for two-stage GS
243 const int numInnerSweeps = 1;
244 RCP<PEV> numInnerSweepsValidator =
245 rcp_implicit_cast<PEV>(rcp(new NonnegativeIntValidator));
246 pl->set("relaxation: inner sweeps", numInnerSweeps, "Number of inner local relaxation sweeps for two-stage GS",
247 rcp_const_cast<const PEV>(numInnerSweepsValidator));
248 // specify damping factor for the inner sweeps of two-stage GS
249 const scalar_type innerDampingFactor = STS::one();
250 pl->set("relaxation: inner damping factor", innerDampingFactor, "Damping factor for the inner sweep of two-stage GS");
251 // specify whether to use sptrsv instead of inner-iterations for two-stage GS
252 const bool innerSpTrsv = false;
253 pl->set("relaxation: inner sparse-triangular solve", innerSpTrsv, "Specify whether to use sptrsv instead of JR iterations for two-stage GS");
254 // specify whether to use compact form of recurrence for two-stage GS
255 const bool compactForm = false;
256 pl->set("relaxation: compact form", compactForm, "Specify whether to use compact form of recurrence for two-stage GS");
257
258 const scalar_type dampingFactor = STS::one();
259 pl->set("relaxation: damping factor", dampingFactor);
260
261 const bool zeroStartingSolution = true;
262 pl->set("relaxation: zero starting solution", zeroStartingSolution);
263
264 const bool doBackwardGS = false;
265 pl->set("relaxation: backward mode", doBackwardGS);
266
267 const bool doL1Method = false;
268 pl->set("relaxation: use l1", doL1Method);
269
270 const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
271 (STM::one() + STM::one()); // 1.5
272 pl->set("relaxation: l1 eta", l1eta);
273
274 const scalar_type minDiagonalValue = STS::zero();
275 pl->set("relaxation: min diagonal value", minDiagonalValue);
276
277 const bool fixTinyDiagEntries = false;
278 pl->set("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
279
280 const bool checkDiagEntries = false;
281 pl->set("relaxation: check diagonal entries", checkDiagEntries);
282
283 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
284 pl->set("relaxation: local smoothing indices", localSmoothingIndices);
285
286 const bool is_matrix_structurally_symmetric = false;
287 pl->set("relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
288
289 const bool ifpack2_dump_matrix = false;
290 pl->set("relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
291
292 const int cluster_size = 1;
293 pl->set("relaxation: mtgs cluster size", cluster_size);
294
295 pl->set("relaxation: mtgs coloring algorithm", "Default");
296
297 const int long_row_threshold = 0;
298 pl->set("relaxation: long row threshold", long_row_threshold);
299
300 const bool timer_for_apply = true;
301 pl->set("timer for apply", timer_for_apply);
302
303 validParams_ = rcp_const_cast<const ParameterList>(pl);
304 }
305 return validParams_;
306}
307
308template <class MatrixType>
309void Relaxation<MatrixType>::setParametersImpl(Teuchos::ParameterList& pl) {
310 using Teuchos::getIntegralValue;
311 using Teuchos::getStringValue;
312 using Teuchos::ParameterList;
313 using Teuchos::RCP;
314 typedef scalar_type ST; // just to make code below shorter
315
316 if (pl.isType<double>("relaxation: damping factor")) {
317 // Make sure that ST=complex can run with a damping factor that is
318 // a double.
319 ST df = pl.get<double>("relaxation: damping factor");
320 pl.remove("relaxation: damping factor");
321 pl.set("relaxation: damping factor", df);
322 }
323
324 pl.validateParametersAndSetDefaults(*getValidParameters());
325
326 const Details::RelaxationType precType =
327 getIntegralValue<Details::RelaxationType>(pl, "relaxation: type");
328 const std::string precTypeStr = getStringValue<Details::RelaxationType>(pl, "relaxation: type");
329 // We only access "relaxation: type" using strings in the rest of the code
330 pl.set<std::string>("relaxation: type", precTypeStr);
331 pl.get<std::string>("relaxation: type"); // We need to mark the parameter as "used"
332 const int numSweeps = pl.get<int>("relaxation: sweeps");
333 const ST dampingFactor = pl.get<ST>("relaxation: damping factor");
334 const bool zeroStartSol = pl.get<bool>("relaxation: zero starting solution");
335 const bool doBackwardGS = pl.get<bool>("relaxation: backward mode");
336 const bool doL1Method = pl.get<bool>("relaxation: use l1");
337 const magnitude_type l1Eta = pl.get<magnitude_type>("relaxation: l1 eta");
338 const ST minDiagonalValue = pl.get<ST>("relaxation: min diagonal value");
339 const bool fixTinyDiagEntries = pl.get<bool>("relaxation: fix tiny diagonal entries");
340 const bool checkDiagEntries = pl.get<bool>("relaxation: check diagonal entries");
341 const bool is_matrix_structurally_symmetric = pl.get<bool>("relaxation: symmetric matrix structure");
342 const bool ifpack2_dump_matrix = pl.get<bool>("relaxation: ifpack2 dump matrix");
343 const bool timer_for_apply = pl.get<bool>("timer for apply");
344 int cluster_size = 1;
345 if (pl.isParameter("relaxation: mtgs cluster size")) // optional parameter
346 cluster_size = pl.get<int>("relaxation: mtgs cluster size");
347 int long_row_threshold = 0;
348 if (pl.isParameter("relaxation: long row threshold")) // optional parameter
349 long_row_threshold = pl.get<int>("relaxation: long row threshold");
350 std::string color_algo_name = pl.get<std::string>("relaxation: mtgs coloring algorithm");
351 // convert to lowercase
352 for (char& c : color_algo_name)
353 c = tolower(c);
354 if (color_algo_name == "default")
355 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_DEFAULT;
356 else if (color_algo_name == "serial")
357 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_SERIAL;
358 else if (color_algo_name == "vb")
359 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VB;
360 else if (color_algo_name == "vbbit")
361 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBBIT;
362 else if (color_algo_name == "vbcs")
363 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBCS;
364 else if (color_algo_name == "vbd")
365 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBD;
366 else if (color_algo_name == "vbdbit")
367 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBDBIT;
368 else if (color_algo_name == "eb")
369 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_EB;
370 else {
371 std::ostringstream msg;
372 msg << "Ifpack2::Relaxation: 'relaxation: mtgs coloring algorithm' = '" << color_algo_name << "' is not valid.\n";
373 msg << "Choices (not case sensitive) are: Default, Serial, VB, VBBIT, VBCS, VBD, VBDBIT, EB.";
374 TEUCHOS_TEST_FOR_EXCEPTION(
375 true, std::invalid_argument, msg.str());
376 }
377
378 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type>>("relaxation: local smoothing indices");
379
380 // for Two-stage Gauss-Seidel
381 if (!std::is_same<double, ST>::value && pl.isType<double>("relaxation: inner damping factor")) {
382 // Make sure that ST=complex can run with a damping factor that is
383 // a double.
384 ST df = pl.get<double>("relaxation: inner damping factor");
385 pl.remove("relaxation: inner damping factor");
386 pl.set("relaxation: inner damping factor", df);
387 }
388 // If long row algorithm was requested, make sure non-cluster (point) multicolor Gauss-Seidel (aka MTGS/MTSGS) will be used.
389 if (long_row_threshold > 0) {
390 TEUCHOS_TEST_FOR_EXCEPTION(
391 cluster_size != 1, std::invalid_argument,
392 "Ifpack2::Relaxation: "
393 "Requested long row MTGS/MTSGS algorithm and cluster GS/SGS, but those are not compatible.");
394 TEUCHOS_TEST_FOR_EXCEPTION(
395 precType != Details::RelaxationType::MTGS && precType != Details::RelaxationType::MTSGS,
396 std::invalid_argument,
397 "Ifpack2::Relaxation: "
398 "Requested long row MTGS/MTSGS algorithm, but this is only compatible with preconditioner types "
399 "'MT Gauss-Seidel' and 'MT Symmetric Gauss-Seidel'.");
400 }
401
402 const ST innerDampingFactor = pl.get<ST>("relaxation: inner damping factor");
403 const int numInnerSweeps = pl.get<int>("relaxation: inner sweeps");
404 const int numOuterSweeps = pl.get<int>("relaxation: outer sweeps");
405 const bool innerSpTrsv = pl.get<bool>("relaxation: inner sparse-triangular solve");
406 const bool compactForm = pl.get<bool>("relaxation: compact form");
407
408 // "Commit" the changes, now that we've validated everything.
409 PrecType_ = precType;
410 NumSweeps_ = numSweeps;
411 DampingFactor_ = dampingFactor;
412 ZeroStartingSolution_ = zeroStartSol;
413 DoBackwardGS_ = doBackwardGS;
414 DoL1Method_ = doL1Method;
415 L1Eta_ = l1Eta;
416 MinDiagonalValue_ = minDiagonalValue;
417 fixTinyDiagEntries_ = fixTinyDiagEntries;
418 checkDiagEntries_ = checkDiagEntries;
419 clusterSize_ = cluster_size;
420 longRowThreshold_ = long_row_threshold;
421 is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
422 ifpack2_dump_matrix_ = ifpack2_dump_matrix;
423 localSmoothingIndices_ = localSmoothingIndices;
424 // for Two-stage GS
425 NumInnerSweeps_ = numInnerSweeps;
426 NumOuterSweeps_ = numOuterSweeps;
427 InnerSpTrsv_ = innerSpTrsv;
428 InnerDampingFactor_ = innerDampingFactor;
429 CompactForm_ = compactForm;
430 TimerForApply_ = timer_for_apply;
431}
432
433template <class MatrixType>
434void Relaxation<MatrixType>::setParameters(const Teuchos::ParameterList& pl) {
435 // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
436 // but otherwise, we will get [unused] in pl
437 this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
438}
439
440template <class MatrixType>
441Teuchos::RCP<const Teuchos::Comm<int>>
443 TEUCHOS_TEST_FOR_EXCEPTION(
444 A_.is_null(), std::runtime_error,
445 "Ifpack2::Relaxation::getComm: "
446 "The input matrix A is null. Please call setMatrix() with a nonnull "
447 "input matrix before calling this method.");
448 return A_->getRowMap()->getComm();
449}
450
451template <class MatrixType>
452Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type>
454 return A_;
455}
456
457template <class MatrixType>
458Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
459 typename MatrixType::global_ordinal_type,
460 typename MatrixType::node_type>>
462 TEUCHOS_TEST_FOR_EXCEPTION(
463 A_.is_null(), std::runtime_error,
464 "Ifpack2::Relaxation::getDomainMap: "
465 "The input matrix A is null. Please call setMatrix() with a nonnull "
466 "input matrix before calling this method.");
467 return A_->getDomainMap();
468}
469
470template <class MatrixType>
471Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
472 typename MatrixType::global_ordinal_type,
473 typename MatrixType::node_type>>
475 TEUCHOS_TEST_FOR_EXCEPTION(
476 A_.is_null(), std::runtime_error,
477 "Ifpack2::Relaxation::getRangeMap: "
478 "The input matrix A is null. Please call setMatrix() with a nonnull "
479 "input matrix before calling this method.");
480 return A_->getRangeMap();
481}
482
483template <class MatrixType>
485 return true;
486}
487
488template <class MatrixType>
490 return (NumInitialize_);
491}
492
493template <class MatrixType>
495 return (NumCompute_);
496}
497
498template <class MatrixType>
500 return (NumApply_);
501}
502
503template <class MatrixType>
505 return (InitializeTime_);
506}
507
508template <class MatrixType>
510 return (ComputeTime_);
511}
512
513template <class MatrixType>
515 return (ApplyTime_);
516}
517
518template <class MatrixType>
520 return (ComputeFlops_);
521}
522
523template <class MatrixType>
525 return (ApplyFlops_);
526}
527
528template <class MatrixType>
530 TEUCHOS_TEST_FOR_EXCEPTION(
531 A_.is_null(), std::runtime_error,
532 "Ifpack2::Relaxation::getNodeSmootherComplexity: "
533 "The input matrix A is null. Please call setMatrix() with a nonnull "
534 "input matrix, then call compute(), before calling this method.");
535 // Relaxation methods cost roughly one apply + one diagonal inverse per iteration
536 return A_->getLocalNumRows() + A_->getLocalNumEntries();
537}
538
539template <class MatrixType>
541 apply(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
542 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
543 Teuchos::ETransp /* mode */,
544 scalar_type alpha,
545 scalar_type beta) const {
546 using Teuchos::as;
547 using Teuchos::RCP;
548 using Teuchos::rcp;
549 using Teuchos::rcpFromRef;
550 typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
552 MV;
553 TEUCHOS_TEST_FOR_EXCEPTION(
554 A_.is_null(), std::runtime_error,
555 "Ifpack2::Relaxation::apply: "
556 "The input matrix A is null. Please call setMatrix() with a nonnull "
557 "input matrix, then call compute(), before calling this method.");
558 TEUCHOS_TEST_FOR_EXCEPTION(
559 !isComputed(),
560 std::runtime_error,
561 "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
562 "preconditioner instance before you may call apply(). You may call "
563 "isComputed() to find out if compute() has been called already.");
564 TEUCHOS_TEST_FOR_EXCEPTION(
565 X.getNumVectors() != Y.getNumVectors(),
566 std::runtime_error,
567 "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
568 "X has "
569 << X.getNumVectors() << " columns, but Y has "
570 << Y.getNumVectors() << " columns.");
571 TEUCHOS_TEST_FOR_EXCEPTION(
572 beta != STS::zero(), std::logic_error,
573 "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
574 "implemented.");
575
576 Teuchos::RCP<Teuchos::Time> timer;
577 const std::string timerName("Ifpack2::Relaxation::apply");
578 if (TimerForApply_) {
579 timer = Teuchos::TimeMonitor::lookupCounter(timerName);
580 if (timer.is_null()) {
581 timer = Teuchos::TimeMonitor::getNewCounter(timerName);
582 }
583 }
584
585 Teuchos::Time time = Teuchos::Time(timerName);
586 double startTime = time.wallTime();
587
588 {
589 Teuchos::RCP<Teuchos::TimeMonitor> timeMon;
590 if (TimerForApply_)
591 timeMon = Teuchos::rcp(new Teuchos::TimeMonitor(*timer));
592
593 // Special case: alpha == 0.
594 if (alpha == STS::zero()) {
595 // No floating-point operations, so no need to update a count.
596 Y.putScalar(STS::zero());
597 } else {
598 // If X and Y alias one another, then we need to create an
599 // auxiliary vector, Xcopy (a deep copy of X).
600 RCP<const MV> Xcopy;
601 {
602 if (X.aliases(Y)) {
603 Xcopy = rcp(new MV(X, Teuchos::Copy));
604 } else {
605 Xcopy = rcpFromRef(X);
606 }
607 }
608
609 // Each of the following methods updates the flop count itself.
610 // All implementations handle zeroing out the starting solution
611 // (if necessary) themselves.
612 switch (PrecType_) {
613 case Ifpack2::Details::JACOBI:
614 ApplyInverseJacobi(*Xcopy, Y);
615 break;
616 case Ifpack2::Details::GS:
617 ApplyInverseSerialGS(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
618 break;
619 case Ifpack2::Details::SGS:
620 ApplyInverseSerialGS(*Xcopy, Y, Tpetra::Symmetric);
621 break;
622 case Ifpack2::Details::MTGS:
623 case Ifpack2::Details::GS2:
624 ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
625 break;
626 case Ifpack2::Details::MTSGS:
627 case Ifpack2::Details::SGS2:
628 ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, Tpetra::Symmetric);
629 break;
630 case Ifpack2::Details::RICHARDSON:
631 ApplyInverseRichardson(*Xcopy, Y);
632 break;
633
634 default:
635 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
636 "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
637 << PrecType_ << ". Please report this bug to the Ifpack2 developers.");
638 }
639 if (alpha != STS::one()) {
640 Y.scale(alpha);
641 const double numGlobalRows = as<double>(A_->getGlobalNumRows());
642 const double numVectors = as<double>(Y.getNumVectors());
643 ApplyFlops_ += numGlobalRows * numVectors;
644 }
645 }
646 }
647 ApplyTime_ += (time.wallTime() - startTime);
648 ++NumApply_;
649}
650
651template <class MatrixType>
653 applyMat(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
654 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
655 Teuchos::ETransp mode) const {
656 TEUCHOS_TEST_FOR_EXCEPTION(
657 A_.is_null(), std::runtime_error,
658 "Ifpack2::Relaxation::applyMat: "
659 "The input matrix A is null. Please call setMatrix() with a nonnull "
660 "input matrix, then call compute(), before calling this method.");
661 TEUCHOS_TEST_FOR_EXCEPTION(
662 !isComputed(), std::runtime_error,
663 "Ifpack2::Relaxation::applyMat: "
664 "isComputed() must be true before you may call applyMat(). "
665 "Please call compute() before calling this method.");
666 TEUCHOS_TEST_FOR_EXCEPTION(
667 X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
668 "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors()
669 << " != Y.getNumVectors() = " << Y.getNumVectors() << ".");
670 A_->apply(X, Y, mode);
671}
672
673template <class MatrixType>
675 const char methodName[] = "Ifpack2::Relaxation::initialize";
676
677 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, methodName << ": The "
678 "input matrix A is null. Please call setMatrix() with "
679 "a nonnull input matrix before calling this method.");
680
681 Teuchos::RCP<Teuchos::Time> timer =
682 Teuchos::TimeMonitor::getNewCounter(methodName);
683
684 double startTime = timer->wallTime();
685
686 { // Timing of initialize starts here
687 Teuchos::TimeMonitor timeMon(*timer);
688 isInitialized_ = false;
689
690 {
691 auto rowMap = A_->getRowMap();
692 auto comm = rowMap.is_null() ? Teuchos::null : rowMap->getComm();
693 IsParallel_ = !comm.is_null() && comm->getSize() > 1;
694 }
695
696 // mfh 21 Mar 2013, 07 May 2019: The Import object may be null,
697 // but in that case, the domain and column Maps are the same and
698 // we don't need to Import anyway.
699 //
700 // mfh 07 May 2019: A_->getGraph() might be an
701 // OverlappingRowGraph, which doesn't have an Import object.
702 // However, in that case, the comm should have size 1.
703 Importer_ = IsParallel_ ? A_->getGraph()->getImporter() : Teuchos::null;
704
705 {
706 Teuchos::RCP<const block_crs_matrix_type> A_bcrs =
707 Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(A_);
708 hasBlockCrsMatrix_ = !A_bcrs.is_null();
709 }
710
711 serialGaussSeidel_ = Teuchos::null;
712
713 if (PrecType_ == Details::MTGS || PrecType_ == Details::MTSGS ||
714 PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
715 auto crsMat = Details::getCrsMatrix(A_);
716 TEUCHOS_TEST_FOR_EXCEPTION(crsMat.is_null(), std::logic_error, methodName << ": "
717 "Multithreaded Gauss-Seidel methods currently only work "
718 "when the input matrix is a Tpetra::CrsMatrix.");
719
720 // FIXME (mfh 27 May 2019) Dumping the matrix belongs in
721 // compute, not initialize, since users may change the matrix's
722 // values at any time before calling compute.
723 if (ifpack2_dump_matrix_) {
724 static int sequence_number = 0;
725 const std::string file_name = "Ifpack2_MT_GS_" +
726 std::to_string(sequence_number++) + ".mtx";
727 using writer_type = Tpetra::MatrixMarket::Writer<crs_matrix_type>;
728 writer_type::writeSparseFile(file_name, crsMat);
729 }
730
731 this->mtKernelHandle_ = Teuchos::rcp(new mt_kernel_handle_type());
732 if (mtKernelHandle_->get_gs_handle() == nullptr) {
733 if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2)
734 mtKernelHandle_->create_gs_handle(KokkosSparse::GS_TWOSTAGE);
735 else if (this->clusterSize_ == 1) {
736 mtKernelHandle_->create_gs_handle(KokkosSparse::GS_DEFAULT, this->mtColoringAlgorithm_);
737 mtKernelHandle_->get_point_gs_handle()->set_long_row_threshold(longRowThreshold_);
738 } else
739 mtKernelHandle_->create_gs_handle(KokkosSparse::CLUSTER_DEFAULT, this->clusterSize_, this->mtColoringAlgorithm_);
740 }
741 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice();
742 if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
743 // set parameters for two-stage GS
744 mtKernelHandle_->set_gs_set_num_inner_sweeps(NumInnerSweeps_);
745 mtKernelHandle_->set_gs_set_num_outer_sweeps(NumOuterSweeps_);
746 mtKernelHandle_->set_gs_set_inner_damp_factor(InnerDampingFactor_);
747 mtKernelHandle_->set_gs_twostage(!InnerSpTrsv_, A_->getLocalNumRows());
748 mtKernelHandle_->set_gs_twostage_compact_form(CompactForm_);
749 }
750
751 KokkosSparse::gauss_seidel_symbolic(
752 mtKernelHandle_.getRawPtr(),
753 A_->getLocalNumRows(),
754 A_->getLocalNumCols(),
755 kcsr.graph.row_map,
756 kcsr.graph.entries,
757 is_matrix_structurally_symmetric_);
758 }
759 } // timing of initialize stops here
760
761 InitializeTime_ += (timer->wallTime() - startTime);
762 ++NumInitialize_;
763 isInitialized_ = true;
764}
765
766namespace Impl {
767template <typename BlockDiagView>
768struct InvertDiagBlocks {
769 typedef typename BlockDiagView::size_type Size;
770
771 private:
772 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
773 template <typename View>
774 using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
775 typename View::device_type, Unmanaged>;
776
777 typedef typename BlockDiagView::non_const_value_type Scalar;
778 typedef typename BlockDiagView::device_type Device;
779 typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
780 typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
781
782 UnmanagedView<BlockDiagView> block_diag_;
783 // TODO Use thread team and scratch memory space. In this first
784 // pass, provide workspace for each block.
785 RWrk rwrk_buf_;
786 UnmanagedView<RWrk> rwrk_;
787 IWrk iwrk_buf_;
788 UnmanagedView<IWrk> iwrk_;
789
790 public:
791 InvertDiagBlocks(BlockDiagView& block_diag)
792 : block_diag_(block_diag) {
793 const auto blksz = block_diag.extent(1);
794 Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
795 rwrk_ = rwrk_buf_;
796 Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
797 iwrk_ = iwrk_buf_;
798 }
799
800 KOKKOS_INLINE_FUNCTION
801 void operator()(const Size i, int& jinfo) const {
802 auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
803 auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
804 auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
805 int info = 0;
806 Tpetra::GETF2(D_cur, ipiv, info);
807 if (info) {
808 ++jinfo;
809 return;
810 }
811 Tpetra::GETRI(D_cur, ipiv, work, info);
812 if (info) ++jinfo;
813 }
814};
815} // namespace Impl
816
817template <class MatrixType>
818void Relaxation<MatrixType>::computeBlockCrs() {
819 using Kokkos::ALL;
820 using Teuchos::Array;
821 using Teuchos::ArrayRCP;
822 using Teuchos::ArrayView;
823 using Teuchos::as;
824 using Teuchos::Comm;
825 using Teuchos::RCP;
826 using Teuchos::rcp;
827 using Teuchos::rcp_dynamic_cast;
828 using Teuchos::REDUCE_MAX;
829 using Teuchos::REDUCE_MIN;
830 using Teuchos::REDUCE_SUM;
831 using Teuchos::reduceAll;
832 typedef local_ordinal_type LO;
833
834 const std::string timerName("Ifpack2::Relaxation::computeBlockCrs");
835 Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter(timerName);
836 if (timer.is_null()) {
837 timer = Teuchos::TimeMonitor::getNewCounter(timerName);
838 }
839 double startTime = timer->wallTime();
840 {
841 Teuchos::TimeMonitor timeMon(*timer);
842
843 TEUCHOS_TEST_FOR_EXCEPTION(
844 A_.is_null(), std::runtime_error,
845 "Ifpack2::Relaxation::"
846 "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
847 "with a nonnull input matrix, then call initialize(), before calling "
848 "this method.");
849 auto blockCrsA = rcp_dynamic_cast<const block_crs_matrix_type>(A_);
850 TEUCHOS_TEST_FOR_EXCEPTION(
851 blockCrsA.is_null(), std::logic_error,
852 "Ifpack2::Relaxation::"
853 "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
854 "got this far. Please report this bug to the Ifpack2 developers.");
855
856 const scalar_type one = STS::one();
857
858 // Reset state.
859 IsComputed_ = false;
860
861 const LO lclNumMeshRows =
862 blockCrsA->getCrsGraph().getLocalNumRows();
863 const LO blockSize = blockCrsA->getBlockSize();
864
865 if (!savedDiagOffsets_) {
866 blockDiag_ = block_diag_type(); // clear it before reallocating
867 blockDiag_ = block_diag_type("Ifpack2::Relaxation::blockDiag_",
868 lclNumMeshRows, blockSize, blockSize);
869 if (Teuchos::as<LO>(diagOffsets_.extent(0)) < lclNumMeshRows) {
870 // Clear diagOffsets_ first (by assigning an empty View to it)
871 // to save memory, before reallocating.
872 diagOffsets_ = Kokkos::View<size_t*, device_type>();
873 diagOffsets_ = Kokkos::View<size_t*, device_type>("offsets", lclNumMeshRows);
874 }
875 blockCrsA->getCrsGraph().getLocalDiagOffsets(diagOffsets_);
876 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(diagOffsets_.extent(0)) !=
877 static_cast<size_t>(blockDiag_.extent(0)),
878 std::logic_error, "diagOffsets_.extent(0) = " << diagOffsets_.extent(0) << " != blockDiag_.extent(0) = " << blockDiag_.extent(0) << ". Please report this bug to the Ifpack2 developers.");
879 savedDiagOffsets_ = true;
880 }
881 blockCrsA->getLocalDiagCopy(blockDiag_, diagOffsets_);
882
883 // Use an unmanaged View in this method, so that when we take
884 // subviews of it (to get each diagonal block), we don't have to
885 // touch the reference count. Reference count updates are a
886 // thread scalability bottleneck and have a performance cost even
887 // without using threads.
888 unmanaged_block_diag_type blockDiag = blockDiag_;
889
890 if (DoL1Method_ && IsParallel_) {
891 const scalar_type two = one + one;
892 const size_t maxLength = A_->getLocalMaxNumRowEntries();
893 nonconst_local_inds_host_view_type indices("indices", maxLength);
894 nonconst_values_host_view_type values_("values", maxLength * blockSize * blockSize);
895 size_t numEntries = 0;
896
897 for (LO i = 0; i < lclNumMeshRows; ++i) {
898 // FIXME (mfh 16 Dec 2015) Get views instead of copies.
899 blockCrsA->getLocalRowCopy(i, indices, values_, numEntries);
900 scalar_type* values = reinterpret_cast<scalar_type*>(values_.data());
901
902 auto diagBlock = Kokkos::subview(blockDiag, i, ALL(), ALL());
903 for (LO subRow = 0; subRow < blockSize; ++subRow) {
904 magnitude_type diagonal_boost = STM::zero();
905 for (size_t k = 0; k < numEntries; ++k) {
906 if (indices[k] >= lclNumMeshRows) {
907 const size_t offset = blockSize * blockSize * k + subRow * blockSize;
908 for (LO subCol = 0; subCol < blockSize; ++subCol) {
909 diagonal_boost += STS::magnitude(values[offset + subCol] / two);
910 }
911 }
912 }
913 if (STS::magnitude(diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
914 diagBlock(subRow, subRow) += diagonal_boost;
915 }
916 }
917 }
918 }
919
920 int info = 0;
921 {
922 Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
923 typedef typename unmanaged_block_diag_type::execution_space exec_space;
924 typedef Kokkos::RangePolicy<exec_space, LO> range_type;
925
926 Kokkos::parallel_reduce(range_type(0, lclNumMeshRows), idb, info);
927 // FIXME (mfh 19 May 2017) Different processes may not
928 // necessarily have this error all at the same time, so it would
929 // be better just to preserve a local error state and let users
930 // check.
931 TEUCHOS_TEST_FOR_EXCEPTION(info > 0, std::runtime_error, "GETF2 or GETRI failed on = " << info << " diagonal blocks.");
932 }
933
934 // In a debug build, do an extra test to make sure that all the
935 // factorizations were computed correctly.
936#ifdef HAVE_IFPACK2_DEBUG
937 const int numResults = 2;
938 // Use "max = -min" trick to get min and max in a single all-reduce.
939 int lclResults[2], gblResults[2];
940 lclResults[0] = info;
941 lclResults[1] = -info;
942 gblResults[0] = 0;
943 gblResults[1] = 0;
944 reduceAll<int, int>(*(A_->getGraph()->getComm()), REDUCE_MIN,
945 numResults, lclResults, gblResults);
946 TEUCHOS_TEST_FOR_EXCEPTION(gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
947 "Ifpack2::Relaxation::compute: When processing the input "
948 "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
949 "failed on one or more (MPI) processes.");
950#endif // HAVE_IFPACK2_DEBUG
951 serialGaussSeidel_ = rcp(new SerialGaussSeidel(blockCrsA, blockDiag_, localSmoothingIndices_, DampingFactor_));
952 } // end TimeMonitor scope
953
954 ComputeTime_ += (timer->wallTime() - startTime);
955 ++NumCompute_;
956 IsComputed_ = true;
957}
958
959template <class MatrixType>
961 using Teuchos::Array;
962 using Teuchos::ArrayRCP;
963 using Teuchos::ArrayView;
964 using Teuchos::as;
965 using Teuchos::Comm;
966 using Teuchos::RCP;
967 using Teuchos::rcp;
968 using Teuchos::REDUCE_MAX;
969 using Teuchos::REDUCE_MIN;
970 using Teuchos::REDUCE_SUM;
971 using Teuchos::reduceAll;
972 using LO = local_ordinal_type;
973 using vector_type = Tpetra::Vector<scalar_type, local_ordinal_type,
975 using IST = typename vector_type::impl_scalar_type;
976 using KAT = KokkosKernels::ArithTraits<IST>;
977
978 const char methodName[] = "Ifpack2::Relaxation::compute";
979 const scalar_type zero = STS::zero();
980 const scalar_type one = STS::one();
981
982 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, methodName << ": "
983 "The input matrix A is null. Please call setMatrix() with a nonnull "
984 "input matrix, then call initialize(), before calling this method.");
985
986 // We don't count initialization in compute() time.
987 if (!isInitialized()) {
988 initialize();
989 }
990
991 if (hasBlockCrsMatrix_) {
992 computeBlockCrs();
993 return;
994 }
995
996 Teuchos::RCP<Teuchos::Time> timer =
997 Teuchos::TimeMonitor::getNewCounter(methodName);
998 double startTime = timer->wallTime();
999
1000 { // Timing of compute starts here.
1001 Teuchos::TimeMonitor timeMon(*timer);
1002
1003 // Precompute some quantities for "fixing" zero or tiny diagonal
1004 // entries. We'll only use them if this "fixing" is enabled.
1005 //
1006 // SmallTraits covers for the lack of eps() in
1007 // Teuchos::ScalarTraits when its template parameter is not a
1008 // floating-point type. (Ifpack2 sometimes gets instantiated for
1009 // integer Scalar types.)
1010 IST oneOverMinDiagVal = KAT::one() / static_cast<IST>(SmallTraits<scalar_type>::eps());
1011 if (MinDiagonalValue_ != zero)
1012 oneOverMinDiagVal = KAT::one() / static_cast<IST>(MinDiagonalValue_);
1013
1014 // It's helpful not to have to recompute this magnitude each time.
1015 const magnitude_type minDiagValMag = STS::magnitude(MinDiagonalValue_);
1016
1017 const LO numMyRows = static_cast<LO>(A_->getLocalNumRows());
1018
1019 TEUCHOS_TEST_FOR_EXCEPTION(NumSweeps_ < 0, std::logic_error, methodName << ": NumSweeps_ = " << NumSweeps_ << " < 0. "
1020 "Please report this bug to the Ifpack2 developers.");
1021 IsComputed_ = false;
1022
1023 if (Diagonal_.is_null()) {
1024 // A_->getLocalDiagCopy fills in all Vector entries, even if the
1025 // matrix has no stored entries in the corresponding rows.
1026 Diagonal_ = rcp(new vector_type(A_->getRowMap(), false));
1027 }
1028
1029 if (checkDiagEntries_) {
1030 //
1031 // Check diagonal elements, replace zero elements with the minimum
1032 // diagonal value, and store their inverses. Count the number of
1033 // "small" and zero diagonal entries while we're at it.
1034 //
1035 size_t numSmallDiagEntries = 0; // "small" includes zero
1036 size_t numZeroDiagEntries = 0; // # zero diagonal entries
1037 size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
1038 magnitude_type minMagDiagEntryMag = STM::zero();
1039 magnitude_type maxMagDiagEntryMag = STM::zero();
1040
1041 // FIXME: We are extracting the diagonal more than once. That
1042 // isn't very efficient, but we shouldn't be checking
1043 // diagonal entries at scale anyway.
1044 A_->getLocalDiagCopy(*Diagonal_); // slow path for row matrix
1045 std::unique_ptr<vector_type> origDiag;
1046 origDiag = std::unique_ptr<vector_type>(new vector_type(*Diagonal_, Teuchos::Copy));
1047 auto diag2d = Diagonal_->getLocalViewHost(Tpetra::Access::ReadWrite);
1048 auto diag = Kokkos::subview(diag2d, Kokkos::ALL(), 0);
1049 // As we go, keep track of the diagonal entries with the
1050 // least and greatest magnitude. We could use the trick of
1051 // starting min with +Inf and max with -Inf, but that
1052 // doesn't work if scalar_type is a built-in integer type.
1053 // Thus, we have to start by reading the first diagonal
1054 // entry redundantly.
1055 if (numMyRows != 0) {
1056 const magnitude_type d_0_mag = KAT::abs(diag(0));
1057 minMagDiagEntryMag = d_0_mag;
1058 maxMagDiagEntryMag = d_0_mag;
1059 }
1060
1061 // Go through all the diagonal entries. Compute counts of
1062 // small-magnitude, zero, and negative-real-part entries.
1063 // Invert the diagonal entries that aren't too small. For
1064 // those too small in magnitude, replace them with
1065 // 1/MinDiagonalValue_ (or 1/eps if MinDiagonalValue_
1066 // happens to be zero).
1067 for (LO i = 0; i < numMyRows; ++i) {
1068 const IST d_i = diag(i);
1069 const magnitude_type d_i_mag = KAT::abs(d_i);
1070 // Work-around for GitHub Issue #5269.
1071 // const magnitude_type d_i_real = KAT::real (d_i);
1072 const auto d_i_real = getRealValue(d_i);
1073
1074 // We can't compare complex numbers, but we can compare their
1075 // real parts.
1076 if (d_i_real < STM::zero()) {
1077 ++numNegDiagEntries;
1078 }
1079 if (d_i_mag < minMagDiagEntryMag) {
1080 minMagDiagEntryMag = d_i_mag;
1081 }
1082 if (d_i_mag > maxMagDiagEntryMag) {
1083 maxMagDiagEntryMag = d_i_mag;
1084 }
1085
1086 if (fixTinyDiagEntries_) {
1087 // <= not <, in case minDiagValMag is zero.
1088 if (d_i_mag <= minDiagValMag) {
1089 ++numSmallDiagEntries;
1090 if (d_i_mag == STM::zero()) {
1091 ++numZeroDiagEntries;
1092 }
1093 diag(i) = oneOverMinDiagVal;
1094 } else {
1095 diag(i) = KAT::one() / d_i;
1096 }
1097 } else { // Don't fix zero or tiny diagonal entries.
1098 // <= not <, in case minDiagValMag is zero.
1099 if (d_i_mag <= minDiagValMag) {
1100 ++numSmallDiagEntries;
1101 if (d_i_mag == STM::zero()) {
1102 ++numZeroDiagEntries;
1103 }
1104 }
1105 diag(i) = KAT::one() / d_i;
1106 }
1107 }
1108
1109 // Collect global data about the diagonal entries. Only do this
1110 // (which involves O(1) all-reduces) if the user asked us to do
1111 // the extra work.
1112 //
1113 // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
1114 // zero rows. Fixing this requires one of two options:
1115 //
1116 // 1. Use a custom reduction operation that ignores processes
1117 // with zero diagonal entries.
1118 // 2. Split the communicator, compute all-reduces using the
1119 // subcommunicator over processes that have a nonzero number
1120 // of diagonal entries, and then broadcast from one of those
1121 // processes (if there is one) to the processes in the other
1122 // subcommunicator.
1123 const Comm<int>& comm = *(A_->getRowMap()->getComm());
1124
1125 // Compute global min and max magnitude of entries.
1126 Array<magnitude_type> localVals(2);
1127 localVals[0] = minMagDiagEntryMag;
1128 // (- (min (- x))) is the same as (max x).
1129 localVals[1] = -maxMagDiagEntryMag;
1130 Array<magnitude_type> globalVals(2);
1131 reduceAll<int, magnitude_type>(comm, REDUCE_MIN, 2,
1132 localVals.getRawPtr(),
1133 globalVals.getRawPtr());
1134 globalMinMagDiagEntryMag_ = globalVals[0];
1135 globalMaxMagDiagEntryMag_ = -globalVals[1];
1136
1137 Array<size_t> localCounts(3);
1138 localCounts[0] = numSmallDiagEntries;
1139 localCounts[1] = numZeroDiagEntries;
1140 localCounts[2] = numNegDiagEntries;
1141 Array<size_t> globalCounts(3);
1142 reduceAll<int, size_t>(comm, REDUCE_SUM, 3,
1143 localCounts.getRawPtr(),
1144 globalCounts.getRawPtr());
1145 globalNumSmallDiagEntries_ = globalCounts[0];
1146 globalNumZeroDiagEntries_ = globalCounts[1];
1147 globalNumNegDiagEntries_ = globalCounts[2];
1148
1149 // Compute and save the difference between the computed inverse
1150 // diagonal, and the original diagonal's inverse.
1151 vector_type diff(A_->getRowMap());
1152 diff.reciprocal(*origDiag);
1153 diff.update(-one, *Diagonal_, one);
1154 globalDiagNormDiff_ = diff.norm2();
1155 }
1156
1157 // Extract the diagonal entries. The CrsMatrix graph version is
1158 // faster than the row matrix version for subsequent calls to
1159 // compute(), since it caches the diagonal offsets.
1160
1161 bool debugAgainstSlowPath = false;
1162
1163 auto crsMat = Details::getCrsMatrix(A_);
1164
1165 if (crsMat.get() && crsMat->isFillComplete()) {
1166 // The invDiagKernel object computes diagonal offsets if
1167 // necessary. The "compute" call extracts diagonal enties,
1168 // optionally applies the L1 method and replacement of small
1169 // entries, and then inverts.
1170 if (invDiagKernel_.is_null())
1171 invDiagKernel_ = rcp(new Ifpack2::Details::InverseDiagonalKernel<op_type>(crsMat));
1172 else
1173 invDiagKernel_->setMatrix(crsMat);
1174 invDiagKernel_->compute(*Diagonal_,
1175 DoL1Method_ && IsParallel_, L1Eta_,
1176 fixTinyDiagEntries_, minDiagValMag);
1177 savedDiagOffsets_ = true;
1178
1179 // mfh 27 May 2019: Later on, we should introduce an IFPACK2_DEBUG
1180 // environment variable to control this behavior at run time.
1181#ifdef HAVE_IFPACK2_DEBUG
1182 debugAgainstSlowPath = true;
1183#endif
1184 }
1185
1186 if (crsMat.is_null() || !crsMat->isFillComplete() || debugAgainstSlowPath) {
1187 // We could not call the CrsMatrix version above, or want to
1188 // debug by comparing against the slow path.
1189
1190 // FIXME: The L1 method in this code path runs on host, since we
1191 // don't have device access for row matrices.
1192
1193 RCP<vector_type> Diagonal;
1194 if (debugAgainstSlowPath)
1195 Diagonal = rcp(new vector_type(A_->getRowMap()));
1196 else
1197 Diagonal = Diagonal_;
1198
1199 A_->getLocalDiagCopy(*Diagonal); // slow path for row matrix
1200
1201 // Setup for L1 Methods.
1202 // Here we add half the value of the off-processor entries in the row,
1203 // but only if diagonal isn't sufficiently large.
1204 //
1205 // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
1206 // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM
1207 // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
1208 //
1209 if (DoL1Method_ && IsParallel_) {
1210 const row_matrix_type& A_row = *A_;
1211 auto diag = Diagonal->getLocalViewHost(Tpetra::Access::ReadWrite);
1212 const magnitude_type two = STM::one() + STM::one();
1213 const size_t maxLength = A_row.getLocalMaxNumRowEntries();
1214 nonconst_local_inds_host_view_type indices("indices", maxLength);
1215 nonconst_values_host_view_type values("values", maxLength);
1216 size_t numEntries;
1217
1218 for (LO i = 0; i < numMyRows; ++i) {
1219 A_row.getLocalRowCopy(i, indices, values, numEntries);
1220 magnitude_type diagonal_boost = STM::zero();
1221 for (size_t k = 0; k < numEntries; ++k) {
1222 if (indices[k] >= numMyRows) {
1223 diagonal_boost += STS::magnitude(values[k] / two);
1224 }
1225 }
1226 if (KAT::magnitude(diag(i, 0)) < L1Eta_ * diagonal_boost) {
1227 diag(i, 0) += diagonal_boost;
1228 }
1229 }
1230 }
1231
1232 //
1233 // Invert the matrix's diagonal entries (in Diagonal).
1234 //
1235 if (fixTinyDiagEntries_) {
1236 // Go through all the diagonal entries. Invert those that
1237 // aren't too small in magnitude. For those that are too
1238 // small in magnitude, replace them with oneOverMinDiagVal.
1239 auto localDiag = Diagonal->getLocalViewDevice(Tpetra::Access::ReadWrite);
1240 Kokkos::parallel_for(
1241 Kokkos::RangePolicy<MyExecSpace>(0, localDiag.extent(0)),
1242 KOKKOS_LAMBDA(local_ordinal_type i) {
1243 auto d_i = localDiag(i, 0);
1244 const magnitude_type d_i_mag = KAT::magnitude(d_i);
1245 // <= not <, in case minDiagValMag is zero.
1246 if (d_i_mag <= minDiagValMag) {
1247 d_i = oneOverMinDiagVal;
1248 } else {
1249 // For Stokhos types, operator/ returns an expression
1250 // type. Explicitly convert to IST before returning.
1251 d_i = IST(KAT::one() / d_i);
1252 }
1253 localDiag(i, 0) = d_i;
1254 });
1255 } else { // don't fix tiny or zero diagonal entries
1256 Diagonal->reciprocal(*Diagonal);
1257 }
1258
1259 if (debugAgainstSlowPath) {
1260 // Validate the fast-path diagonal against the slow-path diagonal.
1261 Diagonal->update(STS::one(), *Diagonal_, -STS::one());
1262 const magnitude_type err = Diagonal->normInf();
1263 // The two diagonals should be exactly the same, so their
1264 // difference should be exactly zero.
1265 TEUCHOS_TEST_FOR_EXCEPTION(err > 100 * STM::eps(), std::logic_error, methodName << ": "
1266 << "\"fast-path\" diagonal computation failed. "
1267 "\\|D1 - D2\\|_inf = "
1268 << err << ".");
1269 }
1270 }
1271
1272 // Count floating-point operations of computing the inverse diagonal.
1273 //
1274 // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
1275 if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1276 ComputeFlops_ += 4.0 * numMyRows;
1277 } else {
1278 ComputeFlops_ += numMyRows;
1279 }
1280
1281 if (PrecType_ == Ifpack2::Details::MTGS ||
1282 PrecType_ == Ifpack2::Details::MTSGS ||
1283 PrecType_ == Ifpack2::Details::GS2 ||
1284 PrecType_ == Ifpack2::Details::SGS2) {
1285 // KokkosKernels GaussSeidel Initialization.
1286 using scalar_view_t = typename local_matrix_device_type::values_type;
1287
1288 TEUCHOS_TEST_FOR_EXCEPTION(crsMat.is_null(), std::logic_error, methodName << ": "
1289 "Multithreaded Gauss-Seidel methods currently only work "
1290 "when the input matrix is a Tpetra::CrsMatrix.");
1291 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice();
1292
1293 // TODO BMK: This should be ReadOnly, and KokkosKernels should accept a
1294 // const-valued view for user-provided D^-1. OK for now, Diagonal_ is nonconst.
1295 auto diagView_2d = Diagonal_->getLocalViewDevice(Tpetra::Access::ReadWrite);
1296 scalar_view_t diagView_1d = Kokkos::subview(diagView_2d, Kokkos::ALL(), 0);
1297 KokkosSparse::gauss_seidel_numeric(
1298 mtKernelHandle_.getRawPtr(),
1299 A_->getLocalNumRows(),
1300 A_->getLocalNumCols(),
1301 kcsr.graph.row_map,
1302 kcsr.graph.entries,
1303 kcsr.values,
1304 diagView_1d,
1305 is_matrix_structurally_symmetric_);
1306 } else if (PrecType_ == Ifpack2::Details::GS || PrecType_ == Ifpack2::Details::SGS) {
1307 if (crsMat)
1308 serialGaussSeidel_ = rcp(new SerialGaussSeidel(crsMat, Diagonal_, localSmoothingIndices_, DampingFactor_));
1309 else
1310 serialGaussSeidel_ = rcp(new SerialGaussSeidel(A_, Diagonal_, localSmoothingIndices_, DampingFactor_));
1311 }
1312 } // end TimeMonitor scope
1313
1314 ComputeTime_ += (timer->wallTime() - startTime);
1315 ++NumCompute_;
1316 IsComputed_ = true;
1317}
1318
1319template <class MatrixType>
1321 ApplyInverseRichardson(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1322 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y) const {
1323 using Teuchos::as;
1324 const double numGlobalRows = as<double>(A_->getGlobalNumRows());
1325 const double numVectors = as<double>(X.getNumVectors());
1326 if (ZeroStartingSolution_) {
1327 // For the first Richardson sweep, if we are allowed to assume that
1328 // the initial guess is zero, then Richardson is just alpha times the RHS
1329 // Compute it as Y(i,j) = DampingFactor_ * X(i,j).
1330 Y.scale(DampingFactor_, X);
1331
1332 // Count (global) floating-point operations. Ifpack2 represents
1333 // this as a floating-point number rather than an integer, so that
1334 // overflow (for a very large number of calls, or a very large
1335 // problem) is approximate instead of catastrophic.
1336 double flopUpdate = 0.0;
1337 if (DampingFactor_ == STS::one()) {
1338 // Y(i,j) = X(i,j): one multiply for each entry of Y.
1339 flopUpdate = numGlobalRows * numVectors;
1340 } else {
1341 // Y(i,j) = DampingFactor_ * X(i,j):
1342 // One multiplies per entry of Y.
1343 flopUpdate = numGlobalRows * numVectors;
1344 }
1345 ApplyFlops_ += flopUpdate;
1346 if (NumSweeps_ == 1) {
1347 return;
1348 }
1349 }
1350 // If we were allowed to assume that the starting guess was zero,
1351 // then we have already done the first sweep above.
1352 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1353
1354 // Allocate a multivector if the cached one isn't perfect
1355 updateCachedMultiVector(Y.getMap(), as<size_t>(numVectors));
1356
1357 for (int j = startSweep; j < NumSweeps_; ++j) {
1358 // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1359 Tpetra::Details::residual(*A_, Y, X, *cachedMV_);
1360 Y.update(DampingFactor_, *cachedMV_, STS::one());
1361 }
1362
1363 // For each column of output, for each pass over the matrix:
1364 //
1365 // - One + and one * for each matrix entry
1366 // - One / and one + for each row of the matrix
1367 // - If the damping factor is not one: one * for each row of the
1368 // matrix. (It's not fair to count this if the damping factor is
1369 // one, since the implementation could skip it. Whether it does
1370 // or not is the implementation's choice.)
1371
1372 // Floating-point operations due to the damping factor, per matrix
1373 // row, per direction, per columm of output.
1374 const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
1375 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1376 ApplyFlops_ += as<double>(NumSweeps_ - startSweep) * numVectors *
1377 (2.0 * numGlobalNonzeros + dampingFlops);
1378}
1379
1380template <class MatrixType>
1381void Relaxation<MatrixType>::
1382 ApplyInverseJacobi(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1383 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y) const {
1384 using Teuchos::as;
1385 if (hasBlockCrsMatrix_) {
1386 ApplyInverseJacobi_BlockCrsMatrix(X, Y);
1387 return;
1388 }
1389
1390 const double numGlobalRows = as<double>(A_->getGlobalNumRows());
1391 const double numVectors = as<double>(X.getNumVectors());
1392 if (ZeroStartingSolution_) {
1393 // For the first Jacobi sweep, if we are allowed to assume that
1394 // the initial guess is zero, then Jacobi is just diagonal
1395 // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1396 // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
1397 Y.elementWiseMultiply(DampingFactor_, *Diagonal_, X, STS::zero());
1398
1399 // Count (global) floating-point operations. Ifpack2 represents
1400 // this as a floating-point number rather than an integer, so that
1401 // overflow (for a very large number of calls, or a very large
1402 // problem) is approximate instead of catastrophic.
1403 double flopUpdate = 0.0;
1404 if (DampingFactor_ == STS::one()) {
1405 // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
1406 flopUpdate = numGlobalRows * numVectors;
1407 } else {
1408 // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
1409 // Two multiplies per entry of Y.
1410 flopUpdate = 2.0 * numGlobalRows * numVectors;
1411 }
1412 ApplyFlops_ += flopUpdate;
1413 if (NumSweeps_ == 1) {
1414 return;
1415 }
1416 }
1417 // If we were allowed to assume that the starting guess was zero,
1418 // then we have already done the first sweep above.
1419 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1420
1421 // Allocate a multivector if the cached one isn't perfect
1422 updateCachedMultiVector(Y.getMap(), as<size_t>(numVectors));
1423
1424 for (int j = startSweep; j < NumSweeps_; ++j) {
1425 // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1426 Tpetra::Details::residual(*A_, Y, X, *cachedMV_);
1427 Y.elementWiseMultiply(DampingFactor_, *Diagonal_, *cachedMV_, STS::one());
1428 }
1429
1430 // For each column of output, for each pass over the matrix:
1431 //
1432 // - One + and one * for each matrix entry
1433 // - One / and one + for each row of the matrix
1434 // - If the damping factor is not one: one * for each row of the
1435 // matrix. (It's not fair to count this if the damping factor is
1436 // one, since the implementation could skip it. Whether it does
1437 // or not is the implementation's choice.)
1438
1439 // Floating-point operations due to the damping factor, per matrix
1440 // row, per direction, per columm of output.
1441 const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
1442 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1443 ApplyFlops_ += as<double>(NumSweeps_ - startSweep) * numVectors *
1444 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1445}
1446
1447template <class MatrixType>
1448void Relaxation<MatrixType>::
1449 ApplyInverseJacobi_BlockCrsMatrix(const Tpetra::MultiVector<scalar_type,
1450 local_ordinal_type,
1451 global_ordinal_type,
1452 node_type>& X,
1453 Tpetra::MultiVector<scalar_type,
1454 local_ordinal_type,
1455 global_ordinal_type,
1456 node_type>& Y) const {
1457 using Tpetra::BlockMultiVector;
1458 using BMV = BlockMultiVector<scalar_type, local_ordinal_type,
1459 global_ordinal_type, node_type>;
1460
1461 const block_crs_matrix_type* blockMatConst =
1462 dynamic_cast<const block_crs_matrix_type*>(A_.getRawPtr());
1463 TEUCHOS_TEST_FOR_EXCEPTION(blockMatConst == nullptr, std::logic_error,
1464 "This method should "
1465 "never be called if the matrix A_ is not a BlockCrsMatrix. "
1466 "Please report this bug to the Ifpack2 developers.");
1467 // mfh 23 Jan 2016: Unfortunately, the const cast is necessary.
1468 // This is because applyBlock() is nonconst (more accurate), while
1469 // apply() is const (required by Tpetra::Operator interface, but a
1470 // lie, because it possibly allocates temporary buffers).
1471 block_crs_matrix_type* blockMat =
1472 const_cast<block_crs_matrix_type*>(blockMatConst);
1473
1474 auto meshRowMap = blockMat->getRowMap();
1475 auto meshColMap = blockMat->getColMap();
1476 const local_ordinal_type blockSize = blockMat->getBlockSize();
1477
1478 BMV X_blk(X, *meshColMap, blockSize);
1479 BMV Y_blk(Y, *meshRowMap, blockSize);
1480
1481 if (ZeroStartingSolution_) {
1482 // For the first sweep, if we are allowed to assume that the
1483 // initial guess is zero, then block Jacobi is just block diagonal
1484 // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1485 Y_blk.blockWiseMultiply(DampingFactor_, blockDiag_, X_blk);
1486 if (NumSweeps_ == 1) {
1487 return;
1488 }
1489 }
1490
1491 auto pointRowMap = Y.getMap();
1492 const size_t numVecs = X.getNumVectors();
1493
1494 // We don't need to initialize the result MV, since the sparse
1495 // mat-vec will clobber its contents anyway.
1496 BMV A_times_Y(*meshRowMap, *pointRowMap, blockSize, numVecs);
1497
1498 // If we were allowed to assume that the starting guess was zero,
1499 // then we have already done the first sweep above.
1500 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1501
1502 for (int j = startSweep; j < NumSweeps_; ++j) {
1503 blockMat->applyBlock(Y_blk, A_times_Y);
1504 // Y := Y + \omega D^{-1} (X - A*Y). Use A_times_Y as scratch.
1505 Y_blk.blockJacobiUpdate(DampingFactor_, blockDiag_,
1506 X_blk, A_times_Y, STS::one());
1507 }
1508}
1509
1510template <class MatrixType>
1511void Relaxation<MatrixType>::
1512 ApplyInverseSerialGS(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1513 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1514 Tpetra::ESweepDirection direction) const {
1515 using this_type = Relaxation<MatrixType>;
1516 // The CrsMatrix version is faster, because it can access the sparse
1517 // matrix data directly, rather than by copying out each row's data
1518 // in turn. Thus, we check whether the RowMatrix is really a
1519 // CrsMatrix.
1520 //
1521 // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
1522 // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
1523 // will still be correct if the cast fails, but it will use an
1524 // unoptimized kernel.
1525 auto blockCrsMat = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(A_);
1526 auto crsMat = Details::getCrsMatrix(A_);
1527 if (blockCrsMat.get()) {
1528 const_cast<this_type&>(*this).ApplyInverseSerialGS_BlockCrsMatrix(*blockCrsMat, X, Y, direction);
1529 } else if (crsMat.get()) {
1530 ApplyInverseSerialGS_CrsMatrix(*crsMat, X, Y, direction);
1531 } else {
1532 ApplyInverseSerialGS_RowMatrix(X, Y, direction);
1533 }
1534}
1535
1536template <class MatrixType>
1537void Relaxation<MatrixType>::
1538 ApplyInverseSerialGS_RowMatrix(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1539 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1540 Tpetra::ESweepDirection direction) const {
1541 using Teuchos::Array;
1542 using Teuchos::ArrayRCP;
1543 using Teuchos::ArrayView;
1544 using Teuchos::as;
1545 using Teuchos::RCP;
1546 using Teuchos::rcp;
1547 using Teuchos::rcpFromRef;
1548 typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> MV;
1549
1550 // Tpetra's GS implementation for CrsMatrix handles zeroing out the
1551 // starting multivector itself. The generic RowMatrix version here
1552 // does not, so we have to zero out Y here.
1553 if (ZeroStartingSolution_) {
1554 Y.putScalar(STS::zero());
1555 }
1556
1557 size_t NumVectors = X.getNumVectors();
1558
1559 RCP<MV> Y2;
1560 if (IsParallel_) {
1561 if (Importer_.is_null()) { // domain and column Maps are the same.
1562 updateCachedMultiVector(Y.getMap(), NumVectors);
1563 } else {
1564 updateCachedMultiVector(Importer_->getTargetMap(), NumVectors);
1565 }
1566 Y2 = cachedMV_;
1567 } else {
1568 Y2 = rcpFromRef(Y);
1569 }
1570
1571 for (int j = 0; j < NumSweeps_; ++j) {
1572 // data exchange is here, once per sweep
1573 if (IsParallel_) {
1574 if (Importer_.is_null()) {
1575 // FIXME (mfh 27 May 2019) This doesn't deep copy -- not
1576 // clear if this is correct. Reevaluate at some point.
1577
1578 *Y2 = Y; // just copy, since domain and column Maps are the same
1579 } else {
1580 Y2->doImport(Y, *Importer_, Tpetra::INSERT);
1581 }
1582 }
1583 serialGaussSeidel_->apply(*Y2, X, direction);
1584
1585 // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1586 if (IsParallel_) {
1587 Tpetra::deep_copy(Y, *Y2);
1588 }
1589 }
1590
1591 // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
1592 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1593 const double numVectors = as<double>(X.getNumVectors());
1594 const double numGlobalRows = as<double>(A_->getGlobalNumRows());
1595 const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
1596 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1597 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1598}
1599
1600template <class MatrixType>
1601void Relaxation<MatrixType>::
1602 ApplyInverseSerialGS_CrsMatrix(const crs_matrix_type& A,
1603 const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& B,
1604 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1605 Tpetra::ESweepDirection direction) const {
1606 using Teuchos::null;
1607 using Teuchos::RCP;
1608 using Teuchos::rcp;
1609 using Teuchos::rcp_const_cast;
1610 using Teuchos::rcpFromRef;
1611 typedef scalar_type Scalar;
1612 const char prefix[] = "Ifpack2::Relaxation::SerialGS: ";
1613 const scalar_type ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1614
1615 TEUCHOS_TEST_FOR_EXCEPTION(
1616 !A.isFillComplete(), std::runtime_error,
1617 prefix << "The matrix is not fill complete.");
1618 TEUCHOS_TEST_FOR_EXCEPTION(
1619 NumSweeps_ < 0, std::invalid_argument,
1620 prefix << "The number of sweeps must be nonnegative, "
1621 "but you provided numSweeps = "
1622 << NumSweeps_ << " < 0.");
1623
1624 if (NumSweeps_ == 0) {
1625 return;
1626 }
1627
1628 RCP<const import_type> importer = A.getGraph()->getImporter();
1629
1630 RCP<const map_type> domainMap = A.getDomainMap();
1631 RCP<const map_type> rangeMap = A.getRangeMap();
1632 RCP<const map_type> rowMap = A.getGraph()->getRowMap();
1633 RCP<const map_type> colMap = A.getGraph()->getColMap();
1634
1635#ifdef HAVE_IFPACK2_DEBUG
1636 {
1637 // The relation 'isSameAs' is transitive. It's also a
1638 // collective, so we don't have to do a "shared" test for
1639 // exception (i.e., a global reduction on the test value).
1640 TEUCHOS_TEST_FOR_EXCEPTION(
1641 !X.getMap()->isSameAs(*domainMap), std::runtime_error,
1642 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1643 "multivector X be in the domain Map of the matrix.");
1644 TEUCHOS_TEST_FOR_EXCEPTION(
1645 !B.getMap()->isSameAs(*rangeMap), std::runtime_error,
1646 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1647 "B be in the range Map of the matrix.");
1648 TEUCHOS_TEST_FOR_EXCEPTION(
1649 !Diagonal_->getMap()->isSameAs(*rowMap), std::runtime_error,
1650 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1651 "D be in the row Map of the matrix.");
1652 TEUCHOS_TEST_FOR_EXCEPTION(
1653 !rowMap->isSameAs(*rangeMap), std::runtime_error,
1654 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
1655 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1656 TEUCHOS_TEST_FOR_EXCEPTION(
1657 !domainMap->isSameAs(*rangeMap), std::runtime_error,
1658 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
1659 "the range Map of the matrix be the same.");
1660 }
1661#endif
1662
1663 // Fetch a (possibly cached) temporary column Map multivector
1664 // X_colMap, and a domain Map view X_domainMap of it. Both have
1665 // constant stride by construction. We know that the domain Map
1666 // must include the column Map, because our Gauss-Seidel kernel
1667 // requires that the row Map, domain Map, and range Map are all
1668 // the same, and that each process owns all of its own diagonal
1669 // entries of the matrix.
1670
1671 RCP<multivector_type> X_colMap;
1672 RCP<multivector_type> X_domainMap;
1673 bool copyBackOutput = false;
1674 if (importer.is_null()) {
1675 X_colMap = Teuchos::rcpFromRef(X);
1676 X_domainMap = Teuchos::rcpFromRef(X);
1677 // Column Map and domain Map are the same, so there are no
1678 // remote entries. Thus, if we are not setting the initial
1679 // guess to zero, we don't have to worry about setting remote
1680 // entries to zero, even though we are not doing an Import in
1681 // this case.
1682 if (ZeroStartingSolution_) {
1683 X_colMap->putScalar(ZERO);
1684 }
1685 // No need to copy back to X at end.
1686 } else { // Column Map and domain Map are _not_ the same.
1687 updateCachedMultiVector(colMap, X.getNumVectors());
1688 X_colMap = cachedMV_;
1689 X_domainMap = X_colMap->offsetViewNonConst(domainMap, 0);
1690
1691 if (ZeroStartingSolution_) {
1692 // No need for an Import, since we're filling with zeros.
1693 X_colMap->putScalar(ZERO);
1694 } else {
1695 // We could just copy X into X_domainMap. However, that
1696 // wastes a copy, because the Import also does a copy (plus
1697 // communication). Since the typical use case for
1698 // Gauss-Seidel is a small number of sweeps (2 is typical), we
1699 // don't want to waste that copy. Thus, we do the Import
1700 // here, and skip the first Import in the first sweep.
1701 // Importing directly from X effects the copy into X_domainMap
1702 // (which is a view of X_colMap).
1703 X_colMap->doImport(X, *importer, Tpetra::INSERT);
1704 }
1705 copyBackOutput = true; // Don't forget to copy back at end.
1706 } // if column and domain Maps are (not) the same
1707
1708 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1709 if (!importer.is_null() && sweep > 0) {
1710 // We already did the first Import for the zeroth sweep above,
1711 // if it was necessary.
1712 X_colMap->doImport(*X_domainMap, *importer, Tpetra::INSERT);
1713 }
1714 // Do local Gauss-Seidel (forward, backward or symmetric)
1715 serialGaussSeidel_->apply(*X_colMap, B, direction);
1716 }
1717
1718 if (copyBackOutput) {
1719 try {
1720 deep_copy(X, *X_domainMap); // Copy result back into X.
1721 } catch (std::exception& e) {
1722 TEUCHOS_TEST_FOR_EXCEPTION(
1723 true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
1724 "threw an exception: "
1725 << e.what());
1726 }
1727 }
1728
1729 // For each column of output, for each sweep over the matrix:
1730 //
1731 // - One + and one * for each matrix entry
1732 // - One / and one + for each row of the matrix
1733 // - If the damping factor is not one: one * for each row of the
1734 // matrix. (It's not fair to count this if the damping factor is
1735 // one, since the implementation could skip it. Whether it does
1736 // or not is the implementation's choice.)
1737
1738 // Floating-point operations due to the damping factor, per matrix
1739 // row, per direction, per columm of output.
1740 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1741 const double numVectors = X.getNumVectors();
1742 const double numGlobalRows = A_->getGlobalNumRows();
1743 const double numGlobalNonzeros = A_->getGlobalNumEntries();
1744 ApplyFlops_ += NumSweeps_ * numVectors *
1745 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1746}
1747
1748template <class MatrixType>
1749void Relaxation<MatrixType>::
1750 ApplyInverseSerialGS_BlockCrsMatrix(const block_crs_matrix_type& A,
1751 const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1752 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1753 Tpetra::ESweepDirection direction) {
1754 using Teuchos::RCP;
1755 using Teuchos::rcp;
1756 using Teuchos::rcpFromRef;
1757 using Tpetra::INSERT;
1758
1759 // FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
1760 //
1761 // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
1762 // multiple right-hand sides, unless the input or output MultiVector
1763 // does not have constant stride. We should check for that case
1764 // here, in case it doesn't work in localGaussSeidel (which is
1765 // entirely possible).
1766 block_multivector_type yBlock(Y, *(A.getGraph()->getDomainMap()), A.getBlockSize());
1767 const block_multivector_type xBlock(X, *(A.getColMap()), A.getBlockSize());
1768
1769 bool performImport = false;
1770 RCP<block_multivector_type> yBlockCol;
1771 if (Importer_.is_null()) {
1772 yBlockCol = rcpFromRef(yBlock);
1773 } else {
1774 if (yBlockColumnPointMap_.is_null() ||
1775 yBlockColumnPointMap_->getNumVectors() != yBlock.getNumVectors() ||
1776 yBlockColumnPointMap_->getBlockSize() != yBlock.getBlockSize()) {
1777 yBlockColumnPointMap_ =
1778 rcp(new block_multivector_type(*(A.getColMap()), A.getBlockSize(),
1779 static_cast<local_ordinal_type>(yBlock.getNumVectors())));
1780 }
1781 yBlockCol = yBlockColumnPointMap_;
1782 if (pointImporter_.is_null()) {
1783 auto srcMap = rcp(new map_type(yBlock.getPointMap()));
1784 auto tgtMap = rcp(new map_type(yBlockCol->getPointMap()));
1785 pointImporter_ = rcp(new import_type(srcMap, tgtMap));
1786 }
1787 performImport = true;
1788 }
1789
1790 multivector_type yBlock_mv;
1791 multivector_type yBlockCol_mv;
1792 RCP<const multivector_type> yBlockColPointDomain;
1793 if (performImport) { // create views (shallow copies)
1794 yBlock_mv = yBlock.getMultiVectorView();
1795 yBlockCol_mv = yBlockCol->getMultiVectorView();
1796 yBlockColPointDomain =
1797 yBlockCol_mv.offsetView(A.getDomainMap(), 0);
1798 }
1799
1800 if (ZeroStartingSolution_) {
1801 yBlockCol->putScalar(STS::zero());
1802 } else if (performImport) {
1803 yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1804 }
1805
1806 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1807 if (performImport && sweep > 0) {
1808 yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1809 }
1810 serialGaussSeidel_->applyBlock(*yBlockCol, xBlock, direction);
1811 if (performImport) {
1812 Tpetra::deep_copy(Y, *yBlockColPointDomain);
1813 }
1814 }
1815}
1816
1817template <class MatrixType>
1818void Relaxation<MatrixType>::
1819 ApplyInverseMTGS_CrsMatrix(
1820 const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& B,
1821 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1822 Tpetra::ESweepDirection direction) const {
1823 using Teuchos::as;
1824 using Teuchos::null;
1825 using Teuchos::RCP;
1826 using Teuchos::rcp;
1827 using Teuchos::rcp_const_cast;
1828 using Teuchos::rcpFromRef;
1829
1830 typedef scalar_type Scalar;
1831
1832 const char prefix[] = "Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1833 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1834
1835 auto crsMat = Details::getCrsMatrix(A_);
1836 TEUCHOS_TEST_FOR_EXCEPTION(crsMat.is_null(), std::logic_error,
1837 "Ifpack2::Relaxation::apply: "
1838 "Multithreaded Gauss-Seidel methods currently only work when the "
1839 "input matrix is a Tpetra::CrsMatrix.");
1840
1841 // Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
1842 TEUCHOS_TEST_FOR_EXCEPTION(!localSmoothingIndices_.is_null(), std::logic_error,
1843 "Ifpack2's implementation of Multithreaded Gauss-Seidel does not "
1844 "implement the case where the user supplies an iteration order. "
1845 "This error used to appear as \"MT GaussSeidel ignores the given "
1846 "order\". "
1847 "I tried to add more explanation, but I didn't implement \"MT "
1848 "GaussSeidel\" [sic]. "
1849 "You'll have to ask the person who did.");
1850
1851 TEUCHOS_TEST_FOR_EXCEPTION(!crsMat->isFillComplete(), std::runtime_error, prefix << "The "
1852 "input CrsMatrix is not fill complete. Please call fillComplete "
1853 "on the matrix, then do setup again, before calling apply(). "
1854 "\"Do setup\" means that if only the matrix's values have changed "
1855 "since last setup, you need only call compute(). If the matrix's "
1856 "structure may also have changed, you must first call initialize(), "
1857 "then call compute(). If you have not set up this preconditioner "
1858 "for this matrix before, you must first call initialize(), then "
1859 "call compute().");
1860 TEUCHOS_TEST_FOR_EXCEPTION(NumSweeps_ < 0, std::logic_error, prefix << ": NumSweeps_ = " << NumSweeps_ << " < 0. Please report this bug to the Ifpack2 "
1861 "developers.");
1862 if (NumSweeps_ == 0) {
1863 return;
1864 }
1865
1866 RCP<const import_type> importer = crsMat->getGraph()->getImporter();
1867 TEUCHOS_TEST_FOR_EXCEPTION(
1868 !crsMat->getGraph()->getExporter().is_null(), std::runtime_error,
1869 "This method's implementation currently requires that the matrix's row, "
1870 "domain, and range Maps be the same. This cannot be the case, because "
1871 "the matrix has a nontrivial Export object.");
1872
1873 RCP<const map_type> domainMap = crsMat->getDomainMap();
1874 RCP<const map_type> rangeMap = crsMat->getRangeMap();
1875 RCP<const map_type> rowMap = crsMat->getGraph()->getRowMap();
1876 RCP<const map_type> colMap = crsMat->getGraph()->getColMap();
1877
1878#ifdef HAVE_IFPACK2_DEBUG
1879 {
1880 // The relation 'isSameAs' is transitive. It's also a
1881 // collective, so we don't have to do a "shared" test for
1882 // exception (i.e., a global reduction on the test value).
1883 TEUCHOS_TEST_FOR_EXCEPTION(
1884 !X.getMap()->isSameAs(*domainMap), std::runtime_error,
1885 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1886 "multivector X be in the domain Map of the matrix.");
1887 TEUCHOS_TEST_FOR_EXCEPTION(
1888 !B.getMap()->isSameAs(*rangeMap), std::runtime_error,
1889 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1890 "B be in the range Map of the matrix.");
1891 TEUCHOS_TEST_FOR_EXCEPTION(
1892 !rowMap->isSameAs(*rangeMap), std::runtime_error,
1893 "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1894 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1895 TEUCHOS_TEST_FOR_EXCEPTION(
1896 !domainMap->isSameAs(*rangeMap), std::runtime_error,
1897 "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1898 "the range Map of the matrix be the same.");
1899 }
1900#else
1901 // Forestall any compiler warnings for unused variables.
1902 (void)rangeMap;
1903 (void)rowMap;
1904#endif // HAVE_IFPACK2_DEBUG
1905
1906 // Fetch a (possibly cached) temporary column Map multivector
1907 // X_colMap, and a domain Map view X_domainMap of it. Both have
1908 // constant stride by construction. We know that the domain Map
1909 // must include the column Map, because our Gauss-Seidel kernel
1910 // requires that the row Map, domain Map, and range Map are all
1911 // the same, and that each process owns all of its own diagonal
1912 // entries of the matrix.
1913
1914 RCP<multivector_type> X_colMap;
1915 RCP<multivector_type> X_domainMap;
1916 bool copyBackOutput = false;
1917 if (importer.is_null()) {
1918 if (X.isConstantStride()) {
1919 X_colMap = rcpFromRef(X);
1920 X_domainMap = rcpFromRef(X);
1921
1922 // Column Map and domain Map are the same, so there are no
1923 // remote entries. Thus, if we are not setting the initial
1924 // guess to zero, we don't have to worry about setting remote
1925 // entries to zero, even though we are not doing an Import in
1926 // this case.
1927 if (ZeroStartingSolution_) {
1928 X_colMap->putScalar(ZERO);
1929 }
1930 // No need to copy back to X at end.
1931 } else {
1932 // We must copy X into a constant stride multivector.
1933 // Just use the cached column Map multivector for that.
1934 // force=true means fill with zeros, so no need to fill
1935 // remote entries (not in domain Map) with zeros.
1936 updateCachedMultiVector(colMap, as<size_t>(X.getNumVectors()));
1937 X_colMap = cachedMV_;
1938 // X_domainMap is always a domain Map view of the column Map
1939 // multivector. In this case, the domain and column Maps are
1940 // the same, so X_domainMap _is_ X_colMap.
1941 X_domainMap = X_colMap;
1942 if (!ZeroStartingSolution_) { // Don't copy if zero initial guess
1943 try {
1944 deep_copy(*X_domainMap, X); // Copy X into constant stride MV
1945 } catch (std::exception& e) {
1946 std::ostringstream os;
1947 os << "Ifpack2::Relaxation::MTGaussSeidel: "
1948 "deep_copy(*X_domainMap, X) threw an exception: "
1949 << e.what() << ".";
1950 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what());
1951 }
1952 }
1953 copyBackOutput = true; // Don't forget to copy back at end.
1954 /*
1955 TPETRA_EFFICIENCY_WARNING(
1956 ! X.isConstantStride (),
1957 std::runtime_error,
1958 "MTGaussSeidel: The current implementation of the Gauss-Seidel "
1959 "kernel requires that X and B both have constant stride. Since X "
1960 "does not have constant stride, we had to make a copy. This is a "
1961 "limitation of the current implementation and not your fault, but we "
1962 "still report it as an efficiency warning for your information.");
1963 */
1964 }
1965 } else { // Column Map and domain Map are _not_ the same.
1966 updateCachedMultiVector(colMap, as<size_t>(X.getNumVectors()));
1967 X_colMap = cachedMV_;
1968
1969 X_domainMap = X_colMap->offsetViewNonConst(domainMap, 0);
1970
1971 if (ZeroStartingSolution_) {
1972 // No need for an Import, since we're filling with zeros.
1973 X_colMap->putScalar(ZERO);
1974 } else {
1975 // We could just copy X into X_domainMap. However, that
1976 // wastes a copy, because the Import also does a copy (plus
1977 // communication). Since the typical use case for
1978 // Gauss-Seidel is a small number of sweeps (2 is typical), we
1979 // don't want to waste that copy. Thus, we do the Import
1980 // here, and skip the first Import in the first sweep.
1981 // Importing directly from X effects the copy into X_domainMap
1982 // (which is a view of X_colMap).
1983 X_colMap->doImport(X, *importer, Tpetra::CombineMode::INSERT);
1984 }
1985 copyBackOutput = true; // Don't forget to copy back at end.
1986 } // if column and domain Maps are (not) the same
1987
1988 // The Gauss-Seidel / SOR kernel expects multivectors of constant
1989 // stride. X_colMap is by construction, but B might not be. If
1990 // it's not, we have to make a copy.
1991 RCP<const multivector_type> B_in;
1992 if (B.isConstantStride()) {
1993 B_in = rcpFromRef(B);
1994 } else {
1995 // Range Map and row Map are the same in this case, so we can
1996 // use the cached row Map multivector to store a constant stride
1997 // copy of B.
1998 RCP<multivector_type> B_in_nonconst = rcp(new multivector_type(rowMap, B.getNumVectors()));
1999 try {
2000 deep_copy(*B_in_nonconst, B);
2001 } catch (std::exception& e) {
2002 std::ostringstream os;
2003 os << "Ifpack2::Relaxation::MTGaussSeidel: "
2004 "deep_copy(*B_in_nonconst, B) threw an exception: "
2005 << e.what() << ".";
2006 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what());
2007 }
2008 B_in = rcp_const_cast<const multivector_type>(B_in_nonconst);
2009
2010 /*
2011 TPETRA_EFFICIENCY_WARNING(
2012 ! B.isConstantStride (),
2013 std::runtime_error,
2014 "MTGaussSeidel: The current implementation requires that B have "
2015 "constant stride. Since B does not have constant stride, we had to "
2016 "copy it into a separate constant-stride multivector. This is a "
2017 "limitation of the current implementation and not your fault, but we "
2018 "still report it as an efficiency warning for your information.");
2019 */
2020 }
2021
2022 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice();
2023
2024 bool update_y_vector = true;
2025 // false as it was done up already, and we dont want to zero it in each sweep.
2026 bool zero_x_vector = false;
2027
2028 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2029 if (!importer.is_null() && sweep > 0) {
2030 // We already did the first Import for the zeroth sweep above,
2031 // if it was necessary.
2032 X_colMap->doImport(*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
2033 }
2034
2035 if (direction == Tpetra::Symmetric) {
2036 KokkosSparse::symmetric_gauss_seidel_apply(mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2037 kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2038 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2039 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2040 zero_x_vector, update_y_vector, DampingFactor_, 1);
2041 } else if (direction == Tpetra::Forward) {
2042 KokkosSparse::forward_sweep_gauss_seidel_apply(mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2043 kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2044 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2045 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2046 zero_x_vector, update_y_vector, DampingFactor_, 1);
2047 } else if (direction == Tpetra::Backward) {
2048 KokkosSparse::backward_sweep_gauss_seidel_apply(mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2049 kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2050 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2051 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2052 zero_x_vector, update_y_vector, DampingFactor_, 1);
2053 } else {
2054 TEUCHOS_TEST_FOR_EXCEPTION(
2055 true, std::invalid_argument,
2056 prefix << "The 'direction' enum does not have any of its valid "
2057 "values: Forward, Backward, or Symmetric.");
2058 }
2059 update_y_vector = false;
2060 }
2061
2062 if (copyBackOutput) {
2063 try {
2064 deep_copy(X, *X_domainMap); // Copy result back into X.
2065 } catch (std::exception& e) {
2066 TEUCHOS_TEST_FOR_EXCEPTION(
2067 true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
2068 "threw an exception: "
2069 << e.what());
2070 }
2071 }
2072
2073 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2074 const double numVectors = as<double>(X.getNumVectors());
2075 const double numGlobalRows = as<double>(A_->getGlobalNumRows());
2076 const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
2077 double ApplyFlops = NumSweeps_ * numVectors *
2078 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2079 if (direction == Tpetra::Symmetric)
2080 ApplyFlops *= 2.0;
2081 ApplyFlops_ += ApplyFlops;
2082}
2083
2084template <class MatrixType>
2086 std::ostringstream os;
2087
2088 // Output is a valid YAML dictionary in flow style. If you don't
2089 // like everything on a single line, you should call describe()
2090 // instead.
2091 os << "\"Ifpack2::Relaxation\": {";
2092
2093 os << "Initialized: " << (isInitialized() ? "true" : "false") << ", "
2094 << "Computed: " << (isComputed() ? "true" : "false") << ", ";
2095
2096 // It's useful to print this instance's relaxation method (Jacobi,
2097 // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info
2098 // than that, call describe() instead.
2099 os << "Type: ";
2100 if (PrecType_ == Ifpack2::Details::JACOBI) {
2101 os << "Jacobi";
2102 } else if (PrecType_ == Ifpack2::Details::GS) {
2103 os << "Gauss-Seidel";
2104 } else if (PrecType_ == Ifpack2::Details::SGS) {
2105 os << "Symmetric Gauss-Seidel";
2106 } else if (PrecType_ == Ifpack2::Details::MTGS) {
2107 os << "MT Gauss-Seidel";
2108 } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2109 os << "MT Symmetric Gauss-Seidel";
2110 } else if (PrecType_ == Ifpack2::Details::GS2) {
2111 os << "Two-stage Gauss-Seidel";
2112 } else if (PrecType_ == Ifpack2::Details::SGS2) {
2113 os << "Two-stage Symmetric Gauss-Seidel";
2114 } else {
2115 os << "INVALID";
2116 }
2117 if (hasBlockCrsMatrix_)
2118 os << ", BlockCrs";
2119
2120 os << ", "
2121 << "sweeps: " << NumSweeps_ << ", "
2122 << "damping factor: " << DampingFactor_ << ", ";
2123
2124 if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2125 os << "\"relaxation: mtgs cluster size\": " << clusterSize_ << ", ";
2126 os << "\"relaxation: long row threshold\": " << longRowThreshold_ << ", ";
2127 os << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << ", ";
2128 os << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2129 switch (mtColoringAlgorithm_) {
2130 case KokkosGraph::COLORING_DEFAULT:
2131 os << "DEFAULT";
2132 break;
2133 case KokkosGraph::COLORING_SERIAL:
2134 os << "SERIAL";
2135 break;
2136 case KokkosGraph::COLORING_VB:
2137 os << "VB";
2138 break;
2139 case KokkosGraph::COLORING_VBBIT:
2140 os << "VBBIT";
2141 break;
2142 case KokkosGraph::COLORING_VBCS:
2143 os << "VBCS";
2144 break;
2145 case KokkosGraph::COLORING_VBD:
2146 os << "VBD";
2147 break;
2148 case KokkosGraph::COLORING_VBDBIT:
2149 os << "VBDBIT";
2150 break;
2151 case KokkosGraph::COLORING_EB:
2152 os << "EB";
2153 break;
2154 default:
2155 os << "*Invalid*";
2156 }
2157 os << ", ";
2158 }
2159
2160 if (PrecType_ == Ifpack2::Details::GS2 ||
2161 PrecType_ == Ifpack2::Details::SGS2) {
2162 os << "outer sweeps: " << NumOuterSweeps_ << ", "
2163 << "inner sweeps: " << NumInnerSweeps_ << ", "
2164 << "inner damping factor: " << InnerDampingFactor_ << ", ";
2165 }
2166
2167 if (DoL1Method_) {
2168 os << "use l1: " << DoL1Method_ << ", "
2169 << "l1 eta: " << L1Eta_ << ", ";
2170 }
2171
2172 if (A_.is_null()) {
2173 os << "Matrix: null";
2174 } else {
2175 os << "Global matrix dimensions: ["
2176 << A_->getGlobalNumRows() << ", " << A_->getGlobalNumCols() << "]"
2177 << ", Global nnz: " << A_->getGlobalNumEntries();
2178 }
2179
2180 os << "}";
2181 return os.str();
2182}
2183
2184template <class MatrixType>
2186 describe(Teuchos::FancyOStream& out,
2187 const Teuchos::EVerbosityLevel verbLevel) const {
2188 using std::endl;
2189 using Teuchos::OSTab;
2190 using Teuchos::TypeNameTraits;
2191 using Teuchos::VERB_DEFAULT;
2192 using Teuchos::VERB_EXTREME;
2193 using Teuchos::VERB_HIGH;
2194 using Teuchos::VERB_LOW;
2195 using Teuchos::VERB_MEDIUM;
2196 using Teuchos::VERB_NONE;
2197
2198 const Teuchos::EVerbosityLevel vl =
2199 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2200
2201 const int myRank = this->getComm()->getRank();
2202
2203 // none: print nothing
2204 // low: print O(1) info from Proc 0
2205 // medium:
2206 // high:
2207 // extreme:
2208
2209 if (vl != VERB_NONE && myRank == 0) {
2210 // Describable interface asks each implementation to start with a tab.
2211 OSTab tab1(out);
2212 // Output is valid YAML; hence the quotes, to protect the colons.
2213 out << "\"Ifpack2::Relaxation\":" << endl;
2214 OSTab tab2(out);
2215 out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name() << "\""
2216 << endl;
2217 if (this->getObjectLabel() != "") {
2218 out << "Label: " << this->getObjectLabel() << endl;
2219 }
2220 out << "Initialized: " << (isInitialized() ? "true" : "false") << endl
2221 << "Computed: " << (isComputed() ? "true" : "false") << endl
2222 << "Parameters: " << endl;
2223 {
2224 OSTab tab3(out);
2225 out << "\"relaxation: type\": ";
2226 if (PrecType_ == Ifpack2::Details::JACOBI) {
2227 out << "Jacobi";
2228 } else if (PrecType_ == Ifpack2::Details::GS) {
2229 out << "Gauss-Seidel";
2230 } else if (PrecType_ == Ifpack2::Details::SGS) {
2231 out << "Symmetric Gauss-Seidel";
2232 } else if (PrecType_ == Ifpack2::Details::MTGS) {
2233 out << "MT Gauss-Seidel";
2234 } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2235 out << "MT Symmetric Gauss-Seidel";
2236 } else if (PrecType_ == Ifpack2::Details::GS2) {
2237 out << "Two-stage Gauss-Seidel";
2238 } else if (PrecType_ == Ifpack2::Details::SGS2) {
2239 out << "Two-stage Symmetric Gauss-Seidel";
2240 } else {
2241 out << "INVALID";
2242 }
2243 // We quote these parameter names because they contain colons.
2244 // YAML uses the colon to distinguish key from value.
2245 out << endl
2246 << "\"relaxation: sweeps\": " << NumSweeps_ << endl
2247 << "\"relaxation: damping factor\": " << DampingFactor_ << endl
2248 << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2249 << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2250 << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2251 << "\"relaxation: use l1\": " << DoL1Method_ << endl
2252 << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
2253 if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2254 out << "\"relaxation: mtgs cluster size\": " << clusterSize_ << endl;
2255 out << "\"relaxation: long row threshold\": " << longRowThreshold_ << endl;
2256 out << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << endl;
2257 out << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2258 switch (mtColoringAlgorithm_) {
2259 case KokkosGraph::COLORING_DEFAULT:
2260 out << "DEFAULT";
2261 break;
2262 case KokkosGraph::COLORING_SERIAL:
2263 out << "SERIAL";
2264 break;
2265 case KokkosGraph::COLORING_VB:
2266 out << "VB";
2267 break;
2268 case KokkosGraph::COLORING_VBBIT:
2269 out << "VBBIT";
2270 break;
2271 case KokkosGraph::COLORING_VBCS:
2272 out << "VBCS";
2273 break;
2274 case KokkosGraph::COLORING_VBD:
2275 out << "VBD";
2276 break;
2277 case KokkosGraph::COLORING_VBDBIT:
2278 out << "VBDBIT";
2279 break;
2280 case KokkosGraph::COLORING_EB:
2281 out << "EB";
2282 break;
2283 default:
2284 out << "*Invalid*";
2285 }
2286 out << endl;
2287 }
2288 if (PrecType_ == Ifpack2::Details::GS2 || PrecType_ == Ifpack2::Details::SGS2) {
2289 out << "\"relaxation: inner damping factor\": " << InnerDampingFactor_ << endl;
2290 out << "\"relaxation: outer sweeps\" : " << NumOuterSweeps_ << endl;
2291 out << "\"relaxation: inner sweeps\" : " << NumInnerSweeps_ << endl;
2292 }
2293 }
2294 out << "Computed quantities:" << endl;
2295 {
2296 OSTab tab3(out);
2297 out << "Global number of rows: " << A_->getGlobalNumRows() << endl
2298 << "Global number of columns: " << A_->getGlobalNumCols() << endl;
2299 }
2300 if (checkDiagEntries_ && isComputed()) {
2301 out << "Properties of input diagonal entries:" << endl;
2302 {
2303 OSTab tab3(out);
2304 out << "Magnitude of minimum-magnitude diagonal entry: "
2305 << globalMinMagDiagEntryMag_ << endl
2306 << "Magnitude of maximum-magnitude diagonal entry: "
2307 << globalMaxMagDiagEntryMag_ << endl
2308 << "Number of diagonal entries with small magnitude: "
2309 << globalNumSmallDiagEntries_ << endl
2310 << "Number of zero diagonal entries: "
2311 << globalNumZeroDiagEntries_ << endl
2312 << "Number of diagonal entries with negative real part: "
2313 << globalNumNegDiagEntries_ << endl
2314 << "Abs 2-norm diff between computed and actual inverse "
2315 << "diagonal: " << globalDiagNormDiff_ << endl;
2316 }
2317 }
2318 if (isComputed()) {
2319 out << "Saved diagonal offsets: "
2320 << (savedDiagOffsets_ ? "true" : "false") << endl;
2321 }
2322 out << "Call counts and total times (in seconds): " << endl;
2323 {
2324 OSTab tab3(out);
2325 out << "initialize: " << endl;
2326 {
2327 OSTab tab4(out);
2328 out << "Call count: " << NumInitialize_ << endl;
2329 out << "Total time: " << InitializeTime_ << endl;
2330 }
2331 out << "compute: " << endl;
2332 {
2333 OSTab tab4(out);
2334 out << "Call count: " << NumCompute_ << endl;
2335 out << "Total time: " << ComputeTime_ << endl;
2336 }
2337 out << "apply: " << endl;
2338 {
2339 OSTab tab4(out);
2340 out << "Call count: " << NumApply_ << endl;
2341 out << "Total time: " << ApplyTime_ << endl;
2342 }
2343 }
2344 }
2345}
2346
2347} // namespace Ifpack2
2348
2349#define IFPACK2_RELAXATION_INSTANT(S, LO, GO, N) \
2350 template class Ifpack2::Relaxation<Tpetra::RowMatrix<S, LO, GO, N>>;
2351
2352#endif // IFPACK2_RELAXATION_DEF_HPP
File for utility functions.
Ifpack2's implementation of Trilinos::Details::LinearSolver interface.
Definition Ifpack2_Details_LinearSolver_decl.hpp:75
Diagonal preconditioner.
Definition Ifpack2_Diagonal_decl.hpp:45
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition Ifpack2_Relaxation_decl.hpp:215
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition Ifpack2_Relaxation_def.hpp:182
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:227
void setParameters(const Teuchos::ParameterList &params)
Set the relaxation / preconditioner parameters.
Definition Ifpack2_Relaxation_def.hpp:434
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:230
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition Ifpack2_Relaxation_def.hpp:442
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition Ifpack2_Relaxation_def.hpp:461
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition Ifpack2_Relaxation_def.hpp:514
int getNumCompute() const
Total number of calls to compute().
Definition Ifpack2_Relaxation_def.hpp:494
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition Ifpack2_Relaxation_def.hpp:453
void compute()
Compute the preconditioner ("numeric setup");.
Definition Ifpack2_Relaxation_def.hpp:960
void initialize()
Initialize the preconditioner ("symbolic setup").
Definition Ifpack2_Relaxation_def.hpp:674
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition Ifpack2_Relaxation_def.hpp:509
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object's attributes to the given output stream.
Definition Ifpack2_Relaxation_def.hpp:2186
std::string description() const
A simple one-line description of this object.
Definition Ifpack2_Relaxation_def.hpp:2085
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition Ifpack2_Relaxation_def.hpp:191
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition Ifpack2_Relaxation_def.hpp:504
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition Ifpack2_Relaxation_def.hpp:162
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:221
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition Ifpack2_Relaxation_def.hpp:541
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:224
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the input matrix to X, returning the result in Y.
Definition Ifpack2_Relaxation_def.hpp:653
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition Ifpack2_Relaxation_decl.hpp:241
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition Ifpack2_Relaxation_decl.hpp:236
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition Ifpack2_Relaxation_def.hpp:529
int getNumApply() const
Total number of calls to apply().
Definition Ifpack2_Relaxation_def.hpp:499
int getNumInitialize() const
Total number of calls to initialize().
Definition Ifpack2_Relaxation_def.hpp:489
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition Ifpack2_Relaxation_def.hpp:519
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition Ifpack2_Relaxation_def.hpp:484
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition Ifpack2_Relaxation_def.hpp:474
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition Ifpack2_Relaxation_def.hpp:524
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40
void getValidParameters(Teuchos::ParameterList &params)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition Ifpack2_Parameters.cpp:18