Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Relaxation_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_RELAXATION_DEF_HPP
11#define IFPACK2_RELAXATION_DEF_HPP
12
13#include "Teuchos_StandardParameterEntryValidators.hpp"
14#include "Teuchos_TimeMonitor.hpp"
15#include "Tpetra_CrsMatrix.hpp"
16#include "Tpetra_BlockCrsMatrix.hpp"
17#include "Tpetra_BlockView.hpp"
18#include "Ifpack2_Utilities.hpp"
19#include "Ifpack2_Details_getCrsMatrix.hpp"
20#include "MatrixMarket_Tpetra.hpp"
21#include "Tpetra_Details_residual.hpp"
22#include <cstdlib>
23#include <memory>
24#include <sstream>
25#include "KokkosSparse_gauss_seidel.hpp"
26
27namespace {
28// Validate that a given int is nonnegative.
29class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
30 public:
31 // Constructor (does nothing).
32 NonnegativeIntValidator() {}
33
34 // ParameterEntryValidator wants this method.
35 Teuchos::ParameterEntryValidator::ValidStringsList validStringValues() const {
36 return Teuchos::null;
37 }
38
39 // Actually validate the parameter's value.
40 void
41 validate(const Teuchos::ParameterEntry& entry,
42 const std::string& paramName,
43 const std::string& sublistName) const {
44 using std::endl;
45 Teuchos::any anyVal = entry.getAny(true);
46 const std::string entryName = entry.getAny(false).typeName();
47
48 TEUCHOS_TEST_FOR_EXCEPTION(
49 anyVal.type() != typeid(int),
50 Teuchos::Exceptions::InvalidParameterType,
51 "Parameter \"" << paramName << "\" in sublist \"" << sublistName
52 << "\" has the wrong type." << endl
53 << "Parameter: " << paramName
54 << endl
55 << "Type specified: " << entryName << endl
56 << "Type required: int" << endl);
57
58 const int val = Teuchos::any_cast<int>(anyVal);
59 TEUCHOS_TEST_FOR_EXCEPTION(
60 val < 0, Teuchos::Exceptions::InvalidParameterValue,
61 "Parameter \"" << paramName << "\" in sublist \"" << sublistName
62 << "\" is negative." << endl
63 << "Parameter: " << paramName
64 << endl
65 << "Value specified: " << val << endl
66 << "Required range: [0, INT_MAX]" << endl);
67 }
68
69 // ParameterEntryValidator wants this method.
70 const std::string getXMLTypeName() const {
71 return "NonnegativeIntValidator";
72 }
73
74 // ParameterEntryValidator wants this method.
75 void
76 printDoc(const std::string& docString,
77 std::ostream& out) const {
78 Teuchos::StrUtils::printLines(out, "# ", docString);
79 out << "#\tValidator Used: " << std::endl;
80 out << "#\t\tNonnegativeIntValidator" << std::endl;
81 }
82};
83
84// A way to get a small positive number (eps() for floating-point
85// types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
86// define it (for example, for integer values).
87template <class Scalar, const bool isOrdinal = Teuchos::ScalarTraits<Scalar>::isOrdinal>
88class SmallTraits {
89 public:
90 // Return eps if Scalar is a floating-point type, else return 1.
91 static const Scalar eps();
92};
93
94// Partial specialization for when Scalar is not a floating-point type.
95template <class Scalar>
96class SmallTraits<Scalar, true> {
97 public:
98 static const Scalar eps() {
99 return Teuchos::ScalarTraits<Scalar>::one();
100 }
101};
102
103// Partial specialization for when Scalar is a floating-point type.
104template <class Scalar>
105class SmallTraits<Scalar, false> {
106 public:
107 static const Scalar eps() {
108 return Teuchos::ScalarTraits<Scalar>::eps();
109 }
110};
111
112// Work-around for GitHub Issue #5269.
113template <class Scalar,
114 const bool isComplex = Teuchos::ScalarTraits<Scalar>::isComplex>
115struct RealTraits {};
116
117template <class Scalar>
118struct RealTraits<Scalar, false> {
119 using val_type = Scalar;
120 using mag_type = Scalar;
121 static KOKKOS_INLINE_FUNCTION mag_type real(const val_type& z) {
122 return z;
123 }
124};
125
126template <class Scalar>
127struct RealTraits<Scalar, true> {
128 using val_type = Scalar;
129 using mag_type = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
130 static KOKKOS_INLINE_FUNCTION mag_type real(const val_type& z) {
131#if KOKKOS_VERSION >= 40799
132 return KokkosKernels::ArithTraits<val_type>::real(z);
133#else
134 return Kokkos::ArithTraits<val_type>::real(z);
135#endif
136 }
137};
138
139template <class Scalar>
140KOKKOS_INLINE_FUNCTION typename RealTraits<Scalar>::mag_type
141getRealValue(const Scalar& z) {
142 return RealTraits<Scalar>::real(z);
143}
144
145} // namespace
146
147namespace Ifpack2 {
148
149template <class MatrixType>
150void Relaxation<MatrixType>::
151 updateCachedMultiVector(const Teuchos::RCP<const Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>>& map,
152 size_t numVecs) const {
153 // Allocate a multivector if the cached one isn't perfect. Checking
154 // for map pointer equality is much cheaper than Map::isSameAs.
155 if (cachedMV_.is_null() ||
156 map.get() != cachedMV_->getMap().get() ||
157 cachedMV_->getNumVectors() != numVecs) {
158 using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
159 global_ordinal_type, node_type>;
160 cachedMV_ = Teuchos::rcp(new MV(map, numVecs, false));
161 }
162}
163
164template <class MatrixType>
166 setMatrix(const Teuchos::RCP<const row_matrix_type>& A) {
167 if (A.getRawPtr() != A_.getRawPtr()) { // it's a different matrix
168 Importer_ = Teuchos::null;
169 pointImporter_ = Teuchos::null;
170 Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
171 isInitialized_ = false;
172 IsComputed_ = false;
173 diagOffsets_ = Kokkos::View<size_t*, device_type>();
174 savedDiagOffsets_ = false;
175 hasBlockCrsMatrix_ = false;
176 serialGaussSeidel_ = Teuchos::null;
177 if (!A.is_null()) {
178 IsParallel_ = (A->getRowMap()->getComm()->getSize() > 1);
179 }
180 A_ = A;
181 }
182}
183
184template <class MatrixType>
186 Relaxation(const Teuchos::RCP<const row_matrix_type>& A)
187 : A_(A)
188 , IsParallel_((A.is_null() || A->getRowMap().is_null() || A->getRowMap()->getComm().is_null()) ? false : // a reasonable default if there's no communicator
189 A->getRowMap()->getComm()->getSize() > 1) {
190 this->setObjectLabel("Ifpack2::Relaxation");
191}
192
193template <class MatrixType>
194Teuchos::RCP<const Teuchos::ParameterList>
196 using Teuchos::Array;
197 using Teuchos::ParameterList;
198 using Teuchos::parameterList;
199 using Teuchos::RCP;
200 using Teuchos::rcp;
201 using Teuchos::rcp_const_cast;
202 using Teuchos::rcp_implicit_cast;
203 using Teuchos::setStringToIntegralParameter;
204 typedef Teuchos::ParameterEntryValidator PEV;
205
206 if (validParams_.is_null()) {
207 RCP<ParameterList> pl = parameterList("Ifpack2::Relaxation");
208
209 // Set a validator that automatically converts from the valid
210 // string options to their enum values.
211 Array<std::string> precTypes(8);
212 precTypes[0] = "Jacobi";
213 precTypes[1] = "Gauss-Seidel";
214 precTypes[2] = "Symmetric Gauss-Seidel";
215 precTypes[3] = "MT Gauss-Seidel";
216 precTypes[4] = "MT Symmetric Gauss-Seidel";
217 precTypes[5] = "Richardson";
218 precTypes[6] = "Two-stage Gauss-Seidel";
219 precTypes[7] = "Two-stage Symmetric Gauss-Seidel";
220 Array<Details::RelaxationType> precTypeEnums(8);
221 precTypeEnums[0] = Details::JACOBI;
222 precTypeEnums[1] = Details::GS;
223 precTypeEnums[2] = Details::SGS;
224 precTypeEnums[3] = Details::MTGS;
225 precTypeEnums[4] = Details::MTSGS;
226 precTypeEnums[5] = Details::RICHARDSON;
227 precTypeEnums[6] = Details::GS2;
228 precTypeEnums[7] = Details::SGS2;
229 const std::string defaultPrecType("Jacobi");
230 setStringToIntegralParameter<Details::RelaxationType>("relaxation: type",
231 defaultPrecType, "Relaxation method", precTypes(), precTypeEnums(),
232 pl.getRawPtr());
233
234 const int numSweeps = 1;
235 RCP<PEV> numSweepsValidator =
236 rcp_implicit_cast<PEV>(rcp(new NonnegativeIntValidator));
237 pl->set("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
238 rcp_const_cast<const PEV>(numSweepsValidator));
239
240 // number of 'local' outer sweeps for two-stage GS
241 const int numOuterSweeps = 1;
242 RCP<PEV> numOuterSweepsValidator =
243 rcp_implicit_cast<PEV>(rcp(new NonnegativeIntValidator));
244 pl->set("relaxation: outer sweeps", numOuterSweeps, "Number of outer local relaxation sweeps for two-stage GS",
245 rcp_const_cast<const PEV>(numOuterSweepsValidator));
246 // number of 'local' inner sweeps for two-stage GS
247 const int numInnerSweeps = 1;
248 RCP<PEV> numInnerSweepsValidator =
249 rcp_implicit_cast<PEV>(rcp(new NonnegativeIntValidator));
250 pl->set("relaxation: inner sweeps", numInnerSweeps, "Number of inner local relaxation sweeps for two-stage GS",
251 rcp_const_cast<const PEV>(numInnerSweepsValidator));
252 // specify damping factor for the inner sweeps of two-stage GS
253 const scalar_type innerDampingFactor = STS::one();
254 pl->set("relaxation: inner damping factor", innerDampingFactor, "Damping factor for the inner sweep of two-stage GS");
255 // specify whether to use sptrsv instead of inner-iterations for two-stage GS
256 const bool innerSpTrsv = false;
257 pl->set("relaxation: inner sparse-triangular solve", innerSpTrsv, "Specify whether to use sptrsv instead of JR iterations for two-stage GS");
258 // specify whether to use compact form of recurrence for two-stage GS
259 const bool compactForm = false;
260 pl->set("relaxation: compact form", compactForm, "Specify whether to use compact form of recurrence for two-stage GS");
261
262 const scalar_type dampingFactor = STS::one();
263 pl->set("relaxation: damping factor", dampingFactor);
264
265 const bool zeroStartingSolution = true;
266 pl->set("relaxation: zero starting solution", zeroStartingSolution);
267
268 const bool doBackwardGS = false;
269 pl->set("relaxation: backward mode", doBackwardGS);
270
271 const bool doL1Method = false;
272 pl->set("relaxation: use l1", doL1Method);
273
274 const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
275 (STM::one() + STM::one()); // 1.5
276 pl->set("relaxation: l1 eta", l1eta);
277
278 const scalar_type minDiagonalValue = STS::zero();
279 pl->set("relaxation: min diagonal value", minDiagonalValue);
280
281 const bool fixTinyDiagEntries = false;
282 pl->set("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
283
284 const bool checkDiagEntries = false;
285 pl->set("relaxation: check diagonal entries", checkDiagEntries);
286
287 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
288 pl->set("relaxation: local smoothing indices", localSmoothingIndices);
289
290 const bool is_matrix_structurally_symmetric = false;
291 pl->set("relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
292
293 const bool ifpack2_dump_matrix = false;
294 pl->set("relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
295
296 const int cluster_size = 1;
297 pl->set("relaxation: mtgs cluster size", cluster_size);
298
299 pl->set("relaxation: mtgs coloring algorithm", "Default");
300
301 const int long_row_threshold = 0;
302 pl->set("relaxation: long row threshold", long_row_threshold);
303
304 const bool timer_for_apply = true;
305 pl->set("timer for apply", timer_for_apply);
306
307 validParams_ = rcp_const_cast<const ParameterList>(pl);
308 }
309 return validParams_;
310}
311
312template <class MatrixType>
313void Relaxation<MatrixType>::setParametersImpl(Teuchos::ParameterList& pl) {
314 using Teuchos::getIntegralValue;
315 using Teuchos::getStringValue;
316 using Teuchos::ParameterList;
317 using Teuchos::RCP;
318 typedef scalar_type ST; // just to make code below shorter
319
320 if (pl.isType<double>("relaxation: damping factor")) {
321 // Make sure that ST=complex can run with a damping factor that is
322 // a double.
323 ST df = pl.get<double>("relaxation: damping factor");
324 pl.remove("relaxation: damping factor");
325 pl.set("relaxation: damping factor", df);
326 }
327
328 pl.validateParametersAndSetDefaults(*getValidParameters());
329
330 const Details::RelaxationType precType =
331 getIntegralValue<Details::RelaxationType>(pl, "relaxation: type");
332 const std::string precTypeStr = getStringValue<Details::RelaxationType>(pl, "relaxation: type");
333 // We only access "relaxation: type" using strings in the rest of the code
334 pl.set<std::string>("relaxation: type", precTypeStr);
335 pl.get<std::string>("relaxation: type"); // We need to mark the parameter as "used"
336 const int numSweeps = pl.get<int>("relaxation: sweeps");
337 const ST dampingFactor = pl.get<ST>("relaxation: damping factor");
338 const bool zeroStartSol = pl.get<bool>("relaxation: zero starting solution");
339 const bool doBackwardGS = pl.get<bool>("relaxation: backward mode");
340 const bool doL1Method = pl.get<bool>("relaxation: use l1");
341 const magnitude_type l1Eta = pl.get<magnitude_type>("relaxation: l1 eta");
342 const ST minDiagonalValue = pl.get<ST>("relaxation: min diagonal value");
343 const bool fixTinyDiagEntries = pl.get<bool>("relaxation: fix tiny diagonal entries");
344 const bool checkDiagEntries = pl.get<bool>("relaxation: check diagonal entries");
345 const bool is_matrix_structurally_symmetric = pl.get<bool>("relaxation: symmetric matrix structure");
346 const bool ifpack2_dump_matrix = pl.get<bool>("relaxation: ifpack2 dump matrix");
347 const bool timer_for_apply = pl.get<bool>("timer for apply");
348 int cluster_size = 1;
349 if (pl.isParameter("relaxation: mtgs cluster size")) // optional parameter
350 cluster_size = pl.get<int>("relaxation: mtgs cluster size");
351 int long_row_threshold = 0;
352 if (pl.isParameter("relaxation: long row threshold")) // optional parameter
353 long_row_threshold = pl.get<int>("relaxation: long row threshold");
354 std::string color_algo_name = pl.get<std::string>("relaxation: mtgs coloring algorithm");
355 // convert to lowercase
356 for (char& c : color_algo_name)
357 c = tolower(c);
358 if (color_algo_name == "default")
359 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_DEFAULT;
360 else if (color_algo_name == "serial")
361 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_SERIAL;
362 else if (color_algo_name == "vb")
363 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VB;
364 else if (color_algo_name == "vbbit")
365 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBBIT;
366 else if (color_algo_name == "vbcs")
367 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBCS;
368 else if (color_algo_name == "vbd")
369 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBD;
370 else if (color_algo_name == "vbdbit")
371 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBDBIT;
372 else if (color_algo_name == "eb")
373 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_EB;
374 else {
375 std::ostringstream msg;
376 msg << "Ifpack2::Relaxation: 'relaxation: mtgs coloring algorithm' = '" << color_algo_name << "' is not valid.\n";
377 msg << "Choices (not case sensitive) are: Default, Serial, VB, VBBIT, VBCS, VBD, VBDBIT, EB.";
378 TEUCHOS_TEST_FOR_EXCEPTION(
379 true, std::invalid_argument, msg.str());
380 }
381
382 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type>>("relaxation: local smoothing indices");
383
384 // for Two-stage Gauss-Seidel
385 if (!std::is_same<double, ST>::value && pl.isType<double>("relaxation: inner damping factor")) {
386 // Make sure that ST=complex can run with a damping factor that is
387 // a double.
388 ST df = pl.get<double>("relaxation: inner damping factor");
389 pl.remove("relaxation: inner damping factor");
390 pl.set("relaxation: inner damping factor", df);
391 }
392 // If long row algorithm was requested, make sure non-cluster (point) multicolor Gauss-Seidel (aka MTGS/MTSGS) will be used.
393 if (long_row_threshold > 0) {
394 TEUCHOS_TEST_FOR_EXCEPTION(
395 cluster_size != 1, std::invalid_argument,
396 "Ifpack2::Relaxation: "
397 "Requested long row MTGS/MTSGS algorithm and cluster GS/SGS, but those are not compatible.");
398 TEUCHOS_TEST_FOR_EXCEPTION(
399 precType != Details::RelaxationType::MTGS && precType != Details::RelaxationType::MTSGS,
400 std::invalid_argument,
401 "Ifpack2::Relaxation: "
402 "Requested long row MTGS/MTSGS algorithm, but this is only compatible with preconditioner types "
403 "'MT Gauss-Seidel' and 'MT Symmetric Gauss-Seidel'.");
404 }
405
406 const ST innerDampingFactor = pl.get<ST>("relaxation: inner damping factor");
407 const int numInnerSweeps = pl.get<int>("relaxation: inner sweeps");
408 const int numOuterSweeps = pl.get<int>("relaxation: outer sweeps");
409 const bool innerSpTrsv = pl.get<bool>("relaxation: inner sparse-triangular solve");
410 const bool compactForm = pl.get<bool>("relaxation: compact form");
411
412 // "Commit" the changes, now that we've validated everything.
413 PrecType_ = precType;
414 NumSweeps_ = numSweeps;
415 DampingFactor_ = dampingFactor;
416 ZeroStartingSolution_ = zeroStartSol;
417 DoBackwardGS_ = doBackwardGS;
418 DoL1Method_ = doL1Method;
419 L1Eta_ = l1Eta;
420 MinDiagonalValue_ = minDiagonalValue;
421 fixTinyDiagEntries_ = fixTinyDiagEntries;
422 checkDiagEntries_ = checkDiagEntries;
423 clusterSize_ = cluster_size;
424 longRowThreshold_ = long_row_threshold;
425 is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
426 ifpack2_dump_matrix_ = ifpack2_dump_matrix;
427 localSmoothingIndices_ = localSmoothingIndices;
428 // for Two-stage GS
429 NumInnerSweeps_ = numInnerSweeps;
430 NumOuterSweeps_ = numOuterSweeps;
431 InnerSpTrsv_ = innerSpTrsv;
432 InnerDampingFactor_ = innerDampingFactor;
433 CompactForm_ = compactForm;
434 TimerForApply_ = timer_for_apply;
435}
436
437template <class MatrixType>
438void Relaxation<MatrixType>::setParameters(const Teuchos::ParameterList& pl) {
439 // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
440 // but otherwise, we will get [unused] in pl
441 this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
442}
443
444template <class MatrixType>
445Teuchos::RCP<const Teuchos::Comm<int>>
447 TEUCHOS_TEST_FOR_EXCEPTION(
448 A_.is_null(), std::runtime_error,
449 "Ifpack2::Relaxation::getComm: "
450 "The input matrix A is null. Please call setMatrix() with a nonnull "
451 "input matrix before calling this method.");
452 return A_->getRowMap()->getComm();
453}
454
455template <class MatrixType>
456Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type>
458 return A_;
459}
460
461template <class MatrixType>
462Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
463 typename MatrixType::global_ordinal_type,
464 typename MatrixType::node_type>>
466 TEUCHOS_TEST_FOR_EXCEPTION(
467 A_.is_null(), std::runtime_error,
468 "Ifpack2::Relaxation::getDomainMap: "
469 "The input matrix A is null. Please call setMatrix() with a nonnull "
470 "input matrix before calling this method.");
471 return A_->getDomainMap();
472}
473
474template <class MatrixType>
475Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
476 typename MatrixType::global_ordinal_type,
477 typename MatrixType::node_type>>
479 TEUCHOS_TEST_FOR_EXCEPTION(
480 A_.is_null(), std::runtime_error,
481 "Ifpack2::Relaxation::getRangeMap: "
482 "The input matrix A is null. Please call setMatrix() with a nonnull "
483 "input matrix before calling this method.");
484 return A_->getRangeMap();
485}
486
487template <class MatrixType>
489 return true;
490}
491
492template <class MatrixType>
494 return (NumInitialize_);
495}
496
497template <class MatrixType>
499 return (NumCompute_);
500}
501
502template <class MatrixType>
504 return (NumApply_);
505}
506
507template <class MatrixType>
509 return (InitializeTime_);
510}
511
512template <class MatrixType>
514 return (ComputeTime_);
515}
516
517template <class MatrixType>
519 return (ApplyTime_);
520}
521
522template <class MatrixType>
524 return (ComputeFlops_);
525}
526
527template <class MatrixType>
529 return (ApplyFlops_);
530}
531
532template <class MatrixType>
534 TEUCHOS_TEST_FOR_EXCEPTION(
535 A_.is_null(), std::runtime_error,
536 "Ifpack2::Relaxation::getNodeSmootherComplexity: "
537 "The input matrix A is null. Please call setMatrix() with a nonnull "
538 "input matrix, then call compute(), before calling this method.");
539 // Relaxation methods cost roughly one apply + one diagonal inverse per iteration
540 return A_->getLocalNumRows() + A_->getLocalNumEntries();
541}
542
543template <class MatrixType>
545 apply(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
546 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
547 Teuchos::ETransp /* mode */,
548 scalar_type alpha,
549 scalar_type beta) const {
550 using Teuchos::as;
551 using Teuchos::RCP;
552 using Teuchos::rcp;
553 using Teuchos::rcpFromRef;
554 typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
556 MV;
557 TEUCHOS_TEST_FOR_EXCEPTION(
558 A_.is_null(), std::runtime_error,
559 "Ifpack2::Relaxation::apply: "
560 "The input matrix A is null. Please call setMatrix() with a nonnull "
561 "input matrix, then call compute(), before calling this method.");
562 TEUCHOS_TEST_FOR_EXCEPTION(
563 !isComputed(),
564 std::runtime_error,
565 "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
566 "preconditioner instance before you may call apply(). You may call "
567 "isComputed() to find out if compute() has been called already.");
568 TEUCHOS_TEST_FOR_EXCEPTION(
569 X.getNumVectors() != Y.getNumVectors(),
570 std::runtime_error,
571 "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
572 "X has "
573 << X.getNumVectors() << " columns, but Y has "
574 << Y.getNumVectors() << " columns.");
575 TEUCHOS_TEST_FOR_EXCEPTION(
576 beta != STS::zero(), std::logic_error,
577 "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
578 "implemented.");
579
580 Teuchos::RCP<Teuchos::Time> timer;
581 const std::string timerName("Ifpack2::Relaxation::apply");
582 if (TimerForApply_) {
583 timer = Teuchos::TimeMonitor::lookupCounter(timerName);
584 if (timer.is_null()) {
585 timer = Teuchos::TimeMonitor::getNewCounter(timerName);
586 }
587 }
588
589 Teuchos::Time time = Teuchos::Time(timerName);
590 double startTime = time.wallTime();
591
592 {
593 Teuchos::RCP<Teuchos::TimeMonitor> timeMon;
594 if (TimerForApply_)
595 timeMon = Teuchos::rcp(new Teuchos::TimeMonitor(*timer));
596
597 // Special case: alpha == 0.
598 if (alpha == STS::zero()) {
599 // No floating-point operations, so no need to update a count.
600 Y.putScalar(STS::zero());
601 } else {
602 // If X and Y alias one another, then we need to create an
603 // auxiliary vector, Xcopy (a deep copy of X).
604 RCP<const MV> Xcopy;
605 {
606 if (X.aliases(Y)) {
607 Xcopy = rcp(new MV(X, Teuchos::Copy));
608 } else {
609 Xcopy = rcpFromRef(X);
610 }
611 }
612
613 // Each of the following methods updates the flop count itself.
614 // All implementations handle zeroing out the starting solution
615 // (if necessary) themselves.
616 switch (PrecType_) {
617 case Ifpack2::Details::JACOBI:
618 ApplyInverseJacobi(*Xcopy, Y);
619 break;
620 case Ifpack2::Details::GS:
621 ApplyInverseSerialGS(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
622 break;
623 case Ifpack2::Details::SGS:
624 ApplyInverseSerialGS(*Xcopy, Y, Tpetra::Symmetric);
625 break;
626 case Ifpack2::Details::MTGS:
627 case Ifpack2::Details::GS2:
628 ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
629 break;
630 case Ifpack2::Details::MTSGS:
631 case Ifpack2::Details::SGS2:
632 ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, Tpetra::Symmetric);
633 break;
634 case Ifpack2::Details::RICHARDSON:
635 ApplyInverseRichardson(*Xcopy, Y);
636 break;
637
638 default:
639 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
640 "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
641 << PrecType_ << ". Please report this bug to the Ifpack2 developers.");
642 }
643 if (alpha != STS::one()) {
644 Y.scale(alpha);
645 const double numGlobalRows = as<double>(A_->getGlobalNumRows());
646 const double numVectors = as<double>(Y.getNumVectors());
647 ApplyFlops_ += numGlobalRows * numVectors;
648 }
649 }
650 }
651 ApplyTime_ += (time.wallTime() - startTime);
652 ++NumApply_;
653}
654
655template <class MatrixType>
657 applyMat(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
658 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
659 Teuchos::ETransp mode) const {
660 TEUCHOS_TEST_FOR_EXCEPTION(
661 A_.is_null(), std::runtime_error,
662 "Ifpack2::Relaxation::applyMat: "
663 "The input matrix A is null. Please call setMatrix() with a nonnull "
664 "input matrix, then call compute(), before calling this method.");
665 TEUCHOS_TEST_FOR_EXCEPTION(
666 !isComputed(), std::runtime_error,
667 "Ifpack2::Relaxation::applyMat: "
668 "isComputed() must be true before you may call applyMat(). "
669 "Please call compute() before calling this method.");
670 TEUCHOS_TEST_FOR_EXCEPTION(
671 X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
672 "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors()
673 << " != Y.getNumVectors() = " << Y.getNumVectors() << ".");
674 A_->apply(X, Y, mode);
675}
676
677template <class MatrixType>
679 const char methodName[] = "Ifpack2::Relaxation::initialize";
680
681 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, methodName << ": The "
682 "input matrix A is null. Please call setMatrix() with "
683 "a nonnull input matrix before calling this method.");
684
685 Teuchos::RCP<Teuchos::Time> timer =
686 Teuchos::TimeMonitor::getNewCounter(methodName);
687
688 double startTime = timer->wallTime();
689
690 { // Timing of initialize starts here
691 Teuchos::TimeMonitor timeMon(*timer);
692 isInitialized_ = false;
693
694 {
695 auto rowMap = A_->getRowMap();
696 auto comm = rowMap.is_null() ? Teuchos::null : rowMap->getComm();
697 IsParallel_ = !comm.is_null() && comm->getSize() > 1;
698 }
699
700 // mfh 21 Mar 2013, 07 May 2019: The Import object may be null,
701 // but in that case, the domain and column Maps are the same and
702 // we don't need to Import anyway.
703 //
704 // mfh 07 May 2019: A_->getGraph() might be an
705 // OverlappingRowGraph, which doesn't have an Import object.
706 // However, in that case, the comm should have size 1.
707 Importer_ = IsParallel_ ? A_->getGraph()->getImporter() : Teuchos::null;
708
709 {
710 Teuchos::RCP<const block_crs_matrix_type> A_bcrs =
711 Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(A_);
712 hasBlockCrsMatrix_ = !A_bcrs.is_null();
713 }
714
715 serialGaussSeidel_ = Teuchos::null;
716
717 if (PrecType_ == Details::MTGS || PrecType_ == Details::MTSGS ||
718 PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
719 auto crsMat = Details::getCrsMatrix(A_);
720 TEUCHOS_TEST_FOR_EXCEPTION(crsMat.is_null(), std::logic_error, methodName << ": "
721 "Multithreaded Gauss-Seidel methods currently only work "
722 "when the input matrix is a Tpetra::CrsMatrix.");
723
724 // FIXME (mfh 27 May 2019) Dumping the matrix belongs in
725 // compute, not initialize, since users may change the matrix's
726 // values at any time before calling compute.
727 if (ifpack2_dump_matrix_) {
728 static int sequence_number = 0;
729 const std::string file_name = "Ifpack2_MT_GS_" +
730 std::to_string(sequence_number++) + ".mtx";
731 using writer_type = Tpetra::MatrixMarket::Writer<crs_matrix_type>;
732 writer_type::writeSparseFile(file_name, crsMat);
733 }
734
735 this->mtKernelHandle_ = Teuchos::rcp(new mt_kernel_handle_type());
736 if (mtKernelHandle_->get_gs_handle() == nullptr) {
737 if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2)
738 mtKernelHandle_->create_gs_handle(KokkosSparse::GS_TWOSTAGE);
739 else if (this->clusterSize_ == 1) {
740 mtKernelHandle_->create_gs_handle(KokkosSparse::GS_DEFAULT, this->mtColoringAlgorithm_);
741 mtKernelHandle_->get_point_gs_handle()->set_long_row_threshold(longRowThreshold_);
742 } else
743 mtKernelHandle_->create_gs_handle(KokkosSparse::CLUSTER_DEFAULT, this->clusterSize_, this->mtColoringAlgorithm_);
744 }
745 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice();
746 if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
747 // set parameters for two-stage GS
748 mtKernelHandle_->set_gs_set_num_inner_sweeps(NumInnerSweeps_);
749 mtKernelHandle_->set_gs_set_num_outer_sweeps(NumOuterSweeps_);
750 mtKernelHandle_->set_gs_set_inner_damp_factor(InnerDampingFactor_);
751 mtKernelHandle_->set_gs_twostage(!InnerSpTrsv_, A_->getLocalNumRows());
752 mtKernelHandle_->set_gs_twostage_compact_form(CompactForm_);
753 }
754
755 KokkosSparse::gauss_seidel_symbolic(
756 mtKernelHandle_.getRawPtr(),
757 A_->getLocalNumRows(),
758 A_->getLocalNumCols(),
759 kcsr.graph.row_map,
760 kcsr.graph.entries,
761 is_matrix_structurally_symmetric_);
762 }
763 } // timing of initialize stops here
764
765 InitializeTime_ += (timer->wallTime() - startTime);
766 ++NumInitialize_;
767 isInitialized_ = true;
768}
769
770namespace Impl {
771template <typename BlockDiagView>
772struct InvertDiagBlocks {
773 typedef typename BlockDiagView::size_type Size;
774
775 private:
776 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
777 template <typename View>
778 using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
779 typename View::device_type, Unmanaged>;
780
781 typedef typename BlockDiagView::non_const_value_type Scalar;
782 typedef typename BlockDiagView::device_type Device;
783 typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
784 typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
785
786 UnmanagedView<BlockDiagView> block_diag_;
787 // TODO Use thread team and scratch memory space. In this first
788 // pass, provide workspace for each block.
789 RWrk rwrk_buf_;
790 UnmanagedView<RWrk> rwrk_;
791 IWrk iwrk_buf_;
792 UnmanagedView<IWrk> iwrk_;
793
794 public:
795 InvertDiagBlocks(BlockDiagView& block_diag)
796 : block_diag_(block_diag) {
797 const auto blksz = block_diag.extent(1);
798 Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
799 rwrk_ = rwrk_buf_;
800 Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
801 iwrk_ = iwrk_buf_;
802 }
803
804 KOKKOS_INLINE_FUNCTION
805 void operator()(const Size i, int& jinfo) const {
806 auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
807 auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
808 auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
809 int info = 0;
810 Tpetra::GETF2(D_cur, ipiv, info);
811 if (info) {
812 ++jinfo;
813 return;
814 }
815 Tpetra::GETRI(D_cur, ipiv, work, info);
816 if (info) ++jinfo;
817 }
818};
819} // namespace Impl
820
821template <class MatrixType>
822void Relaxation<MatrixType>::computeBlockCrs() {
823 using Kokkos::ALL;
824 using Teuchos::Array;
825 using Teuchos::ArrayRCP;
826 using Teuchos::ArrayView;
827 using Teuchos::as;
828 using Teuchos::Comm;
829 using Teuchos::RCP;
830 using Teuchos::rcp;
831 using Teuchos::rcp_dynamic_cast;
832 using Teuchos::REDUCE_MAX;
833 using Teuchos::REDUCE_MIN;
834 using Teuchos::REDUCE_SUM;
835 using Teuchos::reduceAll;
836 typedef local_ordinal_type LO;
837
838 const std::string timerName("Ifpack2::Relaxation::computeBlockCrs");
839 Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter(timerName);
840 if (timer.is_null()) {
841 timer = Teuchos::TimeMonitor::getNewCounter(timerName);
842 }
843 double startTime = timer->wallTime();
844 {
845 Teuchos::TimeMonitor timeMon(*timer);
846
847 TEUCHOS_TEST_FOR_EXCEPTION(
848 A_.is_null(), std::runtime_error,
849 "Ifpack2::Relaxation::"
850 "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
851 "with a nonnull input matrix, then call initialize(), before calling "
852 "this method.");
853 auto blockCrsA = rcp_dynamic_cast<const block_crs_matrix_type>(A_);
854 TEUCHOS_TEST_FOR_EXCEPTION(
855 blockCrsA.is_null(), std::logic_error,
856 "Ifpack2::Relaxation::"
857 "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
858 "got this far. Please report this bug to the Ifpack2 developers.");
859
860 const scalar_type one = STS::one();
861
862 // Reset state.
863 IsComputed_ = false;
864
865 const LO lclNumMeshRows =
866 blockCrsA->getCrsGraph().getLocalNumRows();
867 const LO blockSize = blockCrsA->getBlockSize();
868
869 if (!savedDiagOffsets_) {
870 blockDiag_ = block_diag_type(); // clear it before reallocating
871 blockDiag_ = block_diag_type("Ifpack2::Relaxation::blockDiag_",
872 lclNumMeshRows, blockSize, blockSize);
873 if (Teuchos::as<LO>(diagOffsets_.extent(0)) < lclNumMeshRows) {
874 // Clear diagOffsets_ first (by assigning an empty View to it)
875 // to save memory, before reallocating.
876 diagOffsets_ = Kokkos::View<size_t*, device_type>();
877 diagOffsets_ = Kokkos::View<size_t*, device_type>("offsets", lclNumMeshRows);
878 }
879 blockCrsA->getCrsGraph().getLocalDiagOffsets(diagOffsets_);
880 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(diagOffsets_.extent(0)) !=
881 static_cast<size_t>(blockDiag_.extent(0)),
882 std::logic_error, "diagOffsets_.extent(0) = " << diagOffsets_.extent(0) << " != blockDiag_.extent(0) = " << blockDiag_.extent(0) << ". Please report this bug to the Ifpack2 developers.");
883 savedDiagOffsets_ = true;
884 }
885 blockCrsA->getLocalDiagCopy(blockDiag_, diagOffsets_);
886
887 // Use an unmanaged View in this method, so that when we take
888 // subviews of it (to get each diagonal block), we don't have to
889 // touch the reference count. Reference count updates are a
890 // thread scalability bottleneck and have a performance cost even
891 // without using threads.
892 unmanaged_block_diag_type blockDiag = blockDiag_;
893
894 if (DoL1Method_ && IsParallel_) {
895 const scalar_type two = one + one;
896 const size_t maxLength = A_->getLocalMaxNumRowEntries();
897 nonconst_local_inds_host_view_type indices("indices", maxLength);
898 nonconst_values_host_view_type values_("values", maxLength * blockSize * blockSize);
899 size_t numEntries = 0;
900
901 for (LO i = 0; i < lclNumMeshRows; ++i) {
902 // FIXME (mfh 16 Dec 2015) Get views instead of copies.
903 blockCrsA->getLocalRowCopy(i, indices, values_, numEntries);
904 scalar_type* values = reinterpret_cast<scalar_type*>(values_.data());
905
906 auto diagBlock = Kokkos::subview(blockDiag, i, ALL(), ALL());
907 for (LO subRow = 0; subRow < blockSize; ++subRow) {
908 magnitude_type diagonal_boost = STM::zero();
909 for (size_t k = 0; k < numEntries; ++k) {
910 if (indices[k] >= lclNumMeshRows) {
911 const size_t offset = blockSize * blockSize * k + subRow * blockSize;
912 for (LO subCol = 0; subCol < blockSize; ++subCol) {
913 diagonal_boost += STS::magnitude(values[offset + subCol] / two);
914 }
915 }
916 }
917 if (STS::magnitude(diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
918 diagBlock(subRow, subRow) += diagonal_boost;
919 }
920 }
921 }
922 }
923
924 int info = 0;
925 {
926 Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
927 typedef typename unmanaged_block_diag_type::execution_space exec_space;
928 typedef Kokkos::RangePolicy<exec_space, LO> range_type;
929
930 Kokkos::parallel_reduce(range_type(0, lclNumMeshRows), idb, info);
931 // FIXME (mfh 19 May 2017) Different processes may not
932 // necessarily have this error all at the same time, so it would
933 // be better just to preserve a local error state and let users
934 // check.
935 TEUCHOS_TEST_FOR_EXCEPTION(info > 0, std::runtime_error, "GETF2 or GETRI failed on = " << info << " diagonal blocks.");
936 }
937
938 // In a debug build, do an extra test to make sure that all the
939 // factorizations were computed correctly.
940#ifdef HAVE_IFPACK2_DEBUG
941 const int numResults = 2;
942 // Use "max = -min" trick to get min and max in a single all-reduce.
943 int lclResults[2], gblResults[2];
944 lclResults[0] = info;
945 lclResults[1] = -info;
946 gblResults[0] = 0;
947 gblResults[1] = 0;
948 reduceAll<int, int>(*(A_->getGraph()->getComm()), REDUCE_MIN,
949 numResults, lclResults, gblResults);
950 TEUCHOS_TEST_FOR_EXCEPTION(gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
951 "Ifpack2::Relaxation::compute: When processing the input "
952 "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
953 "failed on one or more (MPI) processes.");
954#endif // HAVE_IFPACK2_DEBUG
955 serialGaussSeidel_ = rcp(new SerialGaussSeidel(blockCrsA, blockDiag_, localSmoothingIndices_, DampingFactor_));
956 } // end TimeMonitor scope
957
958 ComputeTime_ += (timer->wallTime() - startTime);
959 ++NumCompute_;
960 IsComputed_ = true;
961}
962
963template <class MatrixType>
965 using Teuchos::Array;
966 using Teuchos::ArrayRCP;
967 using Teuchos::ArrayView;
968 using Teuchos::as;
969 using Teuchos::Comm;
970 using Teuchos::RCP;
971 using Teuchos::rcp;
972 using Teuchos::REDUCE_MAX;
973 using Teuchos::REDUCE_MIN;
974 using Teuchos::REDUCE_SUM;
975 using Teuchos::reduceAll;
976 using LO = local_ordinal_type;
977 using vector_type = Tpetra::Vector<scalar_type, local_ordinal_type,
979 using IST = typename vector_type::impl_scalar_type;
980#if KOKKOS_VERSION >= 40799
981 using KAT = KokkosKernels::ArithTraits<IST>;
982#else
983 using KAT = Kokkos::ArithTraits<IST>;
984#endif
985
986 const char methodName[] = "Ifpack2::Relaxation::compute";
987 const scalar_type zero = STS::zero();
988 const scalar_type one = STS::one();
989
990 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, methodName << ": "
991 "The input matrix A is null. Please call setMatrix() with a nonnull "
992 "input matrix, then call initialize(), before calling this method.");
993
994 // We don't count initialization in compute() time.
995 if (!isInitialized()) {
996 initialize();
997 }
998
999 if (hasBlockCrsMatrix_) {
1000 computeBlockCrs();
1001 return;
1002 }
1003
1004 Teuchos::RCP<Teuchos::Time> timer =
1005 Teuchos::TimeMonitor::getNewCounter(methodName);
1006 double startTime = timer->wallTime();
1007
1008 { // Timing of compute starts here.
1009 Teuchos::TimeMonitor timeMon(*timer);
1010
1011 // Precompute some quantities for "fixing" zero or tiny diagonal
1012 // entries. We'll only use them if this "fixing" is enabled.
1013 //
1014 // SmallTraits covers for the lack of eps() in
1015 // Teuchos::ScalarTraits when its template parameter is not a
1016 // floating-point type. (Ifpack2 sometimes gets instantiated for
1017 // integer Scalar types.)
1018 IST oneOverMinDiagVal = KAT::one() / static_cast<IST>(SmallTraits<scalar_type>::eps());
1019 if (MinDiagonalValue_ != zero)
1020 oneOverMinDiagVal = KAT::one() / static_cast<IST>(MinDiagonalValue_);
1021
1022 // It's helpful not to have to recompute this magnitude each time.
1023 const magnitude_type minDiagValMag = STS::magnitude(MinDiagonalValue_);
1024
1025 const LO numMyRows = static_cast<LO>(A_->getLocalNumRows());
1026
1027 TEUCHOS_TEST_FOR_EXCEPTION(NumSweeps_ < 0, std::logic_error, methodName << ": NumSweeps_ = " << NumSweeps_ << " < 0. "
1028 "Please report this bug to the Ifpack2 developers.");
1029 IsComputed_ = false;
1030
1031 if (Diagonal_.is_null()) {
1032 // A_->getLocalDiagCopy fills in all Vector entries, even if the
1033 // matrix has no stored entries in the corresponding rows.
1034 Diagonal_ = rcp(new vector_type(A_->getRowMap(), false));
1035 }
1036
1037 if (checkDiagEntries_) {
1038 //
1039 // Check diagonal elements, replace zero elements with the minimum
1040 // diagonal value, and store their inverses. Count the number of
1041 // "small" and zero diagonal entries while we're at it.
1042 //
1043 size_t numSmallDiagEntries = 0; // "small" includes zero
1044 size_t numZeroDiagEntries = 0; // # zero diagonal entries
1045 size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
1046 magnitude_type minMagDiagEntryMag = STM::zero();
1047 magnitude_type maxMagDiagEntryMag = STM::zero();
1048
1049 // FIXME: We are extracting the diagonal more than once. That
1050 // isn't very efficient, but we shouldn't be checking
1051 // diagonal entries at scale anyway.
1052 A_->getLocalDiagCopy(*Diagonal_); // slow path for row matrix
1053 std::unique_ptr<vector_type> origDiag;
1054 origDiag = std::unique_ptr<vector_type>(new vector_type(*Diagonal_, Teuchos::Copy));
1055 auto diag2d = Diagonal_->getLocalViewHost(Tpetra::Access::ReadWrite);
1056 auto diag = Kokkos::subview(diag2d, Kokkos::ALL(), 0);
1057 // As we go, keep track of the diagonal entries with the
1058 // least and greatest magnitude. We could use the trick of
1059 // starting min with +Inf and max with -Inf, but that
1060 // doesn't work if scalar_type is a built-in integer type.
1061 // Thus, we have to start by reading the first diagonal
1062 // entry redundantly.
1063 if (numMyRows != 0) {
1064 const magnitude_type d_0_mag = KAT::abs(diag(0));
1065 minMagDiagEntryMag = d_0_mag;
1066 maxMagDiagEntryMag = d_0_mag;
1067 }
1068
1069 // Go through all the diagonal entries. Compute counts of
1070 // small-magnitude, zero, and negative-real-part entries.
1071 // Invert the diagonal entries that aren't too small. For
1072 // those too small in magnitude, replace them with
1073 // 1/MinDiagonalValue_ (or 1/eps if MinDiagonalValue_
1074 // happens to be zero).
1075 for (LO i = 0; i < numMyRows; ++i) {
1076 const IST d_i = diag(i);
1077 const magnitude_type d_i_mag = KAT::abs(d_i);
1078 // Work-around for GitHub Issue #5269.
1079 // const magnitude_type d_i_real = KAT::real (d_i);
1080 const auto d_i_real = getRealValue(d_i);
1081
1082 // We can't compare complex numbers, but we can compare their
1083 // real parts.
1084 if (d_i_real < STM::zero()) {
1085 ++numNegDiagEntries;
1086 }
1087 if (d_i_mag < minMagDiagEntryMag) {
1088 minMagDiagEntryMag = d_i_mag;
1089 }
1090 if (d_i_mag > maxMagDiagEntryMag) {
1091 maxMagDiagEntryMag = d_i_mag;
1092 }
1093
1094 if (fixTinyDiagEntries_) {
1095 // <= not <, in case minDiagValMag is zero.
1096 if (d_i_mag <= minDiagValMag) {
1097 ++numSmallDiagEntries;
1098 if (d_i_mag == STM::zero()) {
1099 ++numZeroDiagEntries;
1100 }
1101 diag(i) = oneOverMinDiagVal;
1102 } else {
1103 diag(i) = KAT::one() / d_i;
1104 }
1105 } else { // Don't fix zero or tiny diagonal entries.
1106 // <= not <, in case minDiagValMag is zero.
1107 if (d_i_mag <= minDiagValMag) {
1108 ++numSmallDiagEntries;
1109 if (d_i_mag == STM::zero()) {
1110 ++numZeroDiagEntries;
1111 }
1112 }
1113 diag(i) = KAT::one() / d_i;
1114 }
1115 }
1116
1117 // Collect global data about the diagonal entries. Only do this
1118 // (which involves O(1) all-reduces) if the user asked us to do
1119 // the extra work.
1120 //
1121 // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
1122 // zero rows. Fixing this requires one of two options:
1123 //
1124 // 1. Use a custom reduction operation that ignores processes
1125 // with zero diagonal entries.
1126 // 2. Split the communicator, compute all-reduces using the
1127 // subcommunicator over processes that have a nonzero number
1128 // of diagonal entries, and then broadcast from one of those
1129 // processes (if there is one) to the processes in the other
1130 // subcommunicator.
1131 const Comm<int>& comm = *(A_->getRowMap()->getComm());
1132
1133 // Compute global min and max magnitude of entries.
1134 Array<magnitude_type> localVals(2);
1135 localVals[0] = minMagDiagEntryMag;
1136 // (- (min (- x))) is the same as (max x).
1137 localVals[1] = -maxMagDiagEntryMag;
1138 Array<magnitude_type> globalVals(2);
1139 reduceAll<int, magnitude_type>(comm, REDUCE_MIN, 2,
1140 localVals.getRawPtr(),
1141 globalVals.getRawPtr());
1142 globalMinMagDiagEntryMag_ = globalVals[0];
1143 globalMaxMagDiagEntryMag_ = -globalVals[1];
1144
1145 Array<size_t> localCounts(3);
1146 localCounts[0] = numSmallDiagEntries;
1147 localCounts[1] = numZeroDiagEntries;
1148 localCounts[2] = numNegDiagEntries;
1149 Array<size_t> globalCounts(3);
1150 reduceAll<int, size_t>(comm, REDUCE_SUM, 3,
1151 localCounts.getRawPtr(),
1152 globalCounts.getRawPtr());
1153 globalNumSmallDiagEntries_ = globalCounts[0];
1154 globalNumZeroDiagEntries_ = globalCounts[1];
1155 globalNumNegDiagEntries_ = globalCounts[2];
1156
1157 // Compute and save the difference between the computed inverse
1158 // diagonal, and the original diagonal's inverse.
1159 vector_type diff(A_->getRowMap());
1160 diff.reciprocal(*origDiag);
1161 diff.update(-one, *Diagonal_, one);
1162 globalDiagNormDiff_ = diff.norm2();
1163 }
1164
1165 // Extract the diagonal entries. The CrsMatrix graph version is
1166 // faster than the row matrix version for subsequent calls to
1167 // compute(), since it caches the diagonal offsets.
1168
1169 bool debugAgainstSlowPath = false;
1170
1171 auto crsMat = Details::getCrsMatrix(A_);
1172
1173 if (crsMat.get() && crsMat->isFillComplete()) {
1174 // The invDiagKernel object computes diagonal offsets if
1175 // necessary. The "compute" call extracts diagonal enties,
1176 // optionally applies the L1 method and replacement of small
1177 // entries, and then inverts.
1178 if (invDiagKernel_.is_null())
1179 invDiagKernel_ = rcp(new Ifpack2::Details::InverseDiagonalKernel<op_type>(crsMat));
1180 else
1181 invDiagKernel_->setMatrix(crsMat);
1182 invDiagKernel_->compute(*Diagonal_,
1183 DoL1Method_ && IsParallel_, L1Eta_,
1184 fixTinyDiagEntries_, minDiagValMag);
1185 savedDiagOffsets_ = true;
1186
1187 // mfh 27 May 2019: Later on, we should introduce an IFPACK2_DEBUG
1188 // environment variable to control this behavior at run time.
1189#ifdef HAVE_IFPACK2_DEBUG
1190 debugAgainstSlowPath = true;
1191#endif
1192 }
1193
1194 if (crsMat.is_null() || !crsMat->isFillComplete() || debugAgainstSlowPath) {
1195 // We could not call the CrsMatrix version above, or want to
1196 // debug by comparing against the slow path.
1197
1198 // FIXME: The L1 method in this code path runs on host, since we
1199 // don't have device access for row matrices.
1200
1201 RCP<vector_type> Diagonal;
1202 if (debugAgainstSlowPath)
1203 Diagonal = rcp(new vector_type(A_->getRowMap()));
1204 else
1205 Diagonal = Diagonal_;
1206
1207 A_->getLocalDiagCopy(*Diagonal); // slow path for row matrix
1208
1209 // Setup for L1 Methods.
1210 // Here we add half the value of the off-processor entries in the row,
1211 // but only if diagonal isn't sufficiently large.
1212 //
1213 // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
1214 // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM
1215 // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
1216 //
1217 if (DoL1Method_ && IsParallel_) {
1218 const row_matrix_type& A_row = *A_;
1219 auto diag = Diagonal->getLocalViewHost(Tpetra::Access::ReadWrite);
1220 const magnitude_type two = STM::one() + STM::one();
1221 const size_t maxLength = A_row.getLocalMaxNumRowEntries();
1222 nonconst_local_inds_host_view_type indices("indices", maxLength);
1223 nonconst_values_host_view_type values("values", maxLength);
1224 size_t numEntries;
1225
1226 for (LO i = 0; i < numMyRows; ++i) {
1227 A_row.getLocalRowCopy(i, indices, values, numEntries);
1228 magnitude_type diagonal_boost = STM::zero();
1229 for (size_t k = 0; k < numEntries; ++k) {
1230 if (indices[k] >= numMyRows) {
1231 diagonal_boost += STS::magnitude(values[k] / two);
1232 }
1233 }
1234 if (KAT::magnitude(diag(i, 0)) < L1Eta_ * diagonal_boost) {
1235 diag(i, 0) += diagonal_boost;
1236 }
1237 }
1238 }
1239
1240 //
1241 // Invert the matrix's diagonal entries (in Diagonal).
1242 //
1243 if (fixTinyDiagEntries_) {
1244 // Go through all the diagonal entries. Invert those that
1245 // aren't too small in magnitude. For those that are too
1246 // small in magnitude, replace them with oneOverMinDiagVal.
1247 auto localDiag = Diagonal->getLocalViewDevice(Tpetra::Access::ReadWrite);
1248 Kokkos::parallel_for(
1249 Kokkos::RangePolicy<MyExecSpace>(0, localDiag.extent(0)),
1250 KOKKOS_LAMBDA(local_ordinal_type i) {
1251 auto d_i = localDiag(i, 0);
1252 const magnitude_type d_i_mag = KAT::magnitude(d_i);
1253 // <= not <, in case minDiagValMag is zero.
1254 if (d_i_mag <= minDiagValMag) {
1255 d_i = oneOverMinDiagVal;
1256 } else {
1257 // For Stokhos types, operator/ returns an expression
1258 // type. Explicitly convert to IST before returning.
1259 d_i = IST(KAT::one() / d_i);
1260 }
1261 localDiag(i, 0) = d_i;
1262 });
1263 } else { // don't fix tiny or zero diagonal entries
1264 Diagonal->reciprocal(*Diagonal);
1265 }
1266
1267 if (debugAgainstSlowPath) {
1268 // Validate the fast-path diagonal against the slow-path diagonal.
1269 Diagonal->update(STS::one(), *Diagonal_, -STS::one());
1270 const magnitude_type err = Diagonal->normInf();
1271 // The two diagonals should be exactly the same, so their
1272 // difference should be exactly zero.
1273 TEUCHOS_TEST_FOR_EXCEPTION(err > 100 * STM::eps(), std::logic_error, methodName << ": "
1274 << "\"fast-path\" diagonal computation failed. "
1275 "\\|D1 - D2\\|_inf = "
1276 << err << ".");
1277 }
1278 }
1279
1280 // Count floating-point operations of computing the inverse diagonal.
1281 //
1282 // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
1283 if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1284 ComputeFlops_ += 4.0 * numMyRows;
1285 } else {
1286 ComputeFlops_ += numMyRows;
1287 }
1288
1289 if (PrecType_ == Ifpack2::Details::MTGS ||
1290 PrecType_ == Ifpack2::Details::MTSGS ||
1291 PrecType_ == Ifpack2::Details::GS2 ||
1292 PrecType_ == Ifpack2::Details::SGS2) {
1293 // KokkosKernels GaussSeidel Initialization.
1294 using scalar_view_t = typename local_matrix_device_type::values_type;
1295
1296 TEUCHOS_TEST_FOR_EXCEPTION(crsMat.is_null(), std::logic_error, methodName << ": "
1297 "Multithreaded Gauss-Seidel methods currently only work "
1298 "when the input matrix is a Tpetra::CrsMatrix.");
1299 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice();
1300
1301 // TODO BMK: This should be ReadOnly, and KokkosKernels should accept a
1302 // const-valued view for user-provided D^-1. OK for now, Diagonal_ is nonconst.
1303 auto diagView_2d = Diagonal_->getLocalViewDevice(Tpetra::Access::ReadWrite);
1304 scalar_view_t diagView_1d = Kokkos::subview(diagView_2d, Kokkos::ALL(), 0);
1305 KokkosSparse::gauss_seidel_numeric(
1306 mtKernelHandle_.getRawPtr(),
1307 A_->getLocalNumRows(),
1308 A_->getLocalNumCols(),
1309 kcsr.graph.row_map,
1310 kcsr.graph.entries,
1311 kcsr.values,
1312 diagView_1d,
1313 is_matrix_structurally_symmetric_);
1314 } else if (PrecType_ == Ifpack2::Details::GS || PrecType_ == Ifpack2::Details::SGS) {
1315 if (crsMat)
1316 serialGaussSeidel_ = rcp(new SerialGaussSeidel(crsMat, Diagonal_, localSmoothingIndices_, DampingFactor_));
1317 else
1318 serialGaussSeidel_ = rcp(new SerialGaussSeidel(A_, Diagonal_, localSmoothingIndices_, DampingFactor_));
1319 }
1320 } // end TimeMonitor scope
1321
1322 ComputeTime_ += (timer->wallTime() - startTime);
1323 ++NumCompute_;
1324 IsComputed_ = true;
1325}
1326
1327template <class MatrixType>
1329 ApplyInverseRichardson(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1330 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y) const {
1331 using Teuchos::as;
1332 const double numGlobalRows = as<double>(A_->getGlobalNumRows());
1333 const double numVectors = as<double>(X.getNumVectors());
1334 if (ZeroStartingSolution_) {
1335 // For the first Richardson sweep, if we are allowed to assume that
1336 // the initial guess is zero, then Richardson is just alpha times the RHS
1337 // Compute it as Y(i,j) = DampingFactor_ * X(i,j).
1338 Y.scale(DampingFactor_, X);
1339
1340 // Count (global) floating-point operations. Ifpack2 represents
1341 // this as a floating-point number rather than an integer, so that
1342 // overflow (for a very large number of calls, or a very large
1343 // problem) is approximate instead of catastrophic.
1344 double flopUpdate = 0.0;
1345 if (DampingFactor_ == STS::one()) {
1346 // Y(i,j) = X(i,j): one multiply for each entry of Y.
1347 flopUpdate = numGlobalRows * numVectors;
1348 } else {
1349 // Y(i,j) = DampingFactor_ * X(i,j):
1350 // One multiplies per entry of Y.
1351 flopUpdate = numGlobalRows * numVectors;
1352 }
1353 ApplyFlops_ += flopUpdate;
1354 if (NumSweeps_ == 1) {
1355 return;
1356 }
1357 }
1358 // If we were allowed to assume that the starting guess was zero,
1359 // then we have already done the first sweep above.
1360 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1361
1362 // Allocate a multivector if the cached one isn't perfect
1363 updateCachedMultiVector(Y.getMap(), as<size_t>(numVectors));
1364
1365 for (int j = startSweep; j < NumSweeps_; ++j) {
1366 // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1367 Tpetra::Details::residual(*A_, Y, X, *cachedMV_);
1368 Y.update(DampingFactor_, *cachedMV_, STS::one());
1369 }
1370
1371 // For each column of output, for each pass over the matrix:
1372 //
1373 // - One + and one * for each matrix entry
1374 // - One / and one + for each row of the matrix
1375 // - If the damping factor is not one: one * for each row of the
1376 // matrix. (It's not fair to count this if the damping factor is
1377 // one, since the implementation could skip it. Whether it does
1378 // or not is the implementation's choice.)
1379
1380 // Floating-point operations due to the damping factor, per matrix
1381 // row, per direction, per columm of output.
1382 const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
1383 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1384 ApplyFlops_ += as<double>(NumSweeps_ - startSweep) * numVectors *
1385 (2.0 * numGlobalNonzeros + dampingFlops);
1386}
1387
1388template <class MatrixType>
1389void Relaxation<MatrixType>::
1390 ApplyInverseJacobi(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1391 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y) const {
1392 using Teuchos::as;
1393 if (hasBlockCrsMatrix_) {
1394 ApplyInverseJacobi_BlockCrsMatrix(X, Y);
1395 return;
1396 }
1397
1398 const double numGlobalRows = as<double>(A_->getGlobalNumRows());
1399 const double numVectors = as<double>(X.getNumVectors());
1400 if (ZeroStartingSolution_) {
1401 // For the first Jacobi sweep, if we are allowed to assume that
1402 // the initial guess is zero, then Jacobi is just diagonal
1403 // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1404 // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
1405 Y.elementWiseMultiply(DampingFactor_, *Diagonal_, X, STS::zero());
1406
1407 // Count (global) floating-point operations. Ifpack2 represents
1408 // this as a floating-point number rather than an integer, so that
1409 // overflow (for a very large number of calls, or a very large
1410 // problem) is approximate instead of catastrophic.
1411 double flopUpdate = 0.0;
1412 if (DampingFactor_ == STS::one()) {
1413 // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
1414 flopUpdate = numGlobalRows * numVectors;
1415 } else {
1416 // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
1417 // Two multiplies per entry of Y.
1418 flopUpdate = 2.0 * numGlobalRows * numVectors;
1419 }
1420 ApplyFlops_ += flopUpdate;
1421 if (NumSweeps_ == 1) {
1422 return;
1423 }
1424 }
1425 // If we were allowed to assume that the starting guess was zero,
1426 // then we have already done the first sweep above.
1427 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1428
1429 // Allocate a multivector if the cached one isn't perfect
1430 updateCachedMultiVector(Y.getMap(), as<size_t>(numVectors));
1431
1432 for (int j = startSweep; j < NumSweeps_; ++j) {
1433 // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1434 Tpetra::Details::residual(*A_, Y, X, *cachedMV_);
1435 Y.elementWiseMultiply(DampingFactor_, *Diagonal_, *cachedMV_, STS::one());
1436 }
1437
1438 // For each column of output, for each pass over the matrix:
1439 //
1440 // - One + and one * for each matrix entry
1441 // - One / and one + for each row of the matrix
1442 // - If the damping factor is not one: one * for each row of the
1443 // matrix. (It's not fair to count this if the damping factor is
1444 // one, since the implementation could skip it. Whether it does
1445 // or not is the implementation's choice.)
1446
1447 // Floating-point operations due to the damping factor, per matrix
1448 // row, per direction, per columm of output.
1449 const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
1450 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1451 ApplyFlops_ += as<double>(NumSweeps_ - startSweep) * numVectors *
1452 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1453}
1454
1455template <class MatrixType>
1456void Relaxation<MatrixType>::
1457 ApplyInverseJacobi_BlockCrsMatrix(const Tpetra::MultiVector<scalar_type,
1458 local_ordinal_type,
1459 global_ordinal_type,
1460 node_type>& X,
1461 Tpetra::MultiVector<scalar_type,
1462 local_ordinal_type,
1463 global_ordinal_type,
1464 node_type>& Y) const {
1465 using Tpetra::BlockMultiVector;
1466 using BMV = BlockMultiVector<scalar_type, local_ordinal_type,
1467 global_ordinal_type, node_type>;
1468
1469 const block_crs_matrix_type* blockMatConst =
1470 dynamic_cast<const block_crs_matrix_type*>(A_.getRawPtr());
1471 TEUCHOS_TEST_FOR_EXCEPTION(blockMatConst == nullptr, std::logic_error,
1472 "This method should "
1473 "never be called if the matrix A_ is not a BlockCrsMatrix. "
1474 "Please report this bug to the Ifpack2 developers.");
1475 // mfh 23 Jan 2016: Unfortunately, the const cast is necessary.
1476 // This is because applyBlock() is nonconst (more accurate), while
1477 // apply() is const (required by Tpetra::Operator interface, but a
1478 // lie, because it possibly allocates temporary buffers).
1479 block_crs_matrix_type* blockMat =
1480 const_cast<block_crs_matrix_type*>(blockMatConst);
1481
1482 auto meshRowMap = blockMat->getRowMap();
1483 auto meshColMap = blockMat->getColMap();
1484 const local_ordinal_type blockSize = blockMat->getBlockSize();
1485
1486 BMV X_blk(X, *meshColMap, blockSize);
1487 BMV Y_blk(Y, *meshRowMap, blockSize);
1488
1489 if (ZeroStartingSolution_) {
1490 // For the first sweep, if we are allowed to assume that the
1491 // initial guess is zero, then block Jacobi is just block diagonal
1492 // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1493 Y_blk.blockWiseMultiply(DampingFactor_, blockDiag_, X_blk);
1494 if (NumSweeps_ == 1) {
1495 return;
1496 }
1497 }
1498
1499 auto pointRowMap = Y.getMap();
1500 const size_t numVecs = X.getNumVectors();
1501
1502 // We don't need to initialize the result MV, since the sparse
1503 // mat-vec will clobber its contents anyway.
1504 BMV A_times_Y(*meshRowMap, *pointRowMap, blockSize, numVecs);
1505
1506 // If we were allowed to assume that the starting guess was zero,
1507 // then we have already done the first sweep above.
1508 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1509
1510 for (int j = startSweep; j < NumSweeps_; ++j) {
1511 blockMat->applyBlock(Y_blk, A_times_Y);
1512 // Y := Y + \omega D^{-1} (X - A*Y). Use A_times_Y as scratch.
1513 Y_blk.blockJacobiUpdate(DampingFactor_, blockDiag_,
1514 X_blk, A_times_Y, STS::one());
1515 }
1516}
1517
1518template <class MatrixType>
1519void Relaxation<MatrixType>::
1520 ApplyInverseSerialGS(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1521 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1522 Tpetra::ESweepDirection direction) const {
1523 using this_type = Relaxation<MatrixType>;
1524 // The CrsMatrix version is faster, because it can access the sparse
1525 // matrix data directly, rather than by copying out each row's data
1526 // in turn. Thus, we check whether the RowMatrix is really a
1527 // CrsMatrix.
1528 //
1529 // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
1530 // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
1531 // will still be correct if the cast fails, but it will use an
1532 // unoptimized kernel.
1533 auto blockCrsMat = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(A_);
1534 auto crsMat = Details::getCrsMatrix(A_);
1535 if (blockCrsMat.get()) {
1536 const_cast<this_type&>(*this).ApplyInverseSerialGS_BlockCrsMatrix(*blockCrsMat, X, Y, direction);
1537 } else if (crsMat.get()) {
1538 ApplyInverseSerialGS_CrsMatrix(*crsMat, X, Y, direction);
1539 } else {
1540 ApplyInverseSerialGS_RowMatrix(X, Y, direction);
1541 }
1542}
1543
1544template <class MatrixType>
1545void Relaxation<MatrixType>::
1546 ApplyInverseSerialGS_RowMatrix(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1547 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1548 Tpetra::ESweepDirection direction) const {
1549 using Teuchos::Array;
1550 using Teuchos::ArrayRCP;
1551 using Teuchos::ArrayView;
1552 using Teuchos::as;
1553 using Teuchos::RCP;
1554 using Teuchos::rcp;
1555 using Teuchos::rcpFromRef;
1556 typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> MV;
1557
1558 // Tpetra's GS implementation for CrsMatrix handles zeroing out the
1559 // starting multivector itself. The generic RowMatrix version here
1560 // does not, so we have to zero out Y here.
1561 if (ZeroStartingSolution_) {
1562 Y.putScalar(STS::zero());
1563 }
1564
1565 size_t NumVectors = X.getNumVectors();
1566
1567 RCP<MV> Y2;
1568 if (IsParallel_) {
1569 if (Importer_.is_null()) { // domain and column Maps are the same.
1570 updateCachedMultiVector(Y.getMap(), NumVectors);
1571 } else {
1572 updateCachedMultiVector(Importer_->getTargetMap(), NumVectors);
1573 }
1574 Y2 = cachedMV_;
1575 } else {
1576 Y2 = rcpFromRef(Y);
1577 }
1578
1579 for (int j = 0; j < NumSweeps_; ++j) {
1580 // data exchange is here, once per sweep
1581 if (IsParallel_) {
1582 if (Importer_.is_null()) {
1583 // FIXME (mfh 27 May 2019) This doesn't deep copy -- not
1584 // clear if this is correct. Reevaluate at some point.
1585
1586 *Y2 = Y; // just copy, since domain and column Maps are the same
1587 } else {
1588 Y2->doImport(Y, *Importer_, Tpetra::INSERT);
1589 }
1590 }
1591 serialGaussSeidel_->apply(*Y2, X, direction);
1592
1593 // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1594 if (IsParallel_) {
1595 Tpetra::deep_copy(Y, *Y2);
1596 }
1597 }
1598
1599 // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
1600 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1601 const double numVectors = as<double>(X.getNumVectors());
1602 const double numGlobalRows = as<double>(A_->getGlobalNumRows());
1603 const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
1604 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1605 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1606}
1607
1608template <class MatrixType>
1609void Relaxation<MatrixType>::
1610 ApplyInverseSerialGS_CrsMatrix(const crs_matrix_type& A,
1611 const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& B,
1612 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1613 Tpetra::ESweepDirection direction) const {
1614 using Teuchos::null;
1615 using Teuchos::RCP;
1616 using Teuchos::rcp;
1617 using Teuchos::rcp_const_cast;
1618 using Teuchos::rcpFromRef;
1619 typedef scalar_type Scalar;
1620 const char prefix[] = "Ifpack2::Relaxation::SerialGS: ";
1621 const scalar_type ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1622
1623 TEUCHOS_TEST_FOR_EXCEPTION(
1624 !A.isFillComplete(), std::runtime_error,
1625 prefix << "The matrix is not fill complete.");
1626 TEUCHOS_TEST_FOR_EXCEPTION(
1627 NumSweeps_ < 0, std::invalid_argument,
1628 prefix << "The number of sweeps must be nonnegative, "
1629 "but you provided numSweeps = "
1630 << NumSweeps_ << " < 0.");
1631
1632 if (NumSweeps_ == 0) {
1633 return;
1634 }
1635
1636 RCP<const import_type> importer = A.getGraph()->getImporter();
1637
1638 RCP<const map_type> domainMap = A.getDomainMap();
1639 RCP<const map_type> rangeMap = A.getRangeMap();
1640 RCP<const map_type> rowMap = A.getGraph()->getRowMap();
1641 RCP<const map_type> colMap = A.getGraph()->getColMap();
1642
1643#ifdef HAVE_IFPACK2_DEBUG
1644 {
1645 // The relation 'isSameAs' is transitive. It's also a
1646 // collective, so we don't have to do a "shared" test for
1647 // exception (i.e., a global reduction on the test value).
1648 TEUCHOS_TEST_FOR_EXCEPTION(
1649 !X.getMap()->isSameAs(*domainMap), std::runtime_error,
1650 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1651 "multivector X be in the domain Map of the matrix.");
1652 TEUCHOS_TEST_FOR_EXCEPTION(
1653 !B.getMap()->isSameAs(*rangeMap), std::runtime_error,
1654 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1655 "B be in the range Map of the matrix.");
1656 TEUCHOS_TEST_FOR_EXCEPTION(
1657 !Diagonal_->getMap()->isSameAs(*rowMap), std::runtime_error,
1658 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1659 "D be in the row Map of the matrix.");
1660 TEUCHOS_TEST_FOR_EXCEPTION(
1661 !rowMap->isSameAs(*rangeMap), std::runtime_error,
1662 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
1663 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1664 TEUCHOS_TEST_FOR_EXCEPTION(
1665 !domainMap->isSameAs(*rangeMap), std::runtime_error,
1666 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
1667 "the range Map of the matrix be the same.");
1668 }
1669#endif
1670
1671 // Fetch a (possibly cached) temporary column Map multivector
1672 // X_colMap, and a domain Map view X_domainMap of it. Both have
1673 // constant stride by construction. We know that the domain Map
1674 // must include the column Map, because our Gauss-Seidel kernel
1675 // requires that the row Map, domain Map, and range Map are all
1676 // the same, and that each process owns all of its own diagonal
1677 // entries of the matrix.
1678
1679 RCP<multivector_type> X_colMap;
1680 RCP<multivector_type> X_domainMap;
1681 bool copyBackOutput = false;
1682 if (importer.is_null()) {
1683 X_colMap = Teuchos::rcpFromRef(X);
1684 X_domainMap = Teuchos::rcpFromRef(X);
1685 // Column Map and domain Map are the same, so there are no
1686 // remote entries. Thus, if we are not setting the initial
1687 // guess to zero, we don't have to worry about setting remote
1688 // entries to zero, even though we are not doing an Import in
1689 // this case.
1690 if (ZeroStartingSolution_) {
1691 X_colMap->putScalar(ZERO);
1692 }
1693 // No need to copy back to X at end.
1694 } else { // Column Map and domain Map are _not_ the same.
1695 updateCachedMultiVector(colMap, X.getNumVectors());
1696 X_colMap = cachedMV_;
1697 X_domainMap = X_colMap->offsetViewNonConst(domainMap, 0);
1698
1699 if (ZeroStartingSolution_) {
1700 // No need for an Import, since we're filling with zeros.
1701 X_colMap->putScalar(ZERO);
1702 } else {
1703 // We could just copy X into X_domainMap. However, that
1704 // wastes a copy, because the Import also does a copy (plus
1705 // communication). Since the typical use case for
1706 // Gauss-Seidel is a small number of sweeps (2 is typical), we
1707 // don't want to waste that copy. Thus, we do the Import
1708 // here, and skip the first Import in the first sweep.
1709 // Importing directly from X effects the copy into X_domainMap
1710 // (which is a view of X_colMap).
1711 X_colMap->doImport(X, *importer, Tpetra::INSERT);
1712 }
1713 copyBackOutput = true; // Don't forget to copy back at end.
1714 } // if column and domain Maps are (not) the same
1715
1716 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1717 if (!importer.is_null() && sweep > 0) {
1718 // We already did the first Import for the zeroth sweep above,
1719 // if it was necessary.
1720 X_colMap->doImport(*X_domainMap, *importer, Tpetra::INSERT);
1721 }
1722 // Do local Gauss-Seidel (forward, backward or symmetric)
1723 serialGaussSeidel_->apply(*X_colMap, B, direction);
1724 }
1725
1726 if (copyBackOutput) {
1727 try {
1728 deep_copy(X, *X_domainMap); // Copy result back into X.
1729 } catch (std::exception& e) {
1730 TEUCHOS_TEST_FOR_EXCEPTION(
1731 true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
1732 "threw an exception: "
1733 << e.what());
1734 }
1735 }
1736
1737 // For each column of output, for each sweep over the matrix:
1738 //
1739 // - One + and one * for each matrix entry
1740 // - One / and one + for each row of the matrix
1741 // - If the damping factor is not one: one * for each row of the
1742 // matrix. (It's not fair to count this if the damping factor is
1743 // one, since the implementation could skip it. Whether it does
1744 // or not is the implementation's choice.)
1745
1746 // Floating-point operations due to the damping factor, per matrix
1747 // row, per direction, per columm of output.
1748 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1749 const double numVectors = X.getNumVectors();
1750 const double numGlobalRows = A_->getGlobalNumRows();
1751 const double numGlobalNonzeros = A_->getGlobalNumEntries();
1752 ApplyFlops_ += NumSweeps_ * numVectors *
1753 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1754}
1755
1756template <class MatrixType>
1757void Relaxation<MatrixType>::
1758 ApplyInverseSerialGS_BlockCrsMatrix(const block_crs_matrix_type& A,
1759 const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1760 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1761 Tpetra::ESweepDirection direction) {
1762 using Teuchos::RCP;
1763 using Teuchos::rcp;
1764 using Teuchos::rcpFromRef;
1765 using Tpetra::INSERT;
1766
1767 // FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
1768 //
1769 // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
1770 // multiple right-hand sides, unless the input or output MultiVector
1771 // does not have constant stride. We should check for that case
1772 // here, in case it doesn't work in localGaussSeidel (which is
1773 // entirely possible).
1774 block_multivector_type yBlock(Y, *(A.getGraph()->getDomainMap()), A.getBlockSize());
1775 const block_multivector_type xBlock(X, *(A.getColMap()), A.getBlockSize());
1776
1777 bool performImport = false;
1778 RCP<block_multivector_type> yBlockCol;
1779 if (Importer_.is_null()) {
1780 yBlockCol = rcpFromRef(yBlock);
1781 } else {
1782 if (yBlockColumnPointMap_.is_null() ||
1783 yBlockColumnPointMap_->getNumVectors() != yBlock.getNumVectors() ||
1784 yBlockColumnPointMap_->getBlockSize() != yBlock.getBlockSize()) {
1785 yBlockColumnPointMap_ =
1786 rcp(new block_multivector_type(*(A.getColMap()), A.getBlockSize(),
1787 static_cast<local_ordinal_type>(yBlock.getNumVectors())));
1788 }
1789 yBlockCol = yBlockColumnPointMap_;
1790 if (pointImporter_.is_null()) {
1791 auto srcMap = rcp(new map_type(yBlock.getPointMap()));
1792 auto tgtMap = rcp(new map_type(yBlockCol->getPointMap()));
1793 pointImporter_ = rcp(new import_type(srcMap, tgtMap));
1794 }
1795 performImport = true;
1796 }
1797
1798 multivector_type yBlock_mv;
1799 multivector_type yBlockCol_mv;
1800 RCP<const multivector_type> yBlockColPointDomain;
1801 if (performImport) { // create views (shallow copies)
1802 yBlock_mv = yBlock.getMultiVectorView();
1803 yBlockCol_mv = yBlockCol->getMultiVectorView();
1804 yBlockColPointDomain =
1805 yBlockCol_mv.offsetView(A.getDomainMap(), 0);
1806 }
1807
1808 if (ZeroStartingSolution_) {
1809 yBlockCol->putScalar(STS::zero());
1810 } else if (performImport) {
1811 yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1812 }
1813
1814 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1815 if (performImport && sweep > 0) {
1816 yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1817 }
1818 serialGaussSeidel_->applyBlock(*yBlockCol, xBlock, direction);
1819 if (performImport) {
1820 Tpetra::deep_copy(Y, *yBlockColPointDomain);
1821 }
1822 }
1823}
1824
1825template <class MatrixType>
1826void Relaxation<MatrixType>::
1827 ApplyInverseMTGS_CrsMatrix(
1828 const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& B,
1829 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1830 Tpetra::ESweepDirection direction) const {
1831 using Teuchos::as;
1832 using Teuchos::null;
1833 using Teuchos::RCP;
1834 using Teuchos::rcp;
1835 using Teuchos::rcp_const_cast;
1836 using Teuchos::rcpFromRef;
1837
1838 typedef scalar_type Scalar;
1839
1840 const char prefix[] = "Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1841 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1842
1843 auto crsMat = Details::getCrsMatrix(A_);
1844 TEUCHOS_TEST_FOR_EXCEPTION(crsMat.is_null(), std::logic_error,
1845 "Ifpack2::Relaxation::apply: "
1846 "Multithreaded Gauss-Seidel methods currently only work when the "
1847 "input matrix is a Tpetra::CrsMatrix.");
1848
1849 // Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
1850 TEUCHOS_TEST_FOR_EXCEPTION(!localSmoothingIndices_.is_null(), std::logic_error,
1851 "Ifpack2's implementation of Multithreaded Gauss-Seidel does not "
1852 "implement the case where the user supplies an iteration order. "
1853 "This error used to appear as \"MT GaussSeidel ignores the given "
1854 "order\". "
1855 "I tried to add more explanation, but I didn't implement \"MT "
1856 "GaussSeidel\" [sic]. "
1857 "You'll have to ask the person who did.");
1858
1859 TEUCHOS_TEST_FOR_EXCEPTION(!crsMat->isFillComplete(), std::runtime_error, prefix << "The "
1860 "input CrsMatrix is not fill complete. Please call fillComplete "
1861 "on the matrix, then do setup again, before calling apply(). "
1862 "\"Do setup\" means that if only the matrix's values have changed "
1863 "since last setup, you need only call compute(). If the matrix's "
1864 "structure may also have changed, you must first call initialize(), "
1865 "then call compute(). If you have not set up this preconditioner "
1866 "for this matrix before, you must first call initialize(), then "
1867 "call compute().");
1868 TEUCHOS_TEST_FOR_EXCEPTION(NumSweeps_ < 0, std::logic_error, prefix << ": NumSweeps_ = " << NumSweeps_ << " < 0. Please report this bug to the Ifpack2 "
1869 "developers.");
1870 if (NumSweeps_ == 0) {
1871 return;
1872 }
1873
1874 RCP<const import_type> importer = crsMat->getGraph()->getImporter();
1875 TEUCHOS_TEST_FOR_EXCEPTION(
1876 !crsMat->getGraph()->getExporter().is_null(), std::runtime_error,
1877 "This method's implementation currently requires that the matrix's row, "
1878 "domain, and range Maps be the same. This cannot be the case, because "
1879 "the matrix has a nontrivial Export object.");
1880
1881 RCP<const map_type> domainMap = crsMat->getDomainMap();
1882 RCP<const map_type> rangeMap = crsMat->getRangeMap();
1883 RCP<const map_type> rowMap = crsMat->getGraph()->getRowMap();
1884 RCP<const map_type> colMap = crsMat->getGraph()->getColMap();
1885
1886#ifdef HAVE_IFPACK2_DEBUG
1887 {
1888 // The relation 'isSameAs' is transitive. It's also a
1889 // collective, so we don't have to do a "shared" test for
1890 // exception (i.e., a global reduction on the test value).
1891 TEUCHOS_TEST_FOR_EXCEPTION(
1892 !X.getMap()->isSameAs(*domainMap), std::runtime_error,
1893 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1894 "multivector X be in the domain Map of the matrix.");
1895 TEUCHOS_TEST_FOR_EXCEPTION(
1896 !B.getMap()->isSameAs(*rangeMap), std::runtime_error,
1897 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1898 "B be in the range Map of the matrix.");
1899 TEUCHOS_TEST_FOR_EXCEPTION(
1900 !rowMap->isSameAs(*rangeMap), std::runtime_error,
1901 "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1902 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1903 TEUCHOS_TEST_FOR_EXCEPTION(
1904 !domainMap->isSameAs(*rangeMap), std::runtime_error,
1905 "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1906 "the range Map of the matrix be the same.");
1907 }
1908#else
1909 // Forestall any compiler warnings for unused variables.
1910 (void)rangeMap;
1911 (void)rowMap;
1912#endif // HAVE_IFPACK2_DEBUG
1913
1914 // Fetch a (possibly cached) temporary column Map multivector
1915 // X_colMap, and a domain Map view X_domainMap of it. Both have
1916 // constant stride by construction. We know that the domain Map
1917 // must include the column Map, because our Gauss-Seidel kernel
1918 // requires that the row Map, domain Map, and range Map are all
1919 // the same, and that each process owns all of its own diagonal
1920 // entries of the matrix.
1921
1922 RCP<multivector_type> X_colMap;
1923 RCP<multivector_type> X_domainMap;
1924 bool copyBackOutput = false;
1925 if (importer.is_null()) {
1926 if (X.isConstantStride()) {
1927 X_colMap = rcpFromRef(X);
1928 X_domainMap = rcpFromRef(X);
1929
1930 // Column Map and domain Map are the same, so there are no
1931 // remote entries. Thus, if we are not setting the initial
1932 // guess to zero, we don't have to worry about setting remote
1933 // entries to zero, even though we are not doing an Import in
1934 // this case.
1935 if (ZeroStartingSolution_) {
1936 X_colMap->putScalar(ZERO);
1937 }
1938 // No need to copy back to X at end.
1939 } else {
1940 // We must copy X into a constant stride multivector.
1941 // Just use the cached column Map multivector for that.
1942 // force=true means fill with zeros, so no need to fill
1943 // remote entries (not in domain Map) with zeros.
1944 updateCachedMultiVector(colMap, as<size_t>(X.getNumVectors()));
1945 X_colMap = cachedMV_;
1946 // X_domainMap is always a domain Map view of the column Map
1947 // multivector. In this case, the domain and column Maps are
1948 // the same, so X_domainMap _is_ X_colMap.
1949 X_domainMap = X_colMap;
1950 if (!ZeroStartingSolution_) { // Don't copy if zero initial guess
1951 try {
1952 deep_copy(*X_domainMap, X); // Copy X into constant stride MV
1953 } catch (std::exception& e) {
1954 std::ostringstream os;
1955 os << "Ifpack2::Relaxation::MTGaussSeidel: "
1956 "deep_copy(*X_domainMap, X) threw an exception: "
1957 << e.what() << ".";
1958 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what());
1959 }
1960 }
1961 copyBackOutput = true; // Don't forget to copy back at end.
1962 /*
1963 TPETRA_EFFICIENCY_WARNING(
1964 ! X.isConstantStride (),
1965 std::runtime_error,
1966 "MTGaussSeidel: The current implementation of the Gauss-Seidel "
1967 "kernel requires that X and B both have constant stride. Since X "
1968 "does not have constant stride, we had to make a copy. This is a "
1969 "limitation of the current implementation and not your fault, but we "
1970 "still report it as an efficiency warning for your information.");
1971 */
1972 }
1973 } else { // Column Map and domain Map are _not_ the same.
1974 updateCachedMultiVector(colMap, as<size_t>(X.getNumVectors()));
1975 X_colMap = cachedMV_;
1976
1977 X_domainMap = X_colMap->offsetViewNonConst(domainMap, 0);
1978
1979 if (ZeroStartingSolution_) {
1980 // No need for an Import, since we're filling with zeros.
1981 X_colMap->putScalar(ZERO);
1982 } else {
1983 // We could just copy X into X_domainMap. However, that
1984 // wastes a copy, because the Import also does a copy (plus
1985 // communication). Since the typical use case for
1986 // Gauss-Seidel is a small number of sweeps (2 is typical), we
1987 // don't want to waste that copy. Thus, we do the Import
1988 // here, and skip the first Import in the first sweep.
1989 // Importing directly from X effects the copy into X_domainMap
1990 // (which is a view of X_colMap).
1991 X_colMap->doImport(X, *importer, Tpetra::CombineMode::INSERT);
1992 }
1993 copyBackOutput = true; // Don't forget to copy back at end.
1994 } // if column and domain Maps are (not) the same
1995
1996 // The Gauss-Seidel / SOR kernel expects multivectors of constant
1997 // stride. X_colMap is by construction, but B might not be. If
1998 // it's not, we have to make a copy.
1999 RCP<const multivector_type> B_in;
2000 if (B.isConstantStride()) {
2001 B_in = rcpFromRef(B);
2002 } else {
2003 // Range Map and row Map are the same in this case, so we can
2004 // use the cached row Map multivector to store a constant stride
2005 // copy of B.
2006 RCP<multivector_type> B_in_nonconst = rcp(new multivector_type(rowMap, B.getNumVectors()));
2007 try {
2008 deep_copy(*B_in_nonconst, B);
2009 } catch (std::exception& e) {
2010 std::ostringstream os;
2011 os << "Ifpack2::Relaxation::MTGaussSeidel: "
2012 "deep_copy(*B_in_nonconst, B) threw an exception: "
2013 << e.what() << ".";
2014 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what());
2015 }
2016 B_in = rcp_const_cast<const multivector_type>(B_in_nonconst);
2017
2018 /*
2019 TPETRA_EFFICIENCY_WARNING(
2020 ! B.isConstantStride (),
2021 std::runtime_error,
2022 "MTGaussSeidel: The current implementation requires that B have "
2023 "constant stride. Since B does not have constant stride, we had to "
2024 "copy it into a separate constant-stride multivector. This is a "
2025 "limitation of the current implementation and not your fault, but we "
2026 "still report it as an efficiency warning for your information.");
2027 */
2028 }
2029
2030 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice();
2031
2032 bool update_y_vector = true;
2033 // false as it was done up already, and we dont want to zero it in each sweep.
2034 bool zero_x_vector = false;
2035
2036 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2037 if (!importer.is_null() && sweep > 0) {
2038 // We already did the first Import for the zeroth sweep above,
2039 // if it was necessary.
2040 X_colMap->doImport(*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
2041 }
2042
2043 if (direction == Tpetra::Symmetric) {
2044 KokkosSparse::symmetric_gauss_seidel_apply(mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2045 kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2046 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2047 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2048 zero_x_vector, update_y_vector, DampingFactor_, 1);
2049 } else if (direction == Tpetra::Forward) {
2050 KokkosSparse::forward_sweep_gauss_seidel_apply(mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2051 kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2052 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2053 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2054 zero_x_vector, update_y_vector, DampingFactor_, 1);
2055 } else if (direction == Tpetra::Backward) {
2056 KokkosSparse::backward_sweep_gauss_seidel_apply(mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2057 kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2058 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2059 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2060 zero_x_vector, update_y_vector, DampingFactor_, 1);
2061 } else {
2062 TEUCHOS_TEST_FOR_EXCEPTION(
2063 true, std::invalid_argument,
2064 prefix << "The 'direction' enum does not have any of its valid "
2065 "values: Forward, Backward, or Symmetric.");
2066 }
2067 update_y_vector = false;
2068 }
2069
2070 if (copyBackOutput) {
2071 try {
2072 deep_copy(X, *X_domainMap); // Copy result back into X.
2073 } catch (std::exception& e) {
2074 TEUCHOS_TEST_FOR_EXCEPTION(
2075 true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
2076 "threw an exception: "
2077 << e.what());
2078 }
2079 }
2080
2081 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2082 const double numVectors = as<double>(X.getNumVectors());
2083 const double numGlobalRows = as<double>(A_->getGlobalNumRows());
2084 const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
2085 double ApplyFlops = NumSweeps_ * numVectors *
2086 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2087 if (direction == Tpetra::Symmetric)
2088 ApplyFlops *= 2.0;
2089 ApplyFlops_ += ApplyFlops;
2090}
2091
2092template <class MatrixType>
2094 std::ostringstream os;
2095
2096 // Output is a valid YAML dictionary in flow style. If you don't
2097 // like everything on a single line, you should call describe()
2098 // instead.
2099 os << "\"Ifpack2::Relaxation\": {";
2100
2101 os << "Initialized: " << (isInitialized() ? "true" : "false") << ", "
2102 << "Computed: " << (isComputed() ? "true" : "false") << ", ";
2103
2104 // It's useful to print this instance's relaxation method (Jacobi,
2105 // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info
2106 // than that, call describe() instead.
2107 os << "Type: ";
2108 if (PrecType_ == Ifpack2::Details::JACOBI) {
2109 os << "Jacobi";
2110 } else if (PrecType_ == Ifpack2::Details::GS) {
2111 os << "Gauss-Seidel";
2112 } else if (PrecType_ == Ifpack2::Details::SGS) {
2113 os << "Symmetric Gauss-Seidel";
2114 } else if (PrecType_ == Ifpack2::Details::MTGS) {
2115 os << "MT Gauss-Seidel";
2116 } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2117 os << "MT Symmetric Gauss-Seidel";
2118 } else if (PrecType_ == Ifpack2::Details::GS2) {
2119 os << "Two-stage Gauss-Seidel";
2120 } else if (PrecType_ == Ifpack2::Details::SGS2) {
2121 os << "Two-stage Symmetric Gauss-Seidel";
2122 } else {
2123 os << "INVALID";
2124 }
2125 if (hasBlockCrsMatrix_)
2126 os << ", BlockCrs";
2127
2128 os << ", "
2129 << "sweeps: " << NumSweeps_ << ", "
2130 << "damping factor: " << DampingFactor_ << ", ";
2131
2132 if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2133 os << "\"relaxation: mtgs cluster size\": " << clusterSize_ << ", ";
2134 os << "\"relaxation: long row threshold\": " << longRowThreshold_ << ", ";
2135 os << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << ", ";
2136 os << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2137 switch (mtColoringAlgorithm_) {
2138 case KokkosGraph::COLORING_DEFAULT:
2139 os << "DEFAULT";
2140 break;
2141 case KokkosGraph::COLORING_SERIAL:
2142 os << "SERIAL";
2143 break;
2144 case KokkosGraph::COLORING_VB:
2145 os << "VB";
2146 break;
2147 case KokkosGraph::COLORING_VBBIT:
2148 os << "VBBIT";
2149 break;
2150 case KokkosGraph::COLORING_VBCS:
2151 os << "VBCS";
2152 break;
2153 case KokkosGraph::COLORING_VBD:
2154 os << "VBD";
2155 break;
2156 case KokkosGraph::COLORING_VBDBIT:
2157 os << "VBDBIT";
2158 break;
2159 case KokkosGraph::COLORING_EB:
2160 os << "EB";
2161 break;
2162 default:
2163 os << "*Invalid*";
2164 }
2165 os << ", ";
2166 }
2167
2168 if (PrecType_ == Ifpack2::Details::GS2 ||
2169 PrecType_ == Ifpack2::Details::SGS2) {
2170 os << "outer sweeps: " << NumOuterSweeps_ << ", "
2171 << "inner sweeps: " << NumInnerSweeps_ << ", "
2172 << "inner damping factor: " << InnerDampingFactor_ << ", ";
2173 }
2174
2175 if (DoL1Method_) {
2176 os << "use l1: " << DoL1Method_ << ", "
2177 << "l1 eta: " << L1Eta_ << ", ";
2178 }
2179
2180 if (A_.is_null()) {
2181 os << "Matrix: null";
2182 } else {
2183 os << "Global matrix dimensions: ["
2184 << A_->getGlobalNumRows() << ", " << A_->getGlobalNumCols() << "]"
2185 << ", Global nnz: " << A_->getGlobalNumEntries();
2186 }
2187
2188 os << "}";
2189 return os.str();
2190}
2191
2192template <class MatrixType>
2194 describe(Teuchos::FancyOStream& out,
2195 const Teuchos::EVerbosityLevel verbLevel) const {
2196 using std::endl;
2197 using Teuchos::OSTab;
2198 using Teuchos::TypeNameTraits;
2199 using Teuchos::VERB_DEFAULT;
2200 using Teuchos::VERB_EXTREME;
2201 using Teuchos::VERB_HIGH;
2202 using Teuchos::VERB_LOW;
2203 using Teuchos::VERB_MEDIUM;
2204 using Teuchos::VERB_NONE;
2205
2206 const Teuchos::EVerbosityLevel vl =
2207 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2208
2209 const int myRank = this->getComm()->getRank();
2210
2211 // none: print nothing
2212 // low: print O(1) info from Proc 0
2213 // medium:
2214 // high:
2215 // extreme:
2216
2217 if (vl != VERB_NONE && myRank == 0) {
2218 // Describable interface asks each implementation to start with a tab.
2219 OSTab tab1(out);
2220 // Output is valid YAML; hence the quotes, to protect the colons.
2221 out << "\"Ifpack2::Relaxation\":" << endl;
2222 OSTab tab2(out);
2223 out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name() << "\""
2224 << endl;
2225 if (this->getObjectLabel() != "") {
2226 out << "Label: " << this->getObjectLabel() << endl;
2227 }
2228 out << "Initialized: " << (isInitialized() ? "true" : "false") << endl
2229 << "Computed: " << (isComputed() ? "true" : "false") << endl
2230 << "Parameters: " << endl;
2231 {
2232 OSTab tab3(out);
2233 out << "\"relaxation: type\": ";
2234 if (PrecType_ == Ifpack2::Details::JACOBI) {
2235 out << "Jacobi";
2236 } else if (PrecType_ == Ifpack2::Details::GS) {
2237 out << "Gauss-Seidel";
2238 } else if (PrecType_ == Ifpack2::Details::SGS) {
2239 out << "Symmetric Gauss-Seidel";
2240 } else if (PrecType_ == Ifpack2::Details::MTGS) {
2241 out << "MT Gauss-Seidel";
2242 } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2243 out << "MT Symmetric Gauss-Seidel";
2244 } else if (PrecType_ == Ifpack2::Details::GS2) {
2245 out << "Two-stage Gauss-Seidel";
2246 } else if (PrecType_ == Ifpack2::Details::SGS2) {
2247 out << "Two-stage Symmetric Gauss-Seidel";
2248 } else {
2249 out << "INVALID";
2250 }
2251 // We quote these parameter names because they contain colons.
2252 // YAML uses the colon to distinguish key from value.
2253 out << endl
2254 << "\"relaxation: sweeps\": " << NumSweeps_ << endl
2255 << "\"relaxation: damping factor\": " << DampingFactor_ << endl
2256 << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2257 << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2258 << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2259 << "\"relaxation: use l1\": " << DoL1Method_ << endl
2260 << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
2261 if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2262 out << "\"relaxation: mtgs cluster size\": " << clusterSize_ << endl;
2263 out << "\"relaxation: long row threshold\": " << longRowThreshold_ << endl;
2264 out << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << endl;
2265 out << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2266 switch (mtColoringAlgorithm_) {
2267 case KokkosGraph::COLORING_DEFAULT:
2268 out << "DEFAULT";
2269 break;
2270 case KokkosGraph::COLORING_SERIAL:
2271 out << "SERIAL";
2272 break;
2273 case KokkosGraph::COLORING_VB:
2274 out << "VB";
2275 break;
2276 case KokkosGraph::COLORING_VBBIT:
2277 out << "VBBIT";
2278 break;
2279 case KokkosGraph::COLORING_VBCS:
2280 out << "VBCS";
2281 break;
2282 case KokkosGraph::COLORING_VBD:
2283 out << "VBD";
2284 break;
2285 case KokkosGraph::COLORING_VBDBIT:
2286 out << "VBDBIT";
2287 break;
2288 case KokkosGraph::COLORING_EB:
2289 out << "EB";
2290 break;
2291 default:
2292 out << "*Invalid*";
2293 }
2294 out << endl;
2295 }
2296 if (PrecType_ == Ifpack2::Details::GS2 || PrecType_ == Ifpack2::Details::SGS2) {
2297 out << "\"relaxation: inner damping factor\": " << InnerDampingFactor_ << endl;
2298 out << "\"relaxation: outer sweeps\" : " << NumOuterSweeps_ << endl;
2299 out << "\"relaxation: inner sweeps\" : " << NumInnerSweeps_ << endl;
2300 }
2301 }
2302 out << "Computed quantities:" << endl;
2303 {
2304 OSTab tab3(out);
2305 out << "Global number of rows: " << A_->getGlobalNumRows() << endl
2306 << "Global number of columns: " << A_->getGlobalNumCols() << endl;
2307 }
2308 if (checkDiagEntries_ && isComputed()) {
2309 out << "Properties of input diagonal entries:" << endl;
2310 {
2311 OSTab tab3(out);
2312 out << "Magnitude of minimum-magnitude diagonal entry: "
2313 << globalMinMagDiagEntryMag_ << endl
2314 << "Magnitude of maximum-magnitude diagonal entry: "
2315 << globalMaxMagDiagEntryMag_ << endl
2316 << "Number of diagonal entries with small magnitude: "
2317 << globalNumSmallDiagEntries_ << endl
2318 << "Number of zero diagonal entries: "
2319 << globalNumZeroDiagEntries_ << endl
2320 << "Number of diagonal entries with negative real part: "
2321 << globalNumNegDiagEntries_ << endl
2322 << "Abs 2-norm diff between computed and actual inverse "
2323 << "diagonal: " << globalDiagNormDiff_ << endl;
2324 }
2325 }
2326 if (isComputed()) {
2327 out << "Saved diagonal offsets: "
2328 << (savedDiagOffsets_ ? "true" : "false") << endl;
2329 }
2330 out << "Call counts and total times (in seconds): " << endl;
2331 {
2332 OSTab tab3(out);
2333 out << "initialize: " << endl;
2334 {
2335 OSTab tab4(out);
2336 out << "Call count: " << NumInitialize_ << endl;
2337 out << "Total time: " << InitializeTime_ << endl;
2338 }
2339 out << "compute: " << endl;
2340 {
2341 OSTab tab4(out);
2342 out << "Call count: " << NumCompute_ << endl;
2343 out << "Total time: " << ComputeTime_ << endl;
2344 }
2345 out << "apply: " << endl;
2346 {
2347 OSTab tab4(out);
2348 out << "Call count: " << NumApply_ << endl;
2349 out << "Total time: " << ApplyTime_ << endl;
2350 }
2351 }
2352 }
2353}
2354
2355} // namespace Ifpack2
2356
2357#define IFPACK2_RELAXATION_INSTANT(S, LO, GO, N) \
2358 template class Ifpack2::Relaxation<Tpetra::RowMatrix<S, LO, GO, N>>;
2359
2360#endif // IFPACK2_RELAXATION_DEF_HPP
File for utility functions.
Ifpack2's implementation of Trilinos::Details::LinearSolver interface.
Definition Ifpack2_Details_LinearSolver_decl.hpp:75
Diagonal preconditioner.
Definition Ifpack2_Diagonal_decl.hpp:45
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition Ifpack2_Relaxation_decl.hpp:215
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition Ifpack2_Relaxation_def.hpp:186
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:227
void setParameters(const Teuchos::ParameterList &params)
Set the relaxation / preconditioner parameters.
Definition Ifpack2_Relaxation_def.hpp:438
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:230
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition Ifpack2_Relaxation_def.hpp:446
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition Ifpack2_Relaxation_def.hpp:465
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition Ifpack2_Relaxation_def.hpp:518
int getNumCompute() const
Total number of calls to compute().
Definition Ifpack2_Relaxation_def.hpp:498
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition Ifpack2_Relaxation_def.hpp:457
void compute()
Compute the preconditioner ("numeric setup");.
Definition Ifpack2_Relaxation_def.hpp:964
void initialize()
Initialize the preconditioner ("symbolic setup").
Definition Ifpack2_Relaxation_def.hpp:678
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition Ifpack2_Relaxation_def.hpp:513
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object's attributes to the given output stream.
Definition Ifpack2_Relaxation_def.hpp:2194
std::string description() const
A simple one-line description of this object.
Definition Ifpack2_Relaxation_def.hpp:2093
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition Ifpack2_Relaxation_def.hpp:195
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition Ifpack2_Relaxation_def.hpp:508
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition Ifpack2_Relaxation_def.hpp:166
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:221
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition Ifpack2_Relaxation_def.hpp:545
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:224
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the input matrix to X, returning the result in Y.
Definition Ifpack2_Relaxation_def.hpp:657
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition Ifpack2_Relaxation_decl.hpp:241
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition Ifpack2_Relaxation_decl.hpp:236
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition Ifpack2_Relaxation_def.hpp:533
int getNumApply() const
Total number of calls to apply().
Definition Ifpack2_Relaxation_def.hpp:503
int getNumInitialize() const
Total number of calls to initialize().
Definition Ifpack2_Relaxation_def.hpp:493
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition Ifpack2_Relaxation_def.hpp:523
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition Ifpack2_Relaxation_def.hpp:488
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition Ifpack2_Relaxation_def.hpp:478
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition Ifpack2_Relaxation_def.hpp:528
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40
void getValidParameters(Teuchos::ParameterList &params)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition Ifpack2_Parameters.cpp:18