Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Relaxation_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_RELAXATION_DEF_HPP
11#define IFPACK2_RELAXATION_DEF_HPP
12
13#include "Teuchos_StandardParameterEntryValidators.hpp"
14#include "Teuchos_TimeMonitor.hpp"
15#include "Tpetra_CrsMatrix.hpp"
16#include "Tpetra_BlockCrsMatrix.hpp"
17#include "Tpetra_BlockView.hpp"
18#include "Ifpack2_Utilities.hpp"
19#include "Ifpack2_Details_getCrsMatrix.hpp"
21#include "MatrixMarket_Tpetra.hpp"
22#include "Tpetra_Details_residual.hpp"
23#include <cstdlib>
24#include <memory>
25#include <sstream>
26#include "KokkosSparse_gauss_seidel.hpp"
27
28namespace {
29// Validate that a given int is nonnegative.
30class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
31 public:
32 // Constructor (does nothing).
33 NonnegativeIntValidator() {}
34
35 // ParameterEntryValidator wants this method.
36 Teuchos::ParameterEntryValidator::ValidStringsList validStringValues() const {
37 return Teuchos::null;
38 }
39
40 // Actually validate the parameter's value.
41 void
42 validate(const Teuchos::ParameterEntry& entry,
43 const std::string& paramName,
44 const std::string& sublistName) const {
45 using std::endl;
46 Teuchos::any anyVal = entry.getAny(true);
47 const std::string entryName = entry.getAny(false).typeName();
48
49 TEUCHOS_TEST_FOR_EXCEPTION(
50 anyVal.type() != typeid(int),
51 Teuchos::Exceptions::InvalidParameterType,
52 "Parameter \"" << paramName << "\" in sublist \"" << sublistName
53 << "\" has the wrong type." << endl
54 << "Parameter: " << paramName
55 << endl
56 << "Type specified: " << entryName << endl
57 << "Type required: int" << endl);
58
59 const int val = Teuchos::any_cast<int>(anyVal);
60 TEUCHOS_TEST_FOR_EXCEPTION(
61 val < 0, Teuchos::Exceptions::InvalidParameterValue,
62 "Parameter \"" << paramName << "\" in sublist \"" << sublistName
63 << "\" is negative." << endl
64 << "Parameter: " << paramName
65 << endl
66 << "Value specified: " << val << endl
67 << "Required range: [0, INT_MAX]" << endl);
68 }
69
70 // ParameterEntryValidator wants this method.
71 const std::string getXMLTypeName() const {
72 return "NonnegativeIntValidator";
73 }
74
75 // ParameterEntryValidator wants this method.
76 void
77 printDoc(const std::string& docString,
78 std::ostream& out) const {
79 Teuchos::StrUtils::printLines(out, "# ", docString);
80 out << "#\tValidator Used: " << std::endl;
81 out << "#\t\tNonnegativeIntValidator" << std::endl;
82 }
83};
84
85// A way to get a small positive number (eps() for floating-point
86// types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
87// define it (for example, for integer values).
88template <class Scalar, const bool isOrdinal = Teuchos::ScalarTraits<Scalar>::isOrdinal>
89class SmallTraits {
90 public:
91 // Return eps if Scalar is a floating-point type, else return 1.
92 static const Scalar eps();
93};
94
95// Partial specialization for when Scalar is not a floating-point type.
96template <class Scalar>
97class SmallTraits<Scalar, true> {
98 public:
99 static const Scalar eps() {
100 return Teuchos::ScalarTraits<Scalar>::one();
101 }
102};
103
104// Partial specialization for when Scalar is a floating-point type.
105template <class Scalar>
106class SmallTraits<Scalar, false> {
107 public:
108 static const Scalar eps() {
109 return Teuchos::ScalarTraits<Scalar>::eps();
110 }
111};
112
113// Work-around for GitHub Issue #5269.
114template <class Scalar,
115 const bool isComplex = Teuchos::ScalarTraits<Scalar>::isComplex>
116struct RealTraits {};
117
118template <class Scalar>
119struct RealTraits<Scalar, false> {
120 using val_type = Scalar;
121 using mag_type = Scalar;
122 static KOKKOS_INLINE_FUNCTION mag_type real(const val_type& z) {
123 return z;
124 }
125};
126
127template <class Scalar>
128struct RealTraits<Scalar, true> {
129 using val_type = Scalar;
130 using mag_type = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
131 static KOKKOS_INLINE_FUNCTION mag_type real(const val_type& z) {
132 return KokkosKernels::ArithTraits<val_type>::real(z);
133 }
134};
135
136template <class Scalar>
137KOKKOS_INLINE_FUNCTION typename RealTraits<Scalar>::mag_type
138getRealValue(const Scalar& z) {
139 return RealTraits<Scalar>::real(z);
140}
141
142} // namespace
143
144namespace Ifpack2 {
145
146template <class MatrixType>
147void Relaxation<MatrixType>::
148 updateCachedMultiVector(const Teuchos::RCP<const Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>>& map,
149 size_t numVecs) const {
150 // Allocate a multivector if the cached one isn't perfect. Checking
151 // for map pointer equality is much cheaper than Map::isSameAs.
152 if (cachedMV_.is_null() ||
153 map.get() != cachedMV_->getMap().get() ||
154 cachedMV_->getNumVectors() != numVecs) {
155 using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
156 global_ordinal_type, node_type>;
157 cachedMV_ = Teuchos::rcp(new MV(map, numVecs, false));
158 }
159}
160
161template <class MatrixType>
163 setMatrix(const Teuchos::RCP<const row_matrix_type>& A) {
164 if (A.getRawPtr() != A_.getRawPtr()) { // it's a different matrix
165 Importer_ = Teuchos::null;
166 pointImporter_ = Teuchos::null;
167 Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
168 isInitialized_ = false;
169 IsComputed_ = false;
170 diagOffsets_ = Kokkos::View<size_t*, device_type>();
171 savedDiagOffsets_ = false;
172 hasBlockCrsMatrix_ = false;
173 serialGaussSeidel_ = Teuchos::null;
174 if (!A.is_null()) {
175 IsParallel_ = (A->getRowMap()->getComm()->getSize() > 1);
176 }
177 A_ = A;
178 }
179}
180
181template <class MatrixType>
183 Relaxation(const Teuchos::RCP<const row_matrix_type>& A)
184 : A_(A)
185 , IsParallel_((A.is_null() || A->getRowMap().is_null() || A->getRowMap()->getComm().is_null()) ? false : // a reasonable default if there's no communicator
186 A->getRowMap()->getComm()->getSize() > 1) {
187 this->setObjectLabel("Ifpack2::Relaxation");
188}
189
190template <class MatrixType>
191Teuchos::RCP<const Teuchos::ParameterList>
193 using Teuchos::Array;
194 using Teuchos::ParameterList;
195 using Teuchos::parameterList;
196 using Teuchos::RCP;
197 using Teuchos::rcp;
198 using Teuchos::rcp_const_cast;
199 using Teuchos::rcp_implicit_cast;
200 using Teuchos::setStringToIntegralParameter;
201 typedef Teuchos::ParameterEntryValidator PEV;
202
203 if (validParams_.is_null()) {
204 RCP<ParameterList> pl = parameterList("Ifpack2::Relaxation");
205
206 // Set a validator that automatically converts from the valid
207 // string options to their enum values.
208 Array<std::string> precTypes(8);
209 precTypes[0] = "Jacobi";
210 precTypes[1] = "Gauss-Seidel";
211 precTypes[2] = "Symmetric Gauss-Seidel";
212 precTypes[3] = "MT Gauss-Seidel";
213 precTypes[4] = "MT Symmetric Gauss-Seidel";
214 precTypes[5] = "Richardson";
215 precTypes[6] = "Two-stage Gauss-Seidel";
216 precTypes[7] = "Two-stage Symmetric Gauss-Seidel";
217 Array<Details::RelaxationType> precTypeEnums(8);
218 precTypeEnums[0] = Details::JACOBI;
219 precTypeEnums[1] = Details::GS;
220 precTypeEnums[2] = Details::SGS;
221 precTypeEnums[3] = Details::MTGS;
222 precTypeEnums[4] = Details::MTSGS;
223 precTypeEnums[5] = Details::RICHARDSON;
224 precTypeEnums[6] = Details::GS2;
225 precTypeEnums[7] = Details::SGS2;
226 const std::string defaultPrecType("Jacobi");
227 setStringToIntegralParameter<Details::RelaxationType>("relaxation: type",
228 defaultPrecType, "Relaxation method", precTypes(), precTypeEnums(),
229 pl.getRawPtr());
230
231 const int numSweeps = 1;
232 RCP<PEV> numSweepsValidator =
233 rcp_implicit_cast<PEV>(rcp(new NonnegativeIntValidator));
234 pl->set("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
235 rcp_const_cast<const PEV>(numSweepsValidator));
236
237 // number of 'local' outer sweeps for two-stage GS
238 const int numOuterSweeps = 1;
239 RCP<PEV> numOuterSweepsValidator =
240 rcp_implicit_cast<PEV>(rcp(new NonnegativeIntValidator));
241 pl->set("relaxation: outer sweeps", numOuterSweeps, "Number of outer local relaxation sweeps for two-stage GS",
242 rcp_const_cast<const PEV>(numOuterSweepsValidator));
243 // number of 'local' inner sweeps for two-stage GS
244 const int numInnerSweeps = 1;
245 RCP<PEV> numInnerSweepsValidator =
246 rcp_implicit_cast<PEV>(rcp(new NonnegativeIntValidator));
247 pl->set("relaxation: inner sweeps", numInnerSweeps, "Number of inner local relaxation sweeps for two-stage GS",
248 rcp_const_cast<const PEV>(numInnerSweepsValidator));
249 // specify damping factor for the inner sweeps of two-stage GS
250 const scalar_type innerDampingFactor = STS::one();
251 pl->set("relaxation: inner damping factor", innerDampingFactor, "Damping factor for the inner sweep of two-stage GS");
252 // specify whether to use sptrsv instead of inner-iterations for two-stage GS
253 const bool innerSpTrsv = false;
254 pl->set("relaxation: inner sparse-triangular solve", innerSpTrsv, "Specify whether to use sptrsv instead of JR iterations for two-stage GS");
255 // specify whether to use compact form of recurrence for two-stage GS
256 const bool compactForm = false;
257 pl->set("relaxation: compact form", compactForm, "Specify whether to use compact form of recurrence for two-stage GS");
258
259 const scalar_type dampingFactor = STS::one();
260 pl->set("relaxation: damping factor", dampingFactor);
261
262 const bool zeroStartingSolution = true;
263 pl->set("relaxation: zero starting solution", zeroStartingSolution);
264
265 const bool doBackwardGS = false;
266 pl->set("relaxation: backward mode", doBackwardGS);
267
268 const bool doL1Method = false;
269 pl->set("relaxation: use l1", doL1Method);
270
271 const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
272 (STM::one() + STM::one()); // 1.5
273 pl->set("relaxation: l1 eta", l1eta);
274
275 const scalar_type minDiagonalValue = STS::zero();
276 pl->set("relaxation: min diagonal value", minDiagonalValue);
277
278 const bool fixTinyDiagEntries = false;
279 pl->set("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
280
281 const bool checkDiagEntries = false;
282 pl->set("relaxation: check diagonal entries", checkDiagEntries);
283
284 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
285 pl->set("relaxation: local smoothing indices", localSmoothingIndices);
286
287 const bool is_matrix_structurally_symmetric = false;
288 pl->set("relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
289
290 const bool ifpack2_dump_matrix = false;
291 pl->set("relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
292
293 const int cluster_size = 1;
294 pl->set("relaxation: mtgs cluster size", cluster_size);
295
296 pl->set("relaxation: mtgs coloring algorithm", "Default");
297
298 const int long_row_threshold = 0;
299 pl->set("relaxation: long row threshold", long_row_threshold);
300
301 const bool timer_for_apply = true;
302 pl->set("timer for apply", timer_for_apply);
303
304 validParams_ = rcp_const_cast<const ParameterList>(pl);
305 }
306 return validParams_;
307}
308
309template <class MatrixType>
310void Relaxation<MatrixType>::setParametersImpl(Teuchos::ParameterList& pl) {
311 using Teuchos::getIntegralValue;
312 using Teuchos::getStringValue;
313 using Teuchos::ParameterList;
314 using Teuchos::RCP;
315 typedef scalar_type ST; // just to make code below shorter
316
317 if (pl.isType<double>("relaxation: damping factor")) {
318 // Make sure that ST=complex can run with a damping factor that is
319 // a double.
320 ST df = pl.get<double>("relaxation: damping factor");
321 pl.remove("relaxation: damping factor");
322 pl.set("relaxation: damping factor", df);
323 }
324
325 pl.validateParametersAndSetDefaults(*getValidParameters());
326
327 const Details::RelaxationType precType =
328 getIntegralValue<Details::RelaxationType>(pl, "relaxation: type");
329 const std::string precTypeStr = getStringValue<Details::RelaxationType>(pl, "relaxation: type");
330 // We only access "relaxation: type" using strings in the rest of the code
331 pl.set<std::string>("relaxation: type", precTypeStr);
332 pl.get<std::string>("relaxation: type"); // We need to mark the parameter as "used"
333 const int numSweeps = pl.get<int>("relaxation: sweeps");
334 const ST dampingFactor = pl.get<ST>("relaxation: damping factor");
335 const bool zeroStartSol = pl.get<bool>("relaxation: zero starting solution");
336 const bool doBackwardGS = pl.get<bool>("relaxation: backward mode");
337 const bool doL1Method = pl.get<bool>("relaxation: use l1");
338 const magnitude_type l1Eta = pl.get<magnitude_type>("relaxation: l1 eta");
339 const ST minDiagonalValue = pl.get<ST>("relaxation: min diagonal value");
340 const bool fixTinyDiagEntries = pl.get<bool>("relaxation: fix tiny diagonal entries");
341 const bool checkDiagEntries = pl.get<bool>("relaxation: check diagonal entries");
342 const bool is_matrix_structurally_symmetric = pl.get<bool>("relaxation: symmetric matrix structure");
343 const bool ifpack2_dump_matrix = pl.get<bool>("relaxation: ifpack2 dump matrix");
344 const bool timer_for_apply = pl.get<bool>("timer for apply");
345 int cluster_size = 1;
346 if (pl.isParameter("relaxation: mtgs cluster size")) // optional parameter
347 cluster_size = pl.get<int>("relaxation: mtgs cluster size");
348 int long_row_threshold = 0;
349 if (pl.isParameter("relaxation: long row threshold")) // optional parameter
350 long_row_threshold = pl.get<int>("relaxation: long row threshold");
351 std::string color_algo_name = pl.get<std::string>("relaxation: mtgs coloring algorithm");
352 // convert to lowercase
353 for (char& c : color_algo_name)
354 c = tolower(c);
355 if (color_algo_name == "default")
356 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_DEFAULT;
357 else if (color_algo_name == "serial")
358 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_SERIAL;
359 else if (color_algo_name == "vb")
360 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VB;
361 else if (color_algo_name == "vbbit")
362 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBBIT;
363 else if (color_algo_name == "vbcs")
364 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBCS;
365 else if (color_algo_name == "vbd")
366 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBD;
367 else if (color_algo_name == "vbdbit")
368 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBDBIT;
369 else if (color_algo_name == "eb")
370 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_EB;
371 else {
372 std::ostringstream msg;
373 msg << "Ifpack2::Relaxation: 'relaxation: mtgs coloring algorithm' = '" << color_algo_name << "' is not valid.\n";
374 msg << "Choices (not case sensitive) are: Default, Serial, VB, VBBIT, VBCS, VBD, VBDBIT, EB.";
375 TEUCHOS_TEST_FOR_EXCEPTION(
376 true, std::invalid_argument, msg.str());
377 }
378
379 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type>>("relaxation: local smoothing indices");
380
381 // for Two-stage Gauss-Seidel
382 if (!std::is_same<double, ST>::value && pl.isType<double>("relaxation: inner damping factor")) {
383 // Make sure that ST=complex can run with a damping factor that is
384 // a double.
385 ST df = pl.get<double>("relaxation: inner damping factor");
386 pl.remove("relaxation: inner damping factor");
387 pl.set("relaxation: inner damping factor", df);
388 }
389 // If long row algorithm was requested, make sure non-cluster (point) multicolor Gauss-Seidel (aka MTGS/MTSGS) will be used.
390 if (long_row_threshold > 0) {
391 TEUCHOS_TEST_FOR_EXCEPTION(
392 cluster_size != 1, std::invalid_argument,
393 "Ifpack2::Relaxation: "
394 "Requested long row MTGS/MTSGS algorithm and cluster GS/SGS, but those are not compatible.");
395 TEUCHOS_TEST_FOR_EXCEPTION(
396 precType != Details::RelaxationType::MTGS && precType != Details::RelaxationType::MTSGS,
397 std::invalid_argument,
398 "Ifpack2::Relaxation: "
399 "Requested long row MTGS/MTSGS algorithm, but this is only compatible with preconditioner types "
400 "'MT Gauss-Seidel' and 'MT Symmetric Gauss-Seidel'.");
401 }
402
403 const ST innerDampingFactor = pl.get<ST>("relaxation: inner damping factor");
404 const int numInnerSweeps = pl.get<int>("relaxation: inner sweeps");
405 const int numOuterSweeps = pl.get<int>("relaxation: outer sweeps");
406 const bool innerSpTrsv = pl.get<bool>("relaxation: inner sparse-triangular solve");
407 const bool compactForm = pl.get<bool>("relaxation: compact form");
408
409 // "Commit" the changes, now that we've validated everything.
410 PrecType_ = precType;
411 NumSweeps_ = numSweeps;
412 DampingFactor_ = dampingFactor;
413 ZeroStartingSolution_ = zeroStartSol;
414 DoBackwardGS_ = doBackwardGS;
415 DoL1Method_ = doL1Method;
416 L1Eta_ = l1Eta;
417 MinDiagonalValue_ = minDiagonalValue;
418 fixTinyDiagEntries_ = fixTinyDiagEntries;
419 checkDiagEntries_ = checkDiagEntries;
420 clusterSize_ = cluster_size;
421 longRowThreshold_ = long_row_threshold;
422 is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
423 ifpack2_dump_matrix_ = ifpack2_dump_matrix;
424 localSmoothingIndices_ = localSmoothingIndices;
425 // for Two-stage GS
426 NumInnerSweeps_ = numInnerSweeps;
427 NumOuterSweeps_ = numOuterSweeps;
428 InnerSpTrsv_ = innerSpTrsv;
429 InnerDampingFactor_ = innerDampingFactor;
430 CompactForm_ = compactForm;
431 TimerForApply_ = timer_for_apply;
432}
433
434template <class MatrixType>
435void Relaxation<MatrixType>::setParameters(const Teuchos::ParameterList& pl) {
436 // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
437 // but otherwise, we will get [unused] in pl
438 this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
439}
440
441template <class MatrixType>
442Teuchos::RCP<const Teuchos::Comm<int>>
444 TEUCHOS_TEST_FOR_EXCEPTION(
445 A_.is_null(), std::runtime_error,
446 "Ifpack2::Relaxation::getComm: "
447 "The input matrix A is null. Please call setMatrix() with a nonnull "
448 "input matrix before calling this method.");
449 return A_->getRowMap()->getComm();
450}
451
452template <class MatrixType>
453Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type>
455 return A_;
456}
457
458template <class MatrixType>
459Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
460 typename MatrixType::global_ordinal_type,
461 typename MatrixType::node_type>>
463 TEUCHOS_TEST_FOR_EXCEPTION(
464 A_.is_null(), std::runtime_error,
465 "Ifpack2::Relaxation::getDomainMap: "
466 "The input matrix A is null. Please call setMatrix() with a nonnull "
467 "input matrix before calling this method.");
468 return A_->getDomainMap();
469}
470
471template <class MatrixType>
472Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
473 typename MatrixType::global_ordinal_type,
474 typename MatrixType::node_type>>
476 TEUCHOS_TEST_FOR_EXCEPTION(
477 A_.is_null(), std::runtime_error,
478 "Ifpack2::Relaxation::getRangeMap: "
479 "The input matrix A is null. Please call setMatrix() with a nonnull "
480 "input matrix before calling this method.");
481 return A_->getRangeMap();
482}
483
484template <class MatrixType>
486 return true;
487}
488
489template <class MatrixType>
491 return (NumInitialize_);
492}
493
494template <class MatrixType>
496 return (NumCompute_);
497}
498
499template <class MatrixType>
501 return (NumApply_);
502}
503
504template <class MatrixType>
506 return (InitializeTime_);
507}
508
509template <class MatrixType>
511 return (ComputeTime_);
512}
513
514template <class MatrixType>
516 return (ApplyTime_);
517}
518
519template <class MatrixType>
521 return (ComputeFlops_);
522}
523
524template <class MatrixType>
526 return (ApplyFlops_);
527}
528
529template <class MatrixType>
531 TEUCHOS_TEST_FOR_EXCEPTION(
532 A_.is_null(), std::runtime_error,
533 "Ifpack2::Relaxation::getNodeSmootherComplexity: "
534 "The input matrix A is null. Please call setMatrix() with a nonnull "
535 "input matrix, then call compute(), before calling this method.");
536 // Relaxation methods cost roughly one apply + one diagonal inverse per iteration
537 return A_->getLocalNumRows() + A_->getLocalNumEntries();
538}
539
540template <class MatrixType>
542 apply(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
543 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
544 Teuchos::ETransp /* mode */,
545 scalar_type alpha,
546 scalar_type beta) const {
547 using Teuchos::as;
548 using Teuchos::RCP;
549 using Teuchos::rcp;
550 using Teuchos::rcpFromRef;
551 typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
553 MV;
554 TEUCHOS_TEST_FOR_EXCEPTION(
555 A_.is_null(), std::runtime_error,
556 "Ifpack2::Relaxation::apply: "
557 "The input matrix A is null. Please call setMatrix() with a nonnull "
558 "input matrix, then call compute(), before calling this method.");
559 TEUCHOS_TEST_FOR_EXCEPTION(
560 !isComputed(),
561 std::runtime_error,
562 "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
563 "preconditioner instance before you may call apply(). You may call "
564 "isComputed() to find out if compute() has been called already.");
565 TEUCHOS_TEST_FOR_EXCEPTION(
566 X.getNumVectors() != Y.getNumVectors(),
567 std::runtime_error,
568 "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
569 "X has "
570 << X.getNumVectors() << " columns, but Y has "
571 << Y.getNumVectors() << " columns.");
572 TEUCHOS_TEST_FOR_EXCEPTION(
573 beta != STS::zero(), std::logic_error,
574 "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
575 "implemented.");
576
577 Teuchos::RCP<Teuchos::Time> timer;
578 const std::string timerName("Ifpack2::Relaxation::apply");
579 if (TimerForApply_) {
580 timer = Teuchos::TimeMonitor::lookupCounter(timerName);
581 if (timer.is_null()) {
582 timer = Teuchos::TimeMonitor::getNewCounter(timerName);
583 }
584 }
585
586 Teuchos::Time time = Teuchos::Time(timerName);
587 double startTime = time.wallTime();
588
589 {
590 Teuchos::RCP<Teuchos::TimeMonitor> timeMon;
591 if (TimerForApply_)
592 timeMon = Teuchos::rcp(new Teuchos::TimeMonitor(*timer));
593
594 // Special case: alpha == 0.
595 if (alpha == STS::zero()) {
596 // No floating-point operations, so no need to update a count.
597 Y.putScalar(STS::zero());
598 } else {
599 // If X and Y alias one another, then we need to create an
600 // auxiliary vector, Xcopy (a deep copy of X).
601 RCP<const MV> Xcopy;
602 {
603 if (X.aliases(Y)) {
604 Xcopy = rcp(new MV(X, Teuchos::Copy));
605 } else {
606 Xcopy = rcpFromRef(X);
607 }
608 }
609
610 // Each of the following methods updates the flop count itself.
611 // All implementations handle zeroing out the starting solution
612 // (if necessary) themselves.
613 switch (PrecType_) {
614 case Ifpack2::Details::JACOBI:
615 ApplyInverseJacobi(*Xcopy, Y);
616 break;
617 case Ifpack2::Details::GS:
618 ApplyInverseSerialGS(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
619 break;
620 case Ifpack2::Details::SGS:
621 ApplyInverseSerialGS(*Xcopy, Y, Tpetra::Symmetric);
622 break;
623 case Ifpack2::Details::MTGS:
624 case Ifpack2::Details::GS2:
625 ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
626 break;
627 case Ifpack2::Details::MTSGS:
628 case Ifpack2::Details::SGS2:
629 ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, Tpetra::Symmetric);
630 break;
631 case Ifpack2::Details::RICHARDSON:
632 ApplyInverseRichardson(*Xcopy, Y);
633 break;
634
635 default:
636 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
637 "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
638 << PrecType_ << ". Please report this bug to the Ifpack2 developers.");
639 }
640 if (alpha != STS::one()) {
641 Y.scale(alpha);
642 const double numGlobalRows = as<double>(A_->getGlobalNumRows());
643 const double numVectors = as<double>(Y.getNumVectors());
644 ApplyFlops_ += numGlobalRows * numVectors;
645 }
646 }
647 }
648 ApplyTime_ += (time.wallTime() - startTime);
649 ++NumApply_;
650}
651
652template <class MatrixType>
654 applyMat(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
655 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
656 Teuchos::ETransp mode) const {
657 TEUCHOS_TEST_FOR_EXCEPTION(
658 A_.is_null(), std::runtime_error,
659 "Ifpack2::Relaxation::applyMat: "
660 "The input matrix A is null. Please call setMatrix() with a nonnull "
661 "input matrix, then call compute(), before calling this method.");
662 TEUCHOS_TEST_FOR_EXCEPTION(
663 !isComputed(), std::runtime_error,
664 "Ifpack2::Relaxation::applyMat: "
665 "isComputed() must be true before you may call applyMat(). "
666 "Please call compute() before calling this method.");
667 TEUCHOS_TEST_FOR_EXCEPTION(
668 X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
669 "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors()
670 << " != Y.getNumVectors() = " << Y.getNumVectors() << ".");
671 A_->apply(X, Y, mode);
672}
673
674template <class MatrixType>
676 const char methodName[] = "Ifpack2::Relaxation::initialize";
677
678 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, methodName << ": The "
679 "input matrix A is null. Please call setMatrix() with "
680 "a nonnull input matrix before calling this method.");
681
682 Teuchos::RCP<Teuchos::Time> timer =
683 Teuchos::TimeMonitor::getNewCounter(methodName);
684
685 double startTime = timer->wallTime();
686
687 { // Timing of initialize starts here
688 Teuchos::TimeMonitor timeMon(*timer);
689 isInitialized_ = false;
690
691 {
692 auto rowMap = A_->getRowMap();
693 auto comm = rowMap.is_null() ? Teuchos::null : rowMap->getComm();
694 IsParallel_ = !comm.is_null() && comm->getSize() > 1;
695 }
696
697 // mfh 21 Mar 2013, 07 May 2019: The Import object may be null,
698 // but in that case, the domain and column Maps are the same and
699 // we don't need to Import anyway.
700 //
701 // mfh 07 May 2019: A_->getGraph() might be an
702 // OverlappingRowGraph, which doesn't have an Import object.
703 // However, in that case, the comm should have size 1.
704 Importer_ = IsParallel_ ? A_->getGraph()->getImporter() : Teuchos::null;
705
706 {
707 Teuchos::RCP<const block_crs_matrix_type> A_bcrs =
708 Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(A_);
709 hasBlockCrsMatrix_ = !A_bcrs.is_null();
710 }
711
712 serialGaussSeidel_ = Teuchos::null;
713
714 if (PrecType_ == Details::MTGS || PrecType_ == Details::MTSGS ||
715 PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
716 auto crsMat = Details::getCrsMatrix(A_);
717 TEUCHOS_TEST_FOR_EXCEPTION(crsMat.is_null(), std::logic_error, methodName << ": "
718 "Multithreaded Gauss-Seidel methods currently only work "
719 "when the input matrix is a Tpetra::CrsMatrix.");
720
721 // FIXME (mfh 27 May 2019) Dumping the matrix belongs in
722 // compute, not initialize, since users may change the matrix's
723 // values at any time before calling compute.
724 if (ifpack2_dump_matrix_) {
725 static int sequence_number = 0;
726 const std::string file_name = "Ifpack2_MT_GS_" +
727 std::to_string(sequence_number++) + ".mtx";
728 using writer_type = Tpetra::MatrixMarket::Writer<crs_matrix_type>;
729 writer_type::writeSparseFile(file_name, crsMat);
730 }
731
732 this->mtKernelHandle_ = Teuchos::rcp(new mt_kernel_handle_type());
733 if (mtKernelHandle_->get_gs_handle() == nullptr) {
734 if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2)
735 mtKernelHandle_->create_gs_handle(KokkosSparse::GS_TWOSTAGE);
736 else if (this->clusterSize_ == 1) {
737 mtKernelHandle_->create_gs_handle(KokkosSparse::GS_DEFAULT, this->mtColoringAlgorithm_);
738 mtKernelHandle_->get_point_gs_handle()->set_long_row_threshold(longRowThreshold_);
739 } else
740 mtKernelHandle_->create_gs_handle(KokkosSparse::CLUSTER_DEFAULT, this->clusterSize_, this->mtColoringAlgorithm_);
741 }
742 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice();
743 if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
744 // set parameters for two-stage GS
745 mtKernelHandle_->set_gs_set_num_inner_sweeps(NumInnerSweeps_);
746 mtKernelHandle_->set_gs_set_num_outer_sweeps(NumOuterSweeps_);
747 mtKernelHandle_->set_gs_set_inner_damp_factor(InnerDampingFactor_);
748 mtKernelHandle_->set_gs_twostage(!InnerSpTrsv_, A_->getLocalNumRows());
749 mtKernelHandle_->set_gs_twostage_compact_form(CompactForm_);
750 }
751
752 KokkosSparse::gauss_seidel_symbolic(
753 mtKernelHandle_.getRawPtr(),
754 A_->getLocalNumRows(),
755 A_->getLocalNumCols(),
756 kcsr.graph.row_map,
757 kcsr.graph.entries,
758 is_matrix_structurally_symmetric_);
759 }
760 } // timing of initialize stops here
761
762 InitializeTime_ += (timer->wallTime() - startTime);
763 ++NumInitialize_;
764 isInitialized_ = true;
765}
766
767namespace Impl {
768template <typename BlockDiagView>
769struct InvertDiagBlocks {
770 typedef typename BlockDiagView::size_type Size;
771
772 private:
773 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
774 template <typename View>
775 using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
776 typename View::device_type, Unmanaged>;
777
778 typedef typename BlockDiagView::non_const_value_type Scalar;
779 typedef typename BlockDiagView::device_type Device;
780 typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
781 typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
782
783 UnmanagedView<BlockDiagView> block_diag_;
784 // TODO Use thread team and scratch memory space. In this first
785 // pass, provide workspace for each block.
786 RWrk rwrk_buf_;
787 UnmanagedView<RWrk> rwrk_;
788 IWrk iwrk_buf_;
789 UnmanagedView<IWrk> iwrk_;
790
791 public:
792 InvertDiagBlocks(BlockDiagView& block_diag)
793 : block_diag_(block_diag) {
794 const auto blksz = block_diag.extent(1);
795 Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
796 rwrk_ = rwrk_buf_;
797 Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
798 iwrk_ = iwrk_buf_;
799 }
800
801 KOKKOS_INLINE_FUNCTION
802 void operator()(const Size i, int& jinfo) const {
803 auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
804 auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
805 auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
806 int info = 0;
807 Tpetra::GETF2(D_cur, ipiv, info);
808 if (info) {
809 ++jinfo;
810 return;
811 }
812 Tpetra::GETRI(D_cur, ipiv, work, info);
813 if (info) ++jinfo;
814 }
815};
816} // namespace Impl
817
818template <class MatrixType>
819void Relaxation<MatrixType>::computeBlockCrs() {
820 using Kokkos::ALL;
821 using Teuchos::Array;
822 using Teuchos::ArrayRCP;
823 using Teuchos::ArrayView;
824 using Teuchos::as;
825 using Teuchos::Comm;
826 using Teuchos::RCP;
827 using Teuchos::rcp;
828 using Teuchos::rcp_dynamic_cast;
829 using Teuchos::REDUCE_MAX;
830 using Teuchos::REDUCE_MIN;
831 using Teuchos::REDUCE_SUM;
832 using Teuchos::reduceAll;
833 typedef local_ordinal_type LO;
834
835 const std::string timerName("Ifpack2::Relaxation::computeBlockCrs");
836 Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter(timerName);
837 if (timer.is_null()) {
838 timer = Teuchos::TimeMonitor::getNewCounter(timerName);
839 }
840 double startTime = timer->wallTime();
841 {
842 Teuchos::TimeMonitor timeMon(*timer);
843
844 TEUCHOS_TEST_FOR_EXCEPTION(
845 A_.is_null(), std::runtime_error,
846 "Ifpack2::Relaxation::"
847 "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
848 "with a nonnull input matrix, then call initialize(), before calling "
849 "this method.");
850 auto blockCrsA = rcp_dynamic_cast<const block_crs_matrix_type>(A_);
851 TEUCHOS_TEST_FOR_EXCEPTION(
852 blockCrsA.is_null(), std::logic_error,
853 "Ifpack2::Relaxation::"
854 "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
855 "got this far. Please report this bug to the Ifpack2 developers.");
856
857 const scalar_type one = STS::one();
858
859 // Reset state.
860 IsComputed_ = false;
861
862 const LO lclNumMeshRows =
863 blockCrsA->getCrsGraph().getLocalNumRows();
864 const LO blockSize = blockCrsA->getBlockSize();
865
866 if (!savedDiagOffsets_) {
867 blockDiag_ = block_diag_type(); // clear it before reallocating
868 blockDiag_ = block_diag_type("Ifpack2::Relaxation::blockDiag_",
869 lclNumMeshRows, blockSize, blockSize);
870 if (Teuchos::as<LO>(diagOffsets_.extent(0)) < lclNumMeshRows) {
871 // Clear diagOffsets_ first (by assigning an empty View to it)
872 // to save memory, before reallocating.
873 diagOffsets_ = Kokkos::View<size_t*, device_type>();
874 diagOffsets_ = Kokkos::View<size_t*, device_type>("offsets", lclNumMeshRows);
875 }
876 blockCrsA->getCrsGraph().getLocalDiagOffsets(diagOffsets_);
877 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(diagOffsets_.extent(0)) !=
878 static_cast<size_t>(blockDiag_.extent(0)),
879 std::logic_error, "diagOffsets_.extent(0) = " << diagOffsets_.extent(0) << " != blockDiag_.extent(0) = " << blockDiag_.extent(0) << ". Please report this bug to the Ifpack2 developers.");
880 savedDiagOffsets_ = true;
881 }
882 blockCrsA->getLocalDiagCopy(blockDiag_, diagOffsets_);
883
884 // Use an unmanaged View in this method, so that when we take
885 // subviews of it (to get each diagonal block), we don't have to
886 // touch the reference count. Reference count updates are a
887 // thread scalability bottleneck and have a performance cost even
888 // without using threads.
889 unmanaged_block_diag_type blockDiag = blockDiag_;
890
891 if (DoL1Method_ && IsParallel_) {
892 const scalar_type two = one + one;
893 const size_t maxLength = A_->getLocalMaxNumRowEntries();
894 nonconst_local_inds_host_view_type indices("indices", maxLength);
895 nonconst_values_host_view_type values_("values", maxLength * blockSize * blockSize);
896 size_t numEntries = 0;
897
898 for (LO i = 0; i < lclNumMeshRows; ++i) {
899 // FIXME (mfh 16 Dec 2015) Get views instead of copies.
900 blockCrsA->getLocalRowCopy(i, indices, values_, numEntries);
901 scalar_type* values = reinterpret_cast<scalar_type*>(values_.data());
902
903 auto diagBlock = Kokkos::subview(blockDiag, i, ALL(), ALL());
904 for (LO subRow = 0; subRow < blockSize; ++subRow) {
905 magnitude_type diagonal_boost = STM::zero();
906 for (size_t k = 0; k < numEntries; ++k) {
907 if (indices[k] >= lclNumMeshRows) {
908 const size_t offset = blockSize * blockSize * k + subRow * blockSize;
909 for (LO subCol = 0; subCol < blockSize; ++subCol) {
910 diagonal_boost += STS::magnitude(values[offset + subCol] / two);
911 }
912 }
913 }
914 if (STS::magnitude(diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
915 diagBlock(subRow, subRow) += diagonal_boost;
916 }
917 }
918 }
919 }
920
921 int info = 0;
922 {
923 Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
924 typedef typename unmanaged_block_diag_type::execution_space exec_space;
925 typedef Kokkos::RangePolicy<exec_space, LO> range_type;
926
927 Kokkos::parallel_reduce(range_type(0, lclNumMeshRows), idb, info);
928 // FIXME (mfh 19 May 2017) Different processes may not
929 // necessarily have this error all at the same time, so it would
930 // be better just to preserve a local error state and let users
931 // check.
932 TEUCHOS_TEST_FOR_EXCEPTION(info > 0, std::runtime_error, "GETF2 or GETRI failed on = " << info << " diagonal blocks.");
933 }
934
935 // In a debug build, do an extra test to make sure that all the
936 // factorizations were computed correctly.
938 const int numResults = 2;
939 // Use "max = -min" trick to get min and max in a single all-reduce.
940 int lclResults[2], gblResults[2];
941 lclResults[0] = info;
942 lclResults[1] = -info;
943 gblResults[0] = 0;
944 gblResults[1] = 0;
945 reduceAll<int, int>(*(A_->getGraph()->getComm()), REDUCE_MIN,
946 numResults, lclResults, gblResults);
947 TEUCHOS_TEST_FOR_EXCEPTION(gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
948 "Ifpack2::Relaxation::compute: When processing the input "
949 "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
950 "failed on one or more (MPI) processes.");
951 }
952 serialGaussSeidel_ = rcp(new SerialGaussSeidel(blockCrsA, blockDiag_, localSmoothingIndices_, DampingFactor_));
953 } // end TimeMonitor scope
954
955 ComputeTime_ += (timer->wallTime() - startTime);
956 ++NumCompute_;
957 IsComputed_ = true;
958}
959
960template <class MatrixType>
962 using Teuchos::Array;
963 using Teuchos::ArrayRCP;
964 using Teuchos::ArrayView;
965 using Teuchos::as;
966 using Teuchos::Comm;
967 using Teuchos::RCP;
968 using Teuchos::rcp;
969 using Teuchos::REDUCE_MAX;
970 using Teuchos::REDUCE_MIN;
971 using Teuchos::REDUCE_SUM;
972 using Teuchos::reduceAll;
973 using LO = local_ordinal_type;
974 using vector_type = Tpetra::Vector<scalar_type, local_ordinal_type,
976 using IST = typename vector_type::impl_scalar_type;
977 using KAT = KokkosKernels::ArithTraits<IST>;
978
979 const char methodName[] = "Ifpack2::Relaxation::compute";
980 const scalar_type zero = STS::zero();
981 const scalar_type one = STS::one();
982
983 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, methodName << ": "
984 "The input matrix A is null. Please call setMatrix() with a nonnull "
985 "input matrix, then call initialize(), before calling this method.");
986
987 // We don't count initialization in compute() time.
988 if (!isInitialized()) {
989 initialize();
990 }
991
992 if (hasBlockCrsMatrix_) {
993 computeBlockCrs();
994 return;
995 }
996
997 Teuchos::RCP<Teuchos::Time> timer =
998 Teuchos::TimeMonitor::getNewCounter(methodName);
999 double startTime = timer->wallTime();
1000
1001 { // Timing of compute starts here.
1002 Teuchos::TimeMonitor timeMon(*timer);
1003
1004 // Precompute some quantities for "fixing" zero or tiny diagonal
1005 // entries. We'll only use them if this "fixing" is enabled.
1006 //
1007 // SmallTraits covers for the lack of eps() in
1008 // Teuchos::ScalarTraits when its template parameter is not a
1009 // floating-point type. (Ifpack2 sometimes gets instantiated for
1010 // integer Scalar types.)
1011 IST oneOverMinDiagVal = KAT::one() / static_cast<IST>(SmallTraits<scalar_type>::eps());
1012 if (MinDiagonalValue_ != zero)
1013 oneOverMinDiagVal = KAT::one() / static_cast<IST>(MinDiagonalValue_);
1014
1015 // It's helpful not to have to recompute this magnitude each time.
1016 const magnitude_type minDiagValMag = STS::magnitude(MinDiagonalValue_);
1017
1018 const LO numMyRows = static_cast<LO>(A_->getLocalNumRows());
1019
1020 TEUCHOS_TEST_FOR_EXCEPTION(NumSweeps_ < 0, std::logic_error, methodName << ": NumSweeps_ = " << NumSweeps_ << " < 0. "
1021 "Please report this bug to the Ifpack2 developers.");
1022 IsComputed_ = false;
1023
1024 if (Diagonal_.is_null()) {
1025 // A_->getLocalDiagCopy fills in all Vector entries, even if the
1026 // matrix has no stored entries in the corresponding rows.
1027 Diagonal_ = rcp(new vector_type(A_->getRowMap(), false));
1028 }
1029
1030 if (checkDiagEntries_) {
1031 //
1032 // Check diagonal elements, replace zero elements with the minimum
1033 // diagonal value, and store their inverses. Count the number of
1034 // "small" and zero diagonal entries while we're at it.
1035 //
1036 size_t numSmallDiagEntries = 0; // "small" includes zero
1037 size_t numZeroDiagEntries = 0; // # zero diagonal entries
1038 size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
1039 magnitude_type minMagDiagEntryMag = STM::zero();
1040 magnitude_type maxMagDiagEntryMag = STM::zero();
1041
1042 // FIXME: We are extracting the diagonal more than once. That
1043 // isn't very efficient, but we shouldn't be checking
1044 // diagonal entries at scale anyway.
1045 A_->getLocalDiagCopy(*Diagonal_); // slow path for row matrix
1046 std::unique_ptr<vector_type> origDiag;
1047 origDiag = std::unique_ptr<vector_type>(new vector_type(*Diagonal_, Teuchos::Copy));
1048 auto diag2d = Diagonal_->getLocalViewHost(Tpetra::Access::ReadWrite);
1049 auto diag = Kokkos::subview(diag2d, Kokkos::ALL(), 0);
1050 // As we go, keep track of the diagonal entries with the
1051 // least and greatest magnitude. We could use the trick of
1052 // starting min with +Inf and max with -Inf, but that
1053 // doesn't work if scalar_type is a built-in integer type.
1054 // Thus, we have to start by reading the first diagonal
1055 // entry redundantly.
1056 if (numMyRows != 0) {
1057 const magnitude_type d_0_mag = KAT::abs(diag(0));
1058 minMagDiagEntryMag = d_0_mag;
1059 maxMagDiagEntryMag = d_0_mag;
1060 }
1061
1062 // Go through all the diagonal entries. Compute counts of
1063 // small-magnitude, zero, and negative-real-part entries.
1064 // Invert the diagonal entries that aren't too small. For
1065 // those too small in magnitude, replace them with
1066 // 1/MinDiagonalValue_ (or 1/eps if MinDiagonalValue_
1067 // happens to be zero).
1068 for (LO i = 0; i < numMyRows; ++i) {
1069 const IST d_i = diag(i);
1070 const magnitude_type d_i_mag = KAT::abs(d_i);
1071 // Work-around for GitHub Issue #5269.
1072 // const magnitude_type d_i_real = KAT::real (d_i);
1073 const auto d_i_real = getRealValue(d_i);
1074
1075 // We can't compare complex numbers, but we can compare their
1076 // real parts.
1077 if (d_i_real < STM::zero()) {
1078 ++numNegDiagEntries;
1079 }
1080 if (d_i_mag < minMagDiagEntryMag) {
1081 minMagDiagEntryMag = d_i_mag;
1082 }
1083 if (d_i_mag > maxMagDiagEntryMag) {
1084 maxMagDiagEntryMag = d_i_mag;
1085 }
1086
1087 if (fixTinyDiagEntries_) {
1088 // <= not <, in case minDiagValMag is zero.
1089 if (d_i_mag <= minDiagValMag) {
1090 ++numSmallDiagEntries;
1091 if (d_i_mag == STM::zero()) {
1092 ++numZeroDiagEntries;
1093 }
1094 diag(i) = oneOverMinDiagVal;
1095 } else {
1096 diag(i) = KAT::one() / d_i;
1097 }
1098 } else { // Don't fix zero or tiny diagonal entries.
1099 // <= not <, in case minDiagValMag is zero.
1100 if (d_i_mag <= minDiagValMag) {
1101 ++numSmallDiagEntries;
1102 if (d_i_mag == STM::zero()) {
1103 ++numZeroDiagEntries;
1104 }
1105 }
1106 diag(i) = KAT::one() / d_i;
1107 }
1108 }
1109
1110 // Collect global data about the diagonal entries. Only do this
1111 // (which involves O(1) all-reduces) if the user asked us to do
1112 // the extra work.
1113 //
1114 // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
1115 // zero rows. Fixing this requires one of two options:
1116 //
1117 // 1. Use a custom reduction operation that ignores processes
1118 // with zero diagonal entries.
1119 // 2. Split the communicator, compute all-reduces using the
1120 // subcommunicator over processes that have a nonzero number
1121 // of diagonal entries, and then broadcast from one of those
1122 // processes (if there is one) to the processes in the other
1123 // subcommunicator.
1124 const Comm<int>& comm = *(A_->getRowMap()->getComm());
1125
1126 // Compute global min and max magnitude of entries.
1127 Array<magnitude_type> localVals(2);
1128 localVals[0] = minMagDiagEntryMag;
1129 // (- (min (- x))) is the same as (max x).
1130 localVals[1] = -maxMagDiagEntryMag;
1131 Array<magnitude_type> globalVals(2);
1132 reduceAll<int, magnitude_type>(comm, REDUCE_MIN, 2,
1133 localVals.getRawPtr(),
1134 globalVals.getRawPtr());
1135 globalMinMagDiagEntryMag_ = globalVals[0];
1136 globalMaxMagDiagEntryMag_ = -globalVals[1];
1137
1138 Array<size_t> localCounts(3);
1139 localCounts[0] = numSmallDiagEntries;
1140 localCounts[1] = numZeroDiagEntries;
1141 localCounts[2] = numNegDiagEntries;
1142 Array<size_t> globalCounts(3);
1143 reduceAll<int, size_t>(comm, REDUCE_SUM, 3,
1144 localCounts.getRawPtr(),
1145 globalCounts.getRawPtr());
1146 globalNumSmallDiagEntries_ = globalCounts[0];
1147 globalNumZeroDiagEntries_ = globalCounts[1];
1148 globalNumNegDiagEntries_ = globalCounts[2];
1149
1150 // Compute and save the difference between the computed inverse
1151 // diagonal, and the original diagonal's inverse.
1152 vector_type diff(A_->getRowMap());
1153 diff.reciprocal(*origDiag);
1154 diff.update(-one, *Diagonal_, one);
1155 globalDiagNormDiff_ = diff.norm2();
1156 }
1157
1158 // Extract the diagonal entries. The CrsMatrix graph version is
1159 // faster than the row matrix version for subsequent calls to
1160 // compute(), since it caches the diagonal offsets.
1161
1162 bool debugAgainstSlowPath = false;
1163
1164 auto crsMat = Details::getCrsMatrix(A_);
1165
1166 if (crsMat.get() && crsMat->isFillComplete()) {
1167 // The invDiagKernel object computes diagonal offsets if
1168 // necessary. The "compute" call extracts diagonal enties,
1169 // optionally applies the L1 method and replacement of small
1170 // entries, and then inverts.
1171 if (invDiagKernel_.is_null())
1172 invDiagKernel_ = rcp(new Ifpack2::Details::InverseDiagonalKernel<op_type>(crsMat));
1173 else
1174 invDiagKernel_->setMatrix(crsMat);
1175 invDiagKernel_->compute(*Diagonal_,
1176 DoL1Method_ && IsParallel_, L1Eta_,
1177 fixTinyDiagEntries_, minDiagValMag);
1178 savedDiagOffsets_ = true;
1179
1180 debugAgainstSlowPath = Ifpack2::Details::Behavior::debug();
1181 }
1182
1183 if (crsMat.is_null() || !crsMat->isFillComplete() || debugAgainstSlowPath) {
1184 // We could not call the CrsMatrix version above, or want to
1185 // debug by comparing against the slow path.
1186
1187 // FIXME: The L1 method in this code path runs on host, since we
1188 // don't have device access for row matrices.
1189
1190 RCP<vector_type> Diagonal;
1191 if (debugAgainstSlowPath)
1192 Diagonal = rcp(new vector_type(A_->getRowMap()));
1193 else
1194 Diagonal = Diagonal_;
1195
1196 A_->getLocalDiagCopy(*Diagonal); // slow path for row matrix
1197
1198 // Setup for L1 Methods.
1199 // Here we add half the value of the off-processor entries in the row,
1200 // but only if diagonal isn't sufficiently large.
1201 //
1202 // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
1203 // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM
1204 // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
1205 //
1206 if (DoL1Method_ && IsParallel_) {
1207 const row_matrix_type& A_row = *A_;
1208 auto diag = Diagonal->getLocalViewHost(Tpetra::Access::ReadWrite);
1209 const magnitude_type two = STM::one() + STM::one();
1210 const size_t maxLength = A_row.getLocalMaxNumRowEntries();
1211 nonconst_local_inds_host_view_type indices("indices", maxLength);
1212 nonconst_values_host_view_type values("values", maxLength);
1213 size_t numEntries;
1214
1215 for (LO i = 0; i < numMyRows; ++i) {
1216 A_row.getLocalRowCopy(i, indices, values, numEntries);
1217 magnitude_type diagonal_boost = STM::zero();
1218 for (size_t k = 0; k < numEntries; ++k) {
1219 if (indices[k] >= numMyRows) {
1220 diagonal_boost += STS::magnitude(values[k] / two);
1221 }
1222 }
1223 if (KAT::magnitude(diag(i, 0)) < L1Eta_ * diagonal_boost) {
1224 diag(i, 0) += diagonal_boost;
1225 }
1226 }
1227 }
1228
1229 //
1230 // Invert the matrix's diagonal entries (in Diagonal).
1231 //
1232 if (fixTinyDiagEntries_) {
1233 // Go through all the diagonal entries. Invert those that
1234 // aren't too small in magnitude. For those that are too
1235 // small in magnitude, replace them with oneOverMinDiagVal.
1236 auto localDiag = Diagonal->getLocalViewDevice(Tpetra::Access::ReadWrite);
1237 Kokkos::parallel_for(
1238 Kokkos::RangePolicy<MyExecSpace>(0, localDiag.extent(0)),
1239 KOKKOS_LAMBDA(local_ordinal_type i) {
1240 auto d_i = localDiag(i, 0);
1241 const magnitude_type d_i_mag = KAT::magnitude(d_i);
1242 // <= not <, in case minDiagValMag is zero.
1243 if (d_i_mag <= minDiagValMag) {
1244 d_i = oneOverMinDiagVal;
1245 } else {
1246 // For Stokhos types, operator/ returns an expression
1247 // type. Explicitly convert to IST before returning.
1248 d_i = IST(KAT::one() / d_i);
1249 }
1250 localDiag(i, 0) = d_i;
1251 });
1252 } else { // don't fix tiny or zero diagonal entries
1253 Diagonal->reciprocal(*Diagonal);
1254 }
1255
1256 if (debugAgainstSlowPath) {
1257 // Validate the fast-path diagonal against the slow-path diagonal.
1258 Diagonal->update(STS::one(), *Diagonal_, -STS::one());
1259 const magnitude_type err = Diagonal->normInf();
1260 // The two diagonals should be exactly the same, so their
1261 // difference should be exactly zero.
1262 TEUCHOS_TEST_FOR_EXCEPTION(err > 100 * STM::eps(), std::logic_error, methodName << ": "
1263 << "\"fast-path\" diagonal computation failed. "
1264 "\\|D1 - D2\\|_inf = "
1265 << err << ".");
1266 }
1267 }
1268
1269 // Count floating-point operations of computing the inverse diagonal.
1270 //
1271 // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
1272 if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1273 ComputeFlops_ += 4.0 * numMyRows;
1274 } else {
1275 ComputeFlops_ += numMyRows;
1276 }
1277
1278 if (PrecType_ == Ifpack2::Details::MTGS ||
1279 PrecType_ == Ifpack2::Details::MTSGS ||
1280 PrecType_ == Ifpack2::Details::GS2 ||
1281 PrecType_ == Ifpack2::Details::SGS2) {
1282 // KokkosKernels GaussSeidel Initialization.
1283 using scalar_view_t = typename local_matrix_device_type::values_type;
1284
1285 TEUCHOS_TEST_FOR_EXCEPTION(crsMat.is_null(), std::logic_error, methodName << ": "
1286 "Multithreaded Gauss-Seidel methods currently only work "
1287 "when the input matrix is a Tpetra::CrsMatrix.");
1288 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice();
1289
1290 // TODO BMK: This should be ReadOnly, and KokkosKernels should accept a
1291 // const-valued view for user-provided D^-1. OK for now, Diagonal_ is nonconst.
1292 auto diagView_2d = Diagonal_->getLocalViewDevice(Tpetra::Access::ReadWrite);
1293 scalar_view_t diagView_1d = Kokkos::subview(diagView_2d, Kokkos::ALL(), 0);
1294 KokkosSparse::gauss_seidel_numeric(
1295 mtKernelHandle_.getRawPtr(),
1296 A_->getLocalNumRows(),
1297 A_->getLocalNumCols(),
1298 kcsr.graph.row_map,
1299 kcsr.graph.entries,
1300 kcsr.values,
1301 diagView_1d,
1302 is_matrix_structurally_symmetric_);
1303 } else if (PrecType_ == Ifpack2::Details::GS || PrecType_ == Ifpack2::Details::SGS) {
1304 if (crsMat)
1305 serialGaussSeidel_ = rcp(new SerialGaussSeidel(crsMat, Diagonal_, localSmoothingIndices_, DampingFactor_));
1306 else
1307 serialGaussSeidel_ = rcp(new SerialGaussSeidel(A_, Diagonal_, localSmoothingIndices_, DampingFactor_));
1308 }
1309 } // end TimeMonitor scope
1310
1311 ComputeTime_ += (timer->wallTime() - startTime);
1312 ++NumCompute_;
1313 IsComputed_ = true;
1314}
1315
1316template <class MatrixType>
1318 ApplyInverseRichardson(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1319 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y) const {
1320 using Teuchos::as;
1321 const double numGlobalRows = as<double>(A_->getGlobalNumRows());
1322 const double numVectors = as<double>(X.getNumVectors());
1323 if (ZeroStartingSolution_) {
1324 // For the first Richardson sweep, if we are allowed to assume that
1325 // the initial guess is zero, then Richardson is just alpha times the RHS
1326 // Compute it as Y(i,j) = DampingFactor_ * X(i,j).
1327 Y.scale(DampingFactor_, X);
1328
1329 // Count (global) floating-point operations. Ifpack2 represents
1330 // this as a floating-point number rather than an integer, so that
1331 // overflow (for a very large number of calls, or a very large
1332 // problem) is approximate instead of catastrophic.
1333 double flopUpdate = 0.0;
1334 if (DampingFactor_ == STS::one()) {
1335 // Y(i,j) = X(i,j): one multiply for each entry of Y.
1336 flopUpdate = numGlobalRows * numVectors;
1337 } else {
1338 // Y(i,j) = DampingFactor_ * X(i,j):
1339 // One multiplies per entry of Y.
1340 flopUpdate = numGlobalRows * numVectors;
1341 }
1342 ApplyFlops_ += flopUpdate;
1343 if (NumSweeps_ == 1) {
1344 return;
1345 }
1346 }
1347 // If we were allowed to assume that the starting guess was zero,
1348 // then we have already done the first sweep above.
1349 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1350
1351 // Allocate a multivector if the cached one isn't perfect
1352 updateCachedMultiVector(Y.getMap(), as<size_t>(numVectors));
1353
1354 for (int j = startSweep; j < NumSweeps_; ++j) {
1355 // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1356 Tpetra::Details::residual(*A_, Y, X, *cachedMV_);
1357 Y.update(DampingFactor_, *cachedMV_, STS::one());
1358 }
1359
1360 // For each column of output, for each pass over the matrix:
1361 //
1362 // - One + and one * for each matrix entry
1363 // - One / and one + for each row of the matrix
1364 // - If the damping factor is not one: one * for each row of the
1365 // matrix. (It's not fair to count this if the damping factor is
1366 // one, since the implementation could skip it. Whether it does
1367 // or not is the implementation's choice.)
1368
1369 // Floating-point operations due to the damping factor, per matrix
1370 // row, per direction, per columm of output.
1371 const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
1372 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1373 ApplyFlops_ += as<double>(NumSweeps_ - startSweep) * numVectors *
1374 (2.0 * numGlobalNonzeros + dampingFlops);
1375}
1376
1377template <class MatrixType>
1378void Relaxation<MatrixType>::
1379 ApplyInverseJacobi(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1380 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y) const {
1381 using Teuchos::as;
1382 if (hasBlockCrsMatrix_) {
1383 ApplyInverseJacobi_BlockCrsMatrix(X, Y);
1384 return;
1385 }
1386
1387 const double numGlobalRows = as<double>(A_->getGlobalNumRows());
1388 const double numVectors = as<double>(X.getNumVectors());
1389 if (ZeroStartingSolution_) {
1390 // For the first Jacobi sweep, if we are allowed to assume that
1391 // the initial guess is zero, then Jacobi is just diagonal
1392 // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1393 // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
1394 Y.elementWiseMultiply(DampingFactor_, *Diagonal_, X, STS::zero());
1395
1396 // Count (global) floating-point operations. Ifpack2 represents
1397 // this as a floating-point number rather than an integer, so that
1398 // overflow (for a very large number of calls, or a very large
1399 // problem) is approximate instead of catastrophic.
1400 double flopUpdate = 0.0;
1401 if (DampingFactor_ == STS::one()) {
1402 // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
1403 flopUpdate = numGlobalRows * numVectors;
1404 } else {
1405 // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
1406 // Two multiplies per entry of Y.
1407 flopUpdate = 2.0 * numGlobalRows * numVectors;
1408 }
1409 ApplyFlops_ += flopUpdate;
1410 if (NumSweeps_ == 1) {
1411 return;
1412 }
1413 }
1414 // If we were allowed to assume that the starting guess was zero,
1415 // then we have already done the first sweep above.
1416 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1417
1418 // Allocate a multivector if the cached one isn't perfect
1419 updateCachedMultiVector(Y.getMap(), as<size_t>(numVectors));
1420
1421 for (int j = startSweep; j < NumSweeps_; ++j) {
1422 // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1423 Tpetra::Details::residual(*A_, Y, X, *cachedMV_);
1424 Y.elementWiseMultiply(DampingFactor_, *Diagonal_, *cachedMV_, STS::one());
1425 }
1426
1427 // For each column of output, for each pass over the matrix:
1428 //
1429 // - One + and one * for each matrix entry
1430 // - One / and one + for each row of the matrix
1431 // - If the damping factor is not one: one * for each row of the
1432 // matrix. (It's not fair to count this if the damping factor is
1433 // one, since the implementation could skip it. Whether it does
1434 // or not is the implementation's choice.)
1435
1436 // Floating-point operations due to the damping factor, per matrix
1437 // row, per direction, per columm of output.
1438 const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
1439 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1440 ApplyFlops_ += as<double>(NumSweeps_ - startSweep) * numVectors *
1441 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1442}
1443
1444template <class MatrixType>
1445void Relaxation<MatrixType>::
1446 ApplyInverseJacobi_BlockCrsMatrix(const Tpetra::MultiVector<scalar_type,
1447 local_ordinal_type,
1448 global_ordinal_type,
1449 node_type>& X,
1450 Tpetra::MultiVector<scalar_type,
1451 local_ordinal_type,
1452 global_ordinal_type,
1453 node_type>& Y) const {
1454 using Tpetra::BlockMultiVector;
1455 using BMV = BlockMultiVector<scalar_type, local_ordinal_type,
1456 global_ordinal_type, node_type>;
1457
1458 const block_crs_matrix_type* blockMatConst =
1459 dynamic_cast<const block_crs_matrix_type*>(A_.getRawPtr());
1460 TEUCHOS_TEST_FOR_EXCEPTION(blockMatConst == nullptr, std::logic_error,
1461 "This method should "
1462 "never be called if the matrix A_ is not a BlockCrsMatrix. "
1463 "Please report this bug to the Ifpack2 developers.");
1464 // mfh 23 Jan 2016: Unfortunately, the const cast is necessary.
1465 // This is because applyBlock() is nonconst (more accurate), while
1466 // apply() is const (required by Tpetra::Operator interface, but a
1467 // lie, because it possibly allocates temporary buffers).
1468 block_crs_matrix_type* blockMat =
1469 const_cast<block_crs_matrix_type*>(blockMatConst);
1470
1471 auto meshRowMap = blockMat->getRowMap();
1472 auto meshColMap = blockMat->getColMap();
1473 const local_ordinal_type blockSize = blockMat->getBlockSize();
1474
1475 BMV X_blk(X, *meshColMap, blockSize);
1476 BMV Y_blk(Y, *meshRowMap, blockSize);
1477
1478 if (ZeroStartingSolution_) {
1479 // For the first sweep, if we are allowed to assume that the
1480 // initial guess is zero, then block Jacobi is just block diagonal
1481 // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1482 Y_blk.blockWiseMultiply(DampingFactor_, blockDiag_, X_blk);
1483 if (NumSweeps_ == 1) {
1484 return;
1485 }
1486 }
1487
1488 auto pointRowMap = Y.getMap();
1489 const size_t numVecs = X.getNumVectors();
1490
1491 // We don't need to initialize the result MV, since the sparse
1492 // mat-vec will clobber its contents anyway.
1493 BMV A_times_Y(*meshRowMap, *pointRowMap, blockSize, numVecs);
1494
1495 // If we were allowed to assume that the starting guess was zero,
1496 // then we have already done the first sweep above.
1497 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1498
1499 for (int j = startSweep; j < NumSweeps_; ++j) {
1500 blockMat->applyBlock(Y_blk, A_times_Y);
1501 // Y := Y + \omega D^{-1} (X - A*Y). Use A_times_Y as scratch.
1502 Y_blk.blockJacobiUpdate(DampingFactor_, blockDiag_,
1503 X_blk, A_times_Y, STS::one());
1504 }
1505}
1506
1507template <class MatrixType>
1508void Relaxation<MatrixType>::
1509 ApplyInverseSerialGS(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1510 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1511 Tpetra::ESweepDirection direction) const {
1512 using this_type = Relaxation<MatrixType>;
1513 // The CrsMatrix version is faster, because it can access the sparse
1514 // matrix data directly, rather than by copying out each row's data
1515 // in turn. Thus, we check whether the RowMatrix is really a
1516 // CrsMatrix.
1517 //
1518 // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
1519 // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
1520 // will still be correct if the cast fails, but it will use an
1521 // unoptimized kernel.
1522 auto blockCrsMat = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(A_);
1523 auto crsMat = Details::getCrsMatrix(A_);
1524 if (blockCrsMat.get()) {
1525 const_cast<this_type&>(*this).ApplyInverseSerialGS_BlockCrsMatrix(*blockCrsMat, X, Y, direction);
1526 } else if (crsMat.get()) {
1527 ApplyInverseSerialGS_CrsMatrix(*crsMat, X, Y, direction);
1528 } else {
1529 ApplyInverseSerialGS_RowMatrix(X, Y, direction);
1530 }
1531}
1532
1533template <class MatrixType>
1534void Relaxation<MatrixType>::
1535 ApplyInverseSerialGS_RowMatrix(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1536 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1537 Tpetra::ESweepDirection direction) const {
1538 using Teuchos::Array;
1539 using Teuchos::ArrayRCP;
1540 using Teuchos::ArrayView;
1541 using Teuchos::as;
1542 using Teuchos::RCP;
1543 using Teuchos::rcp;
1544 using Teuchos::rcpFromRef;
1545 typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> MV;
1546
1547 // Tpetra's GS implementation for CrsMatrix handles zeroing out the
1548 // starting multivector itself. The generic RowMatrix version here
1549 // does not, so we have to zero out Y here.
1550 if (ZeroStartingSolution_) {
1551 Y.putScalar(STS::zero());
1552 }
1553
1554 size_t NumVectors = X.getNumVectors();
1555
1556 RCP<MV> Y2;
1557 if (IsParallel_) {
1558 if (Importer_.is_null()) { // domain and column Maps are the same.
1559 updateCachedMultiVector(Y.getMap(), NumVectors);
1560 } else {
1561 updateCachedMultiVector(Importer_->getTargetMap(), NumVectors);
1562 }
1563 Y2 = cachedMV_;
1564 } else {
1565 Y2 = rcpFromRef(Y);
1566 }
1567
1568 for (int j = 0; j < NumSweeps_; ++j) {
1569 // data exchange is here, once per sweep
1570 if (IsParallel_) {
1571 if (Importer_.is_null()) {
1572 // FIXME (mfh 27 May 2019) This doesn't deep copy -- not
1573 // clear if this is correct. Reevaluate at some point.
1574
1575 *Y2 = Y; // just copy, since domain and column Maps are the same
1576 } else {
1577 Y2->doImport(Y, *Importer_, Tpetra::INSERT);
1578 }
1579 }
1580 serialGaussSeidel_->apply(*Y2, X, direction);
1581
1582 // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1583 if (IsParallel_) {
1584 Tpetra::deep_copy(Y, *Y2);
1585 }
1586 }
1587
1588 // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
1589 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1590 const double numVectors = as<double>(X.getNumVectors());
1591 const double numGlobalRows = as<double>(A_->getGlobalNumRows());
1592 const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
1593 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1594 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1595}
1596
1597template <class MatrixType>
1598void Relaxation<MatrixType>::
1599 ApplyInverseSerialGS_CrsMatrix(const crs_matrix_type& A,
1600 const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& B,
1601 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1602 Tpetra::ESweepDirection direction) const {
1603 using Teuchos::null;
1604 using Teuchos::RCP;
1605 using Teuchos::rcp;
1606 using Teuchos::rcp_const_cast;
1607 using Teuchos::rcpFromRef;
1608 typedef scalar_type Scalar;
1609 const char prefix[] = "Ifpack2::Relaxation::SerialGS: ";
1610 const scalar_type ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1611
1612 TEUCHOS_TEST_FOR_EXCEPTION(
1613 !A.isFillComplete(), std::runtime_error,
1614 prefix << "The matrix is not fill complete.");
1615 TEUCHOS_TEST_FOR_EXCEPTION(
1616 NumSweeps_ < 0, std::invalid_argument,
1617 prefix << "The number of sweeps must be nonnegative, "
1618 "but you provided numSweeps = "
1619 << NumSweeps_ << " < 0.");
1620
1621 if (NumSweeps_ == 0) {
1622 return;
1623 }
1624
1625 RCP<const import_type> importer = A.getGraph()->getImporter();
1626
1627 RCP<const map_type> domainMap = A.getDomainMap();
1628 RCP<const map_type> rangeMap = A.getRangeMap();
1629 RCP<const map_type> rowMap = A.getGraph()->getRowMap();
1630 RCP<const map_type> colMap = A.getGraph()->getColMap();
1631
1633 // The relation 'isSameAs' is transitive. It's also a
1634 // collective, so we don't have to do a "shared" test for
1635 // exception (i.e., a global reduction on the test value).
1636 TEUCHOS_TEST_FOR_EXCEPTION(
1637 !X.getMap()->isSameAs(*domainMap), std::runtime_error,
1638 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1639 "multivector X be in the domain Map of the matrix.");
1640 TEUCHOS_TEST_FOR_EXCEPTION(
1641 !B.getMap()->isSameAs(*rangeMap), std::runtime_error,
1642 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1643 "B be in the range Map of the matrix.");
1644 TEUCHOS_TEST_FOR_EXCEPTION(
1645 !Diagonal_->getMap()->isSameAs(*rowMap), std::runtime_error,
1646 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1647 "D be in the row Map of the matrix.");
1648 TEUCHOS_TEST_FOR_EXCEPTION(
1649 !rowMap->isSameAs(*rangeMap), std::runtime_error,
1650 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
1651 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1652 TEUCHOS_TEST_FOR_EXCEPTION(
1653 !domainMap->isSameAs(*rangeMap), std::runtime_error,
1654 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
1655 "the range Map of the matrix be the same.");
1656 }
1657
1658 // Fetch a (possibly cached) temporary column Map multivector
1659 // X_colMap, and a domain Map view X_domainMap of it. Both have
1660 // constant stride by construction. We know that the domain Map
1661 // must include the column Map, because our Gauss-Seidel kernel
1662 // requires that the row Map, domain Map, and range Map are all
1663 // the same, and that each process owns all of its own diagonal
1664 // entries of the matrix.
1665
1666 RCP<multivector_type> X_colMap;
1667 RCP<multivector_type> X_domainMap;
1668 bool copyBackOutput = false;
1669 if (importer.is_null()) {
1670 X_colMap = Teuchos::rcpFromRef(X);
1671 X_domainMap = Teuchos::rcpFromRef(X);
1672 // Column Map and domain Map are the same, so there are no
1673 // remote entries. Thus, if we are not setting the initial
1674 // guess to zero, we don't have to worry about setting remote
1675 // entries to zero, even though we are not doing an Import in
1676 // this case.
1677 if (ZeroStartingSolution_) {
1678 X_colMap->putScalar(ZERO);
1679 }
1680 // No need to copy back to X at end.
1681 } else { // Column Map and domain Map are _not_ the same.
1682 updateCachedMultiVector(colMap, X.getNumVectors());
1683 X_colMap = cachedMV_;
1684 X_domainMap = X_colMap->offsetViewNonConst(domainMap, 0);
1685
1686 if (ZeroStartingSolution_) {
1687 // No need for an Import, since we're filling with zeros.
1688 X_colMap->putScalar(ZERO);
1689 } else {
1690 // We could just copy X into X_domainMap. However, that
1691 // wastes a copy, because the Import also does a copy (plus
1692 // communication). Since the typical use case for
1693 // Gauss-Seidel is a small number of sweeps (2 is typical), we
1694 // don't want to waste that copy. Thus, we do the Import
1695 // here, and skip the first Import in the first sweep.
1696 // Importing directly from X effects the copy into X_domainMap
1697 // (which is a view of X_colMap).
1698 X_colMap->doImport(X, *importer, Tpetra::INSERT);
1699 }
1700 copyBackOutput = true; // Don't forget to copy back at end.
1701 } // if column and domain Maps are (not) the same
1702
1703 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1704 if (!importer.is_null() && sweep > 0) {
1705 // We already did the first Import for the zeroth sweep above,
1706 // if it was necessary.
1707 X_colMap->doImport(*X_domainMap, *importer, Tpetra::INSERT);
1708 }
1709 // Do local Gauss-Seidel (forward, backward or symmetric)
1710 serialGaussSeidel_->apply(*X_colMap, B, direction);
1711 }
1712
1713 if (copyBackOutput) {
1714 try {
1715 deep_copy(X, *X_domainMap); // Copy result back into X.
1716 } catch (std::exception& e) {
1717 TEUCHOS_TEST_FOR_EXCEPTION(
1718 true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
1719 "threw an exception: "
1720 << e.what());
1721 }
1722 }
1723
1724 // For each column of output, for each sweep over the matrix:
1725 //
1726 // - One + and one * for each matrix entry
1727 // - One / and one + for each row of the matrix
1728 // - If the damping factor is not one: one * for each row of the
1729 // matrix. (It's not fair to count this if the damping factor is
1730 // one, since the implementation could skip it. Whether it does
1731 // or not is the implementation's choice.)
1732
1733 // Floating-point operations due to the damping factor, per matrix
1734 // row, per direction, per columm of output.
1735 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1736 const double numVectors = X.getNumVectors();
1737 const double numGlobalRows = A_->getGlobalNumRows();
1738 const double numGlobalNonzeros = A_->getGlobalNumEntries();
1739 ApplyFlops_ += NumSweeps_ * numVectors *
1740 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1741}
1742
1743template <class MatrixType>
1744void Relaxation<MatrixType>::
1745 ApplyInverseSerialGS_BlockCrsMatrix(const block_crs_matrix_type& A,
1746 const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1747 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1748 Tpetra::ESweepDirection direction) {
1749 using Teuchos::RCP;
1750 using Teuchos::rcp;
1751 using Teuchos::rcpFromRef;
1752 using Tpetra::INSERT;
1753
1754 // FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
1755 //
1756 // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
1757 // multiple right-hand sides, unless the input or output MultiVector
1758 // does not have constant stride. We should check for that case
1759 // here, in case it doesn't work in localGaussSeidel (which is
1760 // entirely possible).
1761 block_multivector_type yBlock(Y, *(A.getGraph()->getDomainMap()), A.getBlockSize());
1762 const block_multivector_type xBlock(X, *(A.getColMap()), A.getBlockSize());
1763
1764 bool performImport = false;
1765 RCP<block_multivector_type> yBlockCol;
1766 if (Importer_.is_null()) {
1767 yBlockCol = rcpFromRef(yBlock);
1768 } else {
1769 if (yBlockColumnPointMap_.is_null() ||
1770 yBlockColumnPointMap_->getNumVectors() != yBlock.getNumVectors() ||
1771 yBlockColumnPointMap_->getBlockSize() != yBlock.getBlockSize()) {
1772 yBlockColumnPointMap_ =
1773 rcp(new block_multivector_type(*(A.getColMap()), A.getBlockSize(),
1774 static_cast<local_ordinal_type>(yBlock.getNumVectors())));
1775 }
1776 yBlockCol = yBlockColumnPointMap_;
1777 if (pointImporter_.is_null()) {
1778 auto srcMap = rcp(new map_type(yBlock.getPointMap()));
1779 auto tgtMap = rcp(new map_type(yBlockCol->getPointMap()));
1780 pointImporter_ = rcp(new import_type(srcMap, tgtMap));
1781 }
1782 performImport = true;
1783 }
1784
1785 multivector_type yBlock_mv;
1786 multivector_type yBlockCol_mv;
1787 RCP<const multivector_type> yBlockColPointDomain;
1788 if (performImport) { // create views (shallow copies)
1789 yBlock_mv = yBlock.getMultiVectorView();
1790 yBlockCol_mv = yBlockCol->getMultiVectorView();
1791 yBlockColPointDomain =
1792 yBlockCol_mv.offsetView(A.getDomainMap(), 0);
1793 }
1794
1795 if (ZeroStartingSolution_) {
1796 yBlockCol->putScalar(STS::zero());
1797 } else if (performImport) {
1798 yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1799 }
1800
1801 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1802 if (performImport && sweep > 0) {
1803 yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1804 }
1805 serialGaussSeidel_->applyBlock(*yBlockCol, xBlock, direction);
1806 if (performImport) {
1807 Tpetra::deep_copy(Y, *yBlockColPointDomain);
1808 }
1809 }
1810}
1811
1812template <class MatrixType>
1813void Relaxation<MatrixType>::
1814 ApplyInverseMTGS_CrsMatrix(
1815 const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& B,
1816 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1817 Tpetra::ESweepDirection direction) const {
1818 using Teuchos::as;
1819 using Teuchos::null;
1820 using Teuchos::RCP;
1821 using Teuchos::rcp;
1822 using Teuchos::rcp_const_cast;
1823 using Teuchos::rcpFromRef;
1824
1825 typedef scalar_type Scalar;
1826
1827 const char prefix[] = "Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1828 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1829
1830 auto crsMat = Details::getCrsMatrix(A_);
1831 TEUCHOS_TEST_FOR_EXCEPTION(crsMat.is_null(), std::logic_error,
1832 "Ifpack2::Relaxation::apply: "
1833 "Multithreaded Gauss-Seidel methods currently only work when the "
1834 "input matrix is a Tpetra::CrsMatrix.");
1835
1836 // Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
1837 TEUCHOS_TEST_FOR_EXCEPTION(!localSmoothingIndices_.is_null(), std::logic_error,
1838 "Ifpack2's implementation of Multithreaded Gauss-Seidel does not "
1839 "implement the case where the user supplies an iteration order. "
1840 "This error used to appear as \"MT GaussSeidel ignores the given "
1841 "order\". "
1842 "I tried to add more explanation, but I didn't implement \"MT "
1843 "GaussSeidel\" [sic]. "
1844 "You'll have to ask the person who did.");
1845
1846 TEUCHOS_TEST_FOR_EXCEPTION(!crsMat->isFillComplete(), std::runtime_error, prefix << "The "
1847 "input CrsMatrix is not fill complete. Please call fillComplete "
1848 "on the matrix, then do setup again, before calling apply(). "
1849 "\"Do setup\" means that if only the matrix's values have changed "
1850 "since last setup, you need only call compute(). If the matrix's "
1851 "structure may also have changed, you must first call initialize(), "
1852 "then call compute(). If you have not set up this preconditioner "
1853 "for this matrix before, you must first call initialize(), then "
1854 "call compute().");
1855 TEUCHOS_TEST_FOR_EXCEPTION(NumSweeps_ < 0, std::logic_error, prefix << ": NumSweeps_ = " << NumSweeps_ << " < 0. Please report this bug to the Ifpack2 "
1856 "developers.");
1857 if (NumSweeps_ == 0) {
1858 return;
1859 }
1860
1861 RCP<const import_type> importer = crsMat->getGraph()->getImporter();
1862 TEUCHOS_TEST_FOR_EXCEPTION(
1863 !crsMat->getGraph()->getExporter().is_null(), std::runtime_error,
1864 "This method's implementation currently requires that the matrix's row, "
1865 "domain, and range Maps be the same. This cannot be the case, because "
1866 "the matrix has a nontrivial Export object.");
1867
1868 RCP<const map_type> domainMap = crsMat->getDomainMap();
1869 RCP<const map_type> colMap = crsMat->getGraph()->getColMap();
1870 [[maybe_unused]] RCP<const map_type> rangeMap = crsMat->getRangeMap();
1871 [[maybe_unused]] RCP<const map_type> rowMap = crsMat->getGraph()->getRowMap();
1872
1874 // The relation 'isSameAs' is transitive. It's also a
1875 // collective, so we don't have to do a "shared" test for
1876 // exception (i.e., a global reduction on the test value).
1877 TEUCHOS_TEST_FOR_EXCEPTION(
1878 !X.getMap()->isSameAs(*domainMap), std::runtime_error,
1879 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1880 "multivector X be in the domain Map of the matrix.");
1881 TEUCHOS_TEST_FOR_EXCEPTION(
1882 !B.getMap()->isSameAs(*rangeMap), std::runtime_error,
1883 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1884 "B be in the range Map of the matrix.");
1885 TEUCHOS_TEST_FOR_EXCEPTION(
1886 !rowMap->isSameAs(*rangeMap), std::runtime_error,
1887 "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1888 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1889 TEUCHOS_TEST_FOR_EXCEPTION(
1890 !domainMap->isSameAs(*rangeMap), std::runtime_error,
1891 "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1892 "the range Map of the matrix be the same.");
1893 }
1894
1895 // Fetch a (possibly cached) temporary column Map multivector
1896 // X_colMap, and a domain Map view X_domainMap of it. Both have
1897 // constant stride by construction. We know that the domain Map
1898 // must include the column Map, because our Gauss-Seidel kernel
1899 // requires that the row Map, domain Map, and range Map are all
1900 // the same, and that each process owns all of its own diagonal
1901 // entries of the matrix.
1902
1903 RCP<multivector_type> X_colMap;
1904 RCP<multivector_type> X_domainMap;
1905 bool copyBackOutput = false;
1906 if (importer.is_null()) {
1907 if (X.isConstantStride()) {
1908 X_colMap = rcpFromRef(X);
1909 X_domainMap = rcpFromRef(X);
1910
1911 // Column Map and domain Map are the same, so there are no
1912 // remote entries. Thus, if we are not setting the initial
1913 // guess to zero, we don't have to worry about setting remote
1914 // entries to zero, even though we are not doing an Import in
1915 // this case.
1916 if (ZeroStartingSolution_) {
1917 X_colMap->putScalar(ZERO);
1918 }
1919 // No need to copy back to X at end.
1920 } else {
1921 // We must copy X into a constant stride multivector.
1922 // Just use the cached column Map multivector for that.
1923 // force=true means fill with zeros, so no need to fill
1924 // remote entries (not in domain Map) with zeros.
1925 updateCachedMultiVector(colMap, as<size_t>(X.getNumVectors()));
1926 X_colMap = cachedMV_;
1927 // X_domainMap is always a domain Map view of the column Map
1928 // multivector. In this case, the domain and column Maps are
1929 // the same, so X_domainMap _is_ X_colMap.
1930 X_domainMap = X_colMap;
1931 if (!ZeroStartingSolution_) { // Don't copy if zero initial guess
1932 try {
1933 deep_copy(*X_domainMap, X); // Copy X into constant stride MV
1934 } catch (std::exception& e) {
1935 std::ostringstream os;
1936 os << "Ifpack2::Relaxation::MTGaussSeidel: "
1937 "deep_copy(*X_domainMap, X) threw an exception: "
1938 << e.what() << ".";
1939 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what());
1940 }
1941 }
1942 copyBackOutput = true; // Don't forget to copy back at end.
1943 /*
1944 TPETRA_EFFICIENCY_WARNING(
1945 ! X.isConstantStride (),
1946 std::runtime_error,
1947 "MTGaussSeidel: The current implementation of the Gauss-Seidel "
1948 "kernel requires that X and B both have constant stride. Since X "
1949 "does not have constant stride, we had to make a copy. This is a "
1950 "limitation of the current implementation and not your fault, but we "
1951 "still report it as an efficiency warning for your information.");
1952 */
1953 }
1954 } else { // Column Map and domain Map are _not_ the same.
1955 updateCachedMultiVector(colMap, as<size_t>(X.getNumVectors()));
1956 X_colMap = cachedMV_;
1957
1958 X_domainMap = X_colMap->offsetViewNonConst(domainMap, 0);
1959
1960 if (ZeroStartingSolution_) {
1961 // No need for an Import, since we're filling with zeros.
1962 X_colMap->putScalar(ZERO);
1963 } else {
1964 // We could just copy X into X_domainMap. However, that
1965 // wastes a copy, because the Import also does a copy (plus
1966 // communication). Since the typical use case for
1967 // Gauss-Seidel is a small number of sweeps (2 is typical), we
1968 // don't want to waste that copy. Thus, we do the Import
1969 // here, and skip the first Import in the first sweep.
1970 // Importing directly from X effects the copy into X_domainMap
1971 // (which is a view of X_colMap).
1972 X_colMap->doImport(X, *importer, Tpetra::CombineMode::INSERT);
1973 }
1974 copyBackOutput = true; // Don't forget to copy back at end.
1975 } // if column and domain Maps are (not) the same
1976
1977 // The Gauss-Seidel / SOR kernel expects multivectors of constant
1978 // stride. X_colMap is by construction, but B might not be. If
1979 // it's not, we have to make a copy.
1980 RCP<const multivector_type> B_in;
1981 if (B.isConstantStride()) {
1982 B_in = rcpFromRef(B);
1983 } else {
1984 // Range Map and row Map are the same in this case, so we can
1985 // use the cached row Map multivector to store a constant stride
1986 // copy of B.
1987 RCP<multivector_type> B_in_nonconst = rcp(new multivector_type(rowMap, B.getNumVectors()));
1988 try {
1989 deep_copy(*B_in_nonconst, B);
1990 } catch (std::exception& e) {
1991 std::ostringstream os;
1992 os << "Ifpack2::Relaxation::MTGaussSeidel: "
1993 "deep_copy(*B_in_nonconst, B) threw an exception: "
1994 << e.what() << ".";
1995 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what());
1996 }
1997 B_in = rcp_const_cast<const multivector_type>(B_in_nonconst);
1998
1999 /*
2000 TPETRA_EFFICIENCY_WARNING(
2001 ! B.isConstantStride (),
2002 std::runtime_error,
2003 "MTGaussSeidel: The current implementation requires that B have "
2004 "constant stride. Since B does not have constant stride, we had to "
2005 "copy it into a separate constant-stride multivector. This is a "
2006 "limitation of the current implementation and not your fault, but we "
2007 "still report it as an efficiency warning for your information.");
2008 */
2009 }
2010
2011 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice();
2012
2013 bool update_y_vector = true;
2014 // false as it was done up already, and we dont want to zero it in each sweep.
2015 bool zero_x_vector = false;
2016
2017 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2018 if (!importer.is_null() && sweep > 0) {
2019 // We already did the first Import for the zeroth sweep above,
2020 // if it was necessary.
2021 X_colMap->doImport(*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
2022 }
2023
2024 if (direction == Tpetra::Symmetric) {
2025 KokkosSparse::symmetric_gauss_seidel_apply(mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2026 kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2027 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2028 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2029 zero_x_vector, update_y_vector, DampingFactor_, 1);
2030 } else if (direction == Tpetra::Forward) {
2031 KokkosSparse::forward_sweep_gauss_seidel_apply(mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2032 kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2033 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2034 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2035 zero_x_vector, update_y_vector, DampingFactor_, 1);
2036 } else if (direction == Tpetra::Backward) {
2037 KokkosSparse::backward_sweep_gauss_seidel_apply(mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2038 kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2039 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2040 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2041 zero_x_vector, update_y_vector, DampingFactor_, 1);
2042 } else {
2043 TEUCHOS_TEST_FOR_EXCEPTION(
2044 true, std::invalid_argument,
2045 prefix << "The 'direction' enum does not have any of its valid "
2046 "values: Forward, Backward, or Symmetric.");
2047 }
2048 update_y_vector = false;
2049 }
2050
2051 if (copyBackOutput) {
2052 try {
2053 deep_copy(X, *X_domainMap); // Copy result back into X.
2054 } catch (std::exception& e) {
2055 TEUCHOS_TEST_FOR_EXCEPTION(
2056 true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
2057 "threw an exception: "
2058 << e.what());
2059 }
2060 }
2061
2062 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2063 const double numVectors = as<double>(X.getNumVectors());
2064 const double numGlobalRows = as<double>(A_->getGlobalNumRows());
2065 const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
2066 double ApplyFlops = NumSweeps_ * numVectors *
2067 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2068 if (direction == Tpetra::Symmetric)
2069 ApplyFlops *= 2.0;
2070 ApplyFlops_ += ApplyFlops;
2071}
2072
2073template <class MatrixType>
2075 std::ostringstream os;
2076
2077 // Output is a valid YAML dictionary in flow style. If you don't
2078 // like everything on a single line, you should call describe()
2079 // instead.
2080 os << "\"Ifpack2::Relaxation\": {";
2081
2082 os << "Initialized: " << (isInitialized() ? "true" : "false") << ", "
2083 << "Computed: " << (isComputed() ? "true" : "false") << ", ";
2084
2085 // It's useful to print this instance's relaxation method (Jacobi,
2086 // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info
2087 // than that, call describe() instead.
2088 os << "Type: ";
2089 if (PrecType_ == Ifpack2::Details::JACOBI) {
2090 os << "Jacobi";
2091 } else if (PrecType_ == Ifpack2::Details::GS) {
2092 os << "Gauss-Seidel";
2093 } else if (PrecType_ == Ifpack2::Details::SGS) {
2094 os << "Symmetric Gauss-Seidel";
2095 } else if (PrecType_ == Ifpack2::Details::MTGS) {
2096 os << "MT Gauss-Seidel";
2097 } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2098 os << "MT Symmetric Gauss-Seidel";
2099 } else if (PrecType_ == Ifpack2::Details::GS2) {
2100 os << "Two-stage Gauss-Seidel";
2101 } else if (PrecType_ == Ifpack2::Details::SGS2) {
2102 os << "Two-stage Symmetric Gauss-Seidel";
2103 } else {
2104 os << "INVALID";
2105 }
2106 if (hasBlockCrsMatrix_)
2107 os << ", BlockCrs";
2108
2109 os << ", "
2110 << "sweeps: " << NumSweeps_ << ", "
2111 << "damping factor: " << DampingFactor_ << ", ";
2112
2113 if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2114 os << "\"relaxation: mtgs cluster size\": " << clusterSize_ << ", ";
2115 os << "\"relaxation: long row threshold\": " << longRowThreshold_ << ", ";
2116 os << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << ", ";
2117 os << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2118 switch (mtColoringAlgorithm_) {
2119 case KokkosGraph::COLORING_DEFAULT:
2120 os << "DEFAULT";
2121 break;
2122 case KokkosGraph::COLORING_SERIAL:
2123 os << "SERIAL";
2124 break;
2125 case KokkosGraph::COLORING_VB:
2126 os << "VB";
2127 break;
2128 case KokkosGraph::COLORING_VBBIT:
2129 os << "VBBIT";
2130 break;
2131 case KokkosGraph::COLORING_VBCS:
2132 os << "VBCS";
2133 break;
2134 case KokkosGraph::COLORING_VBD:
2135 os << "VBD";
2136 break;
2137 case KokkosGraph::COLORING_VBDBIT:
2138 os << "VBDBIT";
2139 break;
2140 case KokkosGraph::COLORING_EB:
2141 os << "EB";
2142 break;
2143 default:
2144 os << "*Invalid*";
2145 }
2146 os << ", ";
2147 }
2148
2149 if (PrecType_ == Ifpack2::Details::GS2 ||
2150 PrecType_ == Ifpack2::Details::SGS2) {
2151 os << "outer sweeps: " << NumOuterSweeps_ << ", "
2152 << "inner sweeps: " << NumInnerSweeps_ << ", "
2153 << "inner damping factor: " << InnerDampingFactor_ << ", ";
2154 }
2155
2156 if (DoL1Method_) {
2157 os << "use l1: " << DoL1Method_ << ", "
2158 << "l1 eta: " << L1Eta_ << ", ";
2159 }
2160
2161 if (A_.is_null()) {
2162 os << "Matrix: null";
2163 } else {
2164 os << "Global matrix dimensions: ["
2165 << A_->getGlobalNumRows() << ", " << A_->getGlobalNumCols() << "]"
2166 << ", Global nnz: " << A_->getGlobalNumEntries();
2167 }
2168
2169 os << "}";
2170 return os.str();
2171}
2172
2173template <class MatrixType>
2175 describe(Teuchos::FancyOStream& out,
2176 const Teuchos::EVerbosityLevel verbLevel) const {
2177 using std::endl;
2178 using Teuchos::OSTab;
2179 using Teuchos::TypeNameTraits;
2180 using Teuchos::VERB_DEFAULT;
2181 using Teuchos::VERB_EXTREME;
2182 using Teuchos::VERB_HIGH;
2183 using Teuchos::VERB_LOW;
2184 using Teuchos::VERB_MEDIUM;
2185 using Teuchos::VERB_NONE;
2186
2187 const Teuchos::EVerbosityLevel vl =
2188 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2189
2190 const int myRank = this->getComm()->getRank();
2191
2192 // none: print nothing
2193 // low: print O(1) info from Proc 0
2194 // medium:
2195 // high:
2196 // extreme:
2197
2198 if (vl != VERB_NONE && myRank == 0) {
2199 // Describable interface asks each implementation to start with a tab.
2200 OSTab tab1(out);
2201 // Output is valid YAML; hence the quotes, to protect the colons.
2202 out << "\"Ifpack2::Relaxation\":" << endl;
2203 OSTab tab2(out);
2204 out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name() << "\""
2205 << endl;
2206 if (this->getObjectLabel() != "") {
2207 out << "Label: " << this->getObjectLabel() << endl;
2208 }
2209 out << "Initialized: " << (isInitialized() ? "true" : "false") << endl
2210 << "Computed: " << (isComputed() ? "true" : "false") << endl
2211 << "Parameters: " << endl;
2212 {
2213 OSTab tab3(out);
2214 out << "\"relaxation: type\": ";
2215 if (PrecType_ == Ifpack2::Details::JACOBI) {
2216 out << "Jacobi";
2217 } else if (PrecType_ == Ifpack2::Details::GS) {
2218 out << "Gauss-Seidel";
2219 } else if (PrecType_ == Ifpack2::Details::SGS) {
2220 out << "Symmetric Gauss-Seidel";
2221 } else if (PrecType_ == Ifpack2::Details::MTGS) {
2222 out << "MT Gauss-Seidel";
2223 } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2224 out << "MT Symmetric Gauss-Seidel";
2225 } else if (PrecType_ == Ifpack2::Details::GS2) {
2226 out << "Two-stage Gauss-Seidel";
2227 } else if (PrecType_ == Ifpack2::Details::SGS2) {
2228 out << "Two-stage Symmetric Gauss-Seidel";
2229 } else {
2230 out << "INVALID";
2231 }
2232 // We quote these parameter names because they contain colons.
2233 // YAML uses the colon to distinguish key from value.
2234 out << endl
2235 << "\"relaxation: sweeps\": " << NumSweeps_ << endl
2236 << "\"relaxation: damping factor\": " << DampingFactor_ << endl
2237 << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2238 << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2239 << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2240 << "\"relaxation: use l1\": " << DoL1Method_ << endl
2241 << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
2242 if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2243 out << "\"relaxation: mtgs cluster size\": " << clusterSize_ << endl;
2244 out << "\"relaxation: long row threshold\": " << longRowThreshold_ << endl;
2245 out << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << endl;
2246 out << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2247 switch (mtColoringAlgorithm_) {
2248 case KokkosGraph::COLORING_DEFAULT:
2249 out << "DEFAULT";
2250 break;
2251 case KokkosGraph::COLORING_SERIAL:
2252 out << "SERIAL";
2253 break;
2254 case KokkosGraph::COLORING_VB:
2255 out << "VB";
2256 break;
2257 case KokkosGraph::COLORING_VBBIT:
2258 out << "VBBIT";
2259 break;
2260 case KokkosGraph::COLORING_VBCS:
2261 out << "VBCS";
2262 break;
2263 case KokkosGraph::COLORING_VBD:
2264 out << "VBD";
2265 break;
2266 case KokkosGraph::COLORING_VBDBIT:
2267 out << "VBDBIT";
2268 break;
2269 case KokkosGraph::COLORING_EB:
2270 out << "EB";
2271 break;
2272 default:
2273 out << "*Invalid*";
2274 }
2275 out << endl;
2276 }
2277 if (PrecType_ == Ifpack2::Details::GS2 || PrecType_ == Ifpack2::Details::SGS2) {
2278 out << "\"relaxation: inner damping factor\": " << InnerDampingFactor_ << endl;
2279 out << "\"relaxation: outer sweeps\" : " << NumOuterSweeps_ << endl;
2280 out << "\"relaxation: inner sweeps\" : " << NumInnerSweeps_ << endl;
2281 }
2282 }
2283 out << "Computed quantities:" << endl;
2284 {
2285 OSTab tab3(out);
2286 out << "Global number of rows: " << A_->getGlobalNumRows() << endl
2287 << "Global number of columns: " << A_->getGlobalNumCols() << endl;
2288 }
2289 if (checkDiagEntries_ && isComputed()) {
2290 out << "Properties of input diagonal entries:" << endl;
2291 {
2292 OSTab tab3(out);
2293 out << "Magnitude of minimum-magnitude diagonal entry: "
2294 << globalMinMagDiagEntryMag_ << endl
2295 << "Magnitude of maximum-magnitude diagonal entry: "
2296 << globalMaxMagDiagEntryMag_ << endl
2297 << "Number of diagonal entries with small magnitude: "
2298 << globalNumSmallDiagEntries_ << endl
2299 << "Number of zero diagonal entries: "
2300 << globalNumZeroDiagEntries_ << endl
2301 << "Number of diagonal entries with negative real part: "
2302 << globalNumNegDiagEntries_ << endl
2303 << "Abs 2-norm diff between computed and actual inverse "
2304 << "diagonal: " << globalDiagNormDiff_ << endl;
2305 }
2306 }
2307 if (isComputed()) {
2308 out << "Saved diagonal offsets: "
2309 << (savedDiagOffsets_ ? "true" : "false") << endl;
2310 }
2311 out << "Call counts and total times (in seconds): " << endl;
2312 {
2313 OSTab tab3(out);
2314 out << "initialize: " << endl;
2315 {
2316 OSTab tab4(out);
2317 out << "Call count: " << NumInitialize_ << endl;
2318 out << "Total time: " << InitializeTime_ << endl;
2319 }
2320 out << "compute: " << endl;
2321 {
2322 OSTab tab4(out);
2323 out << "Call count: " << NumCompute_ << endl;
2324 out << "Total time: " << ComputeTime_ << endl;
2325 }
2326 out << "apply: " << endl;
2327 {
2328 OSTab tab4(out);
2329 out << "Call count: " << NumApply_ << endl;
2330 out << "Total time: " << ApplyTime_ << endl;
2331 }
2332 }
2333 }
2334}
2335
2336} // namespace Ifpack2
2337
2338#define IFPACK2_RELAXATION_INSTANT(S, LO, GO, N) \
2339 template class Ifpack2::Relaxation<Tpetra::RowMatrix<S, LO, GO, N>>;
2340
2341#endif // IFPACK2_RELAXATION_DEF_HPP
Declaration of Ifpack2::Details::Behavior, a class that describes Ifpack2's run-time behavior.
File for utility functions.
static bool debug()
Whether Ifpack2 is in debug mode.
Definition Ifpack2_Details_Behavior.cpp:31
Ifpack2's implementation of Trilinos::Details::LinearSolver interface.
Definition Ifpack2_Details_LinearSolver_decl.hpp:75
Diagonal preconditioner.
Definition Ifpack2_Diagonal_decl.hpp:45
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition Ifpack2_Relaxation_decl.hpp:215
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition Ifpack2_Relaxation_def.hpp:183
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:227
void setParameters(const Teuchos::ParameterList &params)
Set the relaxation / preconditioner parameters.
Definition Ifpack2_Relaxation_def.hpp:435
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:230
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition Ifpack2_Relaxation_def.hpp:443
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition Ifpack2_Relaxation_def.hpp:462
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition Ifpack2_Relaxation_def.hpp:515
int getNumCompute() const
Total number of calls to compute().
Definition Ifpack2_Relaxation_def.hpp:495
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition Ifpack2_Relaxation_def.hpp:454
void compute()
Compute the preconditioner ("numeric setup");.
Definition Ifpack2_Relaxation_def.hpp:961
void initialize()
Initialize the preconditioner ("symbolic setup").
Definition Ifpack2_Relaxation_def.hpp:675
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition Ifpack2_Relaxation_def.hpp:510
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object's attributes to the given output stream.
Definition Ifpack2_Relaxation_def.hpp:2175
std::string description() const
A simple one-line description of this object.
Definition Ifpack2_Relaxation_def.hpp:2074
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition Ifpack2_Relaxation_def.hpp:192
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition Ifpack2_Relaxation_def.hpp:505
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition Ifpack2_Relaxation_def.hpp:163
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:221
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition Ifpack2_Relaxation_def.hpp:542
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:224
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the input matrix to X, returning the result in Y.
Definition Ifpack2_Relaxation_def.hpp:654
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition Ifpack2_Relaxation_decl.hpp:241
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition Ifpack2_Relaxation_decl.hpp:236
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition Ifpack2_Relaxation_def.hpp:530
int getNumApply() const
Total number of calls to apply().
Definition Ifpack2_Relaxation_def.hpp:500
int getNumInitialize() const
Total number of calls to initialize().
Definition Ifpack2_Relaxation_def.hpp:490
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition Ifpack2_Relaxation_def.hpp:520
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition Ifpack2_Relaxation_def.hpp:485
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition Ifpack2_Relaxation_def.hpp:475
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition Ifpack2_Relaxation_def.hpp:525
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40
void getValidParameters(Teuchos::ParameterList &params)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition Ifpack2_Parameters.cpp:18