Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_LocalSparseTriangularSolver_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
11#define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
12
13#include <sstream> // ostringstream
14#include <stdexcept> // runtime_error
15
16#include "Ifpack2_LocalSparseTriangularSolver_decl.hpp"
17#include "Tpetra_CrsMatrix.hpp"
18#include "Tpetra_Core.hpp"
19#include "Teuchos_StandardParameterEntryValidators.hpp"
20#include "Tpetra_Details_determineLocalTriangularStructure.hpp"
21#include "KokkosSparse_trsv.hpp"
22
23#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
24#include "shylu_hts.hpp"
25#endif
26
27namespace Ifpack2 {
28
29namespace Details {
30
31#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
32
33inline void cusparse_error_throw(cusparseStatus_t cusparseStatus, const char* name,
34 const char* file, const int line) {
35 std::ostringstream out;
36#if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION)
37 out << name << " error( " << cusparseGetErrorName(cusparseStatus) << "): " << cusparseGetErrorString(cusparseStatus);
38#else
39 out << name << " error( ";
40 switch (cusparseStatus) {
41 case CUSPARSE_STATUS_NOT_INITIALIZED:
42 out << "CUSPARSE_STATUS_NOT_INITIALIZED): cusparse handle was not "
43 "created correctly.";
44 break;
45 case CUSPARSE_STATUS_ALLOC_FAILED:
46 out << "CUSPARSE_STATUS_ALLOC_FAILED): you might tried to allocate too "
47 "much memory";
48 break;
49 case CUSPARSE_STATUS_INVALID_VALUE: out << "CUSPARSE_STATUS_INVALID_VALUE)"; break;
50 case CUSPARSE_STATUS_ARCH_MISMATCH: out << "CUSPARSE_STATUS_ARCH_MISMATCH)"; break;
51 case CUSPARSE_STATUS_MAPPING_ERROR: out << "CUSPARSE_STATUS_MAPPING_ERROR)"; break;
52 case CUSPARSE_STATUS_EXECUTION_FAILED: out << "CUSPARSE_STATUS_EXECUTION_FAILED)"; break;
53 case CUSPARSE_STATUS_INTERNAL_ERROR: out << "CUSPARSE_STATUS_INTERNAL_ERROR)"; break;
54 case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: out << "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED)"; break;
55 case CUSPARSE_STATUS_ZERO_PIVOT: out << "CUSPARSE_STATUS_ZERO_PIVOT)"; break;
56 default: out << "unrecognized error code): this is bad!"; break;
57 }
58#endif // CUSPARSE_VERSION
59 if (file) {
60 out << " " << file << ":" << line;
61 }
62 throw std::runtime_error(out.str());
63}
64
65inline void cusparse_safe_call(cusparseStatus_t cusparseStatus, const char* name, const char* file = nullptr,
66 const int line = 0) {
67 if (CUSPARSE_STATUS_SUCCESS != cusparseStatus) {
68 cusparse_error_throw(cusparseStatus, name, file, line);
69 }
70}
71
72#define IFPACK2_DETAILS_CUSPARSE_SAFE_CALL(call) \
73 Ifpack2::Details::cusparse_safe_call(call, #call, __FILE__, __LINE__)
74
75#endif // defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
76
77struct TrisolverType {
78 enum Enum {
79 Internal,
80 HTS,
81 KSPTRSV
82 };
83
84 static void loadPLTypeOption(Teuchos::Array<std::string>& type_strs, Teuchos::Array<Enum>& type_enums) {
85 type_strs.resize(3);
86 type_strs[0] = "Internal";
87 type_strs[1] = "HTS";
88 type_strs[2] = "KSPTRSV";
89 type_enums.resize(3);
90 type_enums[0] = Internal;
91 type_enums[1] = HTS;
92 type_enums[2] = KSPTRSV;
93 }
94};
95} // namespace Details
96
97template <class MatrixType>
98class LocalSparseTriangularSolver<MatrixType>::HtsImpl {
99 public:
100 typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> crs_matrix_type;
101
102 void reset() {
103#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
104 Timpl_ = Teuchos::null;
105 levelset_block_size_ = 1;
106#endif
107 }
108
109 void setParameters(const Teuchos::ParameterList& pl) {
110 (void)pl;
111#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
112 const char* block_size_s = "trisolver: block size";
113 if (pl.isParameter(block_size_s)) {
114 TEUCHOS_TEST_FOR_EXCEPT_MSG(!pl.isType<int>(block_size_s),
115 "The parameter \"" << block_size_s << "\" must be of type int.");
116 levelset_block_size_ = pl.get<int>(block_size_s);
117 }
118 if (levelset_block_size_ < 1)
119 levelset_block_size_ = 1;
120#endif
121 }
122
123 // HTS has the phases symbolic+numeric, numeric, and apply. Hence the first
124 // call to compute() will trigger the symbolic+numeric phase, and subsequent
125 // calls (with the same Timpl_) will trigger the numeric phase. In the call to
126 // initialize(), essentially nothing happens.
127 void initialize(const crs_matrix_type& /* unused */) {
128#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
129 reset();
130 transpose_ = conjugate_ = false;
131#endif
132 }
133
134 void compute(const crs_matrix_type& T_in, const Teuchos::RCP<Teuchos::FancyOStream>& out) {
135 (void)T_in;
136 (void)out;
137#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
138 using Teuchos::ArrayRCP;
139
140 auto rowptr = T_in.getLocalRowPtrsHost();
141 auto colidx = T_in.getLocalIndicesHost();
142 auto val = T_in.getLocalValuesHost(Tpetra::Access::ReadOnly);
143 Kokkos::fence();
144
145 Teuchos::RCP<HtsCrsMatrix> T_hts = Teuchos::rcpWithDealloc(
146 HTST::make_CrsMatrix(rowptr.size() - 1,
147 rowptr.data(), colidx.data(),
148 // For std/Kokkos::complex.
149 reinterpret_cast<const scalar_type*>(val.data()),
150 transpose_, conjugate_),
151 HtsCrsMatrixDeleter());
152
153 if (Teuchos::nonnull(Timpl_)) {
154 // Reuse the nonzero pattern.
155 HTST::reprocess_numeric(Timpl_.get(), T_hts.get());
156 } else {
157 // Build from scratch.
158 if (T_in.getCrsGraph().is_null()) {
159 if (Teuchos::nonnull(out))
160 *out << "HTS compute failed because T_in.getCrsGraph().is_null().\n";
161 return;
162 }
163 if (!T_in.getCrsGraph()->isSorted()) {
164 if (Teuchos::nonnull(out))
165 *out << "HTS compute failed because ! T_in.getCrsGraph().isSorted().\n";
166 return;
167 }
168 if (!T_in.isStorageOptimized()) {
169 if (Teuchos::nonnull(out))
170 *out << "HTS compute failed because ! T_in.isStorageOptimized().\n";
171 return;
172 }
173
174 typename HTST::PreprocessArgs args;
175 args.T = T_hts.get();
176 args.max_nrhs = 1;
177#ifdef _OPENMP
178 args.nthreads = omp_get_max_threads();
179#else
180 args.nthreads = 1;
181#endif
182 args.save_for_reprocess = true;
183 typename HTST::Options opts;
184 opts.levelset_block_size = levelset_block_size_;
185 args.options = &opts;
186
187 try {
188 Timpl_ = Teuchos::rcpWithDealloc(HTST::preprocess(args), TImplDeleter());
189 } catch (const std::exception& e) {
190 if (Teuchos::nonnull(out))
191 *out << "HTS preprocess threw: " << e.what() << "\n";
192 }
193 }
194#endif
195 }
196
197 // HTS may not be able to handle a matrix, so query whether compute()
198 // succeeded.
199 bool isComputed() {
200#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
201 return Teuchos::nonnull(Timpl_);
202#else
203 return false;
204#endif
205 }
206
207 // Y := beta * Y + alpha * (M * X)
208 void localApply(const MV& X, MV& Y,
209 const Teuchos::ETransp /* mode */,
210 const scalar_type& alpha, const scalar_type& beta) const {
211 (void)X;
212 (void)Y;
213 (void)alpha;
214 (void)beta;
215#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
216 const auto& X_view = X.getLocalViewHost(Tpetra::Access::ReadOnly);
217 const auto& Y_view = Y.getLocalViewHost(Tpetra::Access::ReadWrite);
218
219 // Only does something if #rhs > current capacity.
220 HTST::reset_max_nrhs(Timpl_.get(), X_view.extent(1));
221 // Switch alpha and beta because of HTS's opposite convention.
222 HTST::solve_omp(Timpl_.get(),
223 // For std/Kokkos::complex.
224 reinterpret_cast<const scalar_type*>(X_view.data()),
225 X_view.extent(1),
226 // For std/Kokkos::complex.
227 reinterpret_cast<scalar_type*>(Y_view.data()),
228 beta, alpha);
229#endif
230 }
231
232 private:
233#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
234 typedef ::Experimental::HTS<local_ordinal_type, size_t, scalar_type> HTST;
235 typedef typename HTST::Impl TImpl;
236 typedef typename HTST::CrsMatrix HtsCrsMatrix;
237
238 struct TImplDeleter {
239 void free(TImpl* impl) {
240 HTST::delete_Impl(impl);
241 }
242 };
243
244 struct HtsCrsMatrixDeleter {
245 void free(HtsCrsMatrix* T) {
246 HTST::delete_CrsMatrix(T);
247 }
248 };
249
250 Teuchos::RCP<TImpl> Timpl_;
251 bool transpose_, conjugate_;
252 int levelset_block_size_;
253#endif
254};
255
256template <class MatrixType>
258 LocalSparseTriangularSolver(const Teuchos::RCP<const row_matrix_type>& A)
259 : A_(A) {
260 initializeState();
261
262 if (!A.is_null()) {
263 Teuchos::RCP<const crs_matrix_type> A_crs =
264 Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
265 TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument,
266 "Ifpack2::LocalSparseTriangularSolver constructor: "
267 "The input matrix A is not a Tpetra::CrsMatrix.");
268 A_crs_ = A_crs;
269 }
270}
271
272template <class MatrixType>
274 LocalSparseTriangularSolver(const Teuchos::RCP<const row_matrix_type>& A,
275 const Teuchos::RCP<Teuchos::FancyOStream>& out)
276 : A_(A)
277 , out_(out) {
278 initializeState();
279 if (!out_.is_null()) {
280 *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
281 << std::endl;
282 }
283
284 if (!A.is_null()) {
285 Teuchos::RCP<const crs_matrix_type> A_crs =
286 Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
287 TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument,
288 "Ifpack2::LocalSparseTriangularSolver constructor: "
289 "The input matrix A is not a Tpetra::CrsMatrix.");
290 A_crs_ = A_crs;
291 }
292}
293
294template <class MatrixType>
299
300template <class MatrixType>
302 LocalSparseTriangularSolver(const bool /* unused */, const Teuchos::RCP<Teuchos::FancyOStream>& out)
303 : out_(out) {
304 initializeState();
305 if (!out_.is_null()) {
306 *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
307 << std::endl;
308 }
309}
310
311template <class MatrixType>
313 isInitialized_ = false;
314 isComputed_ = false;
315 reverseStorage_ = false;
316 isInternallyChanged_ = false;
317 numInitialize_ = 0;
318 numCompute_ = 0;
319 numApply_ = 0;
320 initializeTime_ = 0.0;
321 computeTime_ = 0.0;
322 applyTime_ = 0.0;
323 isKokkosKernelsSptrsv_ = false;
324 isKokkosKernelsStream_ = false;
325 num_streams_ = 0;
326 uplo_ = "N";
327 diag_ = "N";
328}
329
330template <class MatrixType>
333 if (!isKokkosKernelsStream_) {
334 if (Teuchos::nonnull(kh_)) {
335 kh_->destroy_sptrsv_handle();
336 }
337 } else {
338 for (size_t i = 0; i < kh_v_.size(); i++) {
339 if (Teuchos::nonnull(kh_v_[i])) {
340 kh_v_[i]->destroy_sptrsv_handle();
341 }
342 }
343 }
344}
345
346template <class MatrixType>
348 setParameters(const Teuchos::ParameterList& pl) {
349 using Teuchos::Array;
350 using Teuchos::ParameterList;
351 using Teuchos::RCP;
352
353 Details::TrisolverType::Enum trisolverType = Details::TrisolverType::Internal;
354 do {
355 static const char typeName[] = "trisolver: type";
356
357 if (!pl.isType<std::string>(typeName)) break;
358
359 // Map std::string <-> TrisolverType::Enum.
360 Array<std::string> trisolverTypeStrs;
361 Array<Details::TrisolverType::Enum> trisolverTypeEnums;
362 Details::TrisolverType::loadPLTypeOption(trisolverTypeStrs, trisolverTypeEnums);
363 Teuchos::StringToIntegralParameterEntryValidator<Details::TrisolverType::Enum>
364 s2i(trisolverTypeStrs(), trisolverTypeEnums(), typeName, false);
365
366 trisolverType = s2i.getIntegralValue(pl.get<std::string>(typeName));
367 } while (0);
368
369 if (trisolverType == Details::TrisolverType::HTS) {
370 htsImpl_ = Teuchos::rcp(new HtsImpl());
371 htsImpl_->setParameters(pl);
372 }
373
374 if (trisolverType == Details::TrisolverType::KSPTRSV) {
375 this->isKokkosKernelsSptrsv_ = true;
376 } else {
377 this->isKokkosKernelsSptrsv_ = false;
378 }
379
380 if (pl.isParameter("trisolver: reverse U"))
381 reverseStorage_ = pl.get<bool>("trisolver: reverse U");
382
383 TEUCHOS_TEST_FOR_EXCEPTION(reverseStorage_ && (trisolverType == Details::TrisolverType::HTS || trisolverType == Details::TrisolverType::KSPTRSV),
384 std::logic_error,
385 "Ifpack2::LocalSparseTriangularSolver::setParameters: "
386 "You are not allowed to enable both HTS or KSPTRSV and the \"trisolver: reverse U\" "
387 "options. See GitHub issue #2647.");
388}
389
390template <class MatrixType>
392 initialize() {
393 using Tpetra::Details::determineLocalTriangularStructure;
394
395 using local_matrix_type = typename crs_matrix_type::local_matrix_device_type;
396 using LO = local_ordinal_type;
397
398 const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::initialize: ";
399 if (!out_.is_null()) {
400 *out_ << ">>> DEBUG " << prefix << std::endl;
401 }
402
403 if (!isKokkosKernelsStream_) {
404 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix << "You must call "
405 "setMatrix() with a nonnull input matrix before you may call "
406 "initialize() or compute().");
407 if (A_crs_.is_null()) {
408 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_);
409 TEUCHOS_TEST_FOR_EXCEPTION(A_crs.get() == nullptr, std::invalid_argument,
410 prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
411 A_crs_ = A_crs;
412 }
413 auto G = A_crs_->getGraph();
414 TEUCHOS_TEST_FOR_EXCEPTION(G.is_null(), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
415 "but A_crs_'s RowGraph G is null. "
416 "Please report this bug to the Ifpack2 developers.");
417 // At this point, the graph MUST be fillComplete. The "initialize"
418 // (symbolic) part of setup only depends on the graph structure, so
419 // the matrix itself need not be fill complete.
420 TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::runtime_error,
421 "If you call this method, "
422 "the matrix's graph must be fill complete. It is not.");
423
424 // mfh 30 Apr 2018: See GitHub Issue #2658.
425 constexpr bool ignoreMapsForTriStructure = true;
426 auto lclTriStructure = [&] {
427 auto lclMatrix = A_crs_->getLocalMatrixDevice();
428 auto lclRowMap = A_crs_->getRowMap()->getLocalMap();
429 auto lclColMap = A_crs_->getColMap()->getLocalMap();
430 auto lclTriStruct =
431 determineLocalTriangularStructure(lclMatrix.graph,
432 lclRowMap,
433 lclColMap,
434 ignoreMapsForTriStructure);
435 const LO lclNumRows = lclRowMap.getLocalNumElements();
436 this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
437 this->uplo_ = lclTriStruct.couldBeLowerTriangular ? "L" : (lclTriStruct.couldBeUpperTriangular ? "U" : "N");
438 return lclTriStruct;
439 }();
440
441 if (reverseStorage_ && lclTriStructure.couldBeUpperTriangular &&
442 htsImpl_.is_null()) {
443 // Reverse the storage for an upper triangular matrix
444 auto Alocal = A_crs_->getLocalMatrixDevice();
445 auto ptr = Alocal.graph.row_map;
446 auto ind = Alocal.graph.entries;
447 auto val = Alocal.values;
448
449 auto numRows = Alocal.numRows();
450 auto numCols = Alocal.numCols();
451 auto numNnz = Alocal.nnz();
452
453 typename decltype(ptr)::non_const_type newptr("ptr", ptr.extent(0));
454 typename decltype(ind)::non_const_type newind("ind", ind.extent(0));
455 decltype(val) newval("val", val.extent(0));
456
457 // FIXME: The code below assumes UVM
458 typename crs_matrix_type::execution_space().fence();
459 newptr(0) = 0;
460 for (local_ordinal_type row = 0, rowStart = 0; row < numRows; ++row) {
461 auto A_r = Alocal.row(numRows - 1 - row);
462
463 auto numEnt = A_r.length;
464 for (local_ordinal_type k = 0; k < numEnt; ++k) {
465 newind(rowStart + k) = numCols - 1 - A_r.colidx(numEnt - 1 - k);
466 newval(rowStart + k) = A_r.value(numEnt - 1 - k);
467 }
468 rowStart += numEnt;
469 newptr(row + 1) = rowStart;
470 }
471 typename crs_matrix_type::execution_space().fence();
472
473 // Reverse maps
474 Teuchos::RCP<map_type> newRowMap, newColMap;
475 {
476 // Reverse row map
477 auto rowMap = A_->getRowMap();
478 auto numElems = rowMap->getLocalNumElements();
479 auto rowElems = rowMap->getLocalElementList();
480
481 Teuchos::Array<global_ordinal_type> newRowElems(rowElems.size());
482 for (size_t i = 0; i < numElems; i++)
483 newRowElems[i] = rowElems[numElems - 1 - i];
484
485 newRowMap = Teuchos::rcp(new map_type(rowMap->getGlobalNumElements(), newRowElems, rowMap->getIndexBase(), rowMap->getComm()));
486 }
487 {
488 // Reverse column map
489 auto colMap = A_->getColMap();
490 auto numElems = colMap->getLocalNumElements();
491 auto colElems = colMap->getLocalElementList();
492
493 Teuchos::Array<global_ordinal_type> newColElems(colElems.size());
494 for (size_t i = 0; i < numElems; i++)
495 newColElems[i] = colElems[numElems - 1 - i];
496
497 newColMap = Teuchos::rcp(new map_type(colMap->getGlobalNumElements(), newColElems, colMap->getIndexBase(), colMap->getComm()));
498 }
499
500 // Construct new matrix
501 local_matrix_type newLocalMatrix("Upermuted", numRows, numCols, numNnz, newval, newptr, newind);
502
503 A_crs_ = Teuchos::rcp(new crs_matrix_type(newLocalMatrix, newRowMap, newColMap, A_crs_->getDomainMap(), A_crs_->getRangeMap()));
504
505 isInternallyChanged_ = true;
506
507 // FIXME (mfh 18 Apr 2019) Recomputing this is unnecessary, but I
508 // didn't want to break any invariants, especially considering
509 // that this branch is likely poorly tested.
510 auto newLclTriStructure =
511 determineLocalTriangularStructure(newLocalMatrix.graph,
512 newRowMap->getLocalMap(),
513 newColMap->getLocalMap(),
514 ignoreMapsForTriStructure);
515 const LO newLclNumRows = newRowMap->getLocalNumElements();
516 this->diag_ = (newLclTriStructure.diagCount < newLclNumRows) ? "U" : "N";
517 this->uplo_ = newLclTriStructure.couldBeLowerTriangular ? "L" : (newLclTriStructure.couldBeUpperTriangular ? "U" : "N");
518 }
519 } else {
520 bool prev_ambiguous = false;
521 bool all_ambiguous = true;
522 for (int i = 0; i < num_streams_; i++) {
523 TEUCHOS_TEST_FOR_EXCEPTION(A_crs_v_[i].is_null(), std::runtime_error, prefix << "You must call "
524 "setMatrix() with a nonnull input matrix before you may call "
525 "initialize() or compute().");
526 auto G = A_crs_v_[i]->getGraph();
527 TEUCHOS_TEST_FOR_EXCEPTION(G.is_null(), std::logic_error, prefix << "A_crs_ are nonnull, "
528 "but A_crs_'s RowGraph G is null. "
529 "Please report this bug to the Ifpack2 developers.");
530 // At this point, the graph MUST be fillComplete. The "initialize"
531 // (symbolic) part of setup only depends on the graph structure, so
532 // the matrix itself need not be fill complete.
533 TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::runtime_error,
534 "If you call this method, "
535 "the matrix's graph must be fill complete. It is not.");
536
537 // mfh 30 Apr 2018: See GitHub Issue #2658.
538 constexpr bool ignoreMapsForTriStructure = true;
539 std::string prev_uplo = this->uplo_;
540 std::string prev_diag = this->diag_;
541 auto lclMatrix = A_crs_v_[i]->getLocalMatrixDevice();
542 auto lclRowMap = A_crs_v_[i]->getRowMap()->getLocalMap();
543 auto lclColMap = A_crs_v_[i]->getColMap()->getLocalMap();
544 auto lclTriStruct =
545 determineLocalTriangularStructure(lclMatrix.graph,
546 lclRowMap,
547 lclColMap,
548 ignoreMapsForTriStructure);
549 const LO lclNumRows = lclRowMap.getLocalNumElements();
550 this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
551 const bool could_be_lower = lclTriStruct.couldBeLowerTriangular;
552 const bool could_be_upper = lclTriStruct.couldBeUpperTriangular;
553 if (could_be_lower && could_be_upper) {
554 // Ambiguous, but that's OK if at least one stream is unabiguous
555 this->uplo_ = prev_uplo;
556 prev_ambiguous = true;
557 } else {
558 this->uplo_ = could_be_lower ? "L" : (could_be_upper ? "U" : "N");
559 if (this->uplo_ != "N" && prev_uplo == "N" && prev_ambiguous) {
560 prev_uplo = this->uplo_;
561 }
562 prev_ambiguous = false;
563 }
564 all_ambiguous &= prev_ambiguous;
565 if (i > 0) {
566 TEUCHOS_TEST_FOR_EXCEPTION((this->diag_ != prev_diag) || (this->uplo_ != prev_uplo),
567 std::logic_error, prefix << "A_crs_'s structures in streams "
568 "are different. Please report this bug to the Ifpack2 developers.");
569 }
570 }
571 // If all streams were ambiguous, just call it "L"
572 if (all_ambiguous) {
573 this->uplo_ = "L";
574 }
575 }
576
577 if (Teuchos::nonnull(htsImpl_)) {
578 htsImpl_->initialize(*A_crs_);
579 isInternallyChanged_ = true;
580 }
581
582 const bool ksptrsv_valid_uplo = (this->uplo_ != "N");
583 kh_v_nonnull_ = false;
584 if (this->isKokkosKernelsSptrsv_ && ksptrsv_valid_uplo && this->diag_ != "U") {
585 if (!isKokkosKernelsStream_) {
586 kh_ = Teuchos::rcp(new k_handle());
587 const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
588
589 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_, true);
590 auto Alocal = A_crs->getLocalMatrixDevice();
591 auto ptr = Alocal.graph.row_map;
592 auto ind = Alocal.graph.entries;
593 auto val = Alocal.values;
594
595 auto numRows = Alocal.numRows();
596 kh_->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows, is_lower_tri);
597 KokkosSparse::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
598 } else {
599 kh_v_ = std::vector<Teuchos::RCP<k_handle>>(num_streams_);
600 for (int i = 0; i < num_streams_; i++) {
601 kh_v_[i] = Teuchos::rcp(new k_handle());
602 auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_v_[i], true);
603 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
604 auto ptr_i = Alocal_i.graph.row_map;
605 auto ind_i = Alocal_i.graph.entries;
606 auto val_i = Alocal_i.values;
607
608 auto numRows_i = Alocal_i.numRows();
609
610 const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
611 kh_v_[i]->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows_i, is_lower_tri);
612 KokkosSparse::sptrsv_symbolic(kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
613 }
614 kh_v_nonnull_ = true;
615 }
616 }
617
618 isInitialized_ = true;
619 ++numInitialize_;
620}
621
622template <class MatrixType>
623KokkosSparse::Experimental::SPTRSVAlgorithm
625#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
626 // CuSparse only supports int type ordinals
627 // and scalar types of float, double, float complex and double complex
628 if constexpr (std::is_same<Kokkos::Cuda, HandleExecSpace>::value &&
629 std::is_same<int, local_ordinal_type>::value &&
630 (std::is_same<scalar_type, float>::value ||
631 std::is_same<scalar_type, double>::value ||
632 std::is_same<impl_scalar_type, Kokkos::complex<float>>::value ||
633 std::is_same<impl_scalar_type, Kokkos::complex<double>>::value)) {
634 return KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE;
635 }
636#endif
637 return KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
638}
639
640template <class MatrixType>
642 compute() {
643 const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::compute: ";
644 if (!out_.is_null()) {
645 *out_ << ">>> DEBUG " << prefix << std::endl;
646 }
647
648 if (!isKokkosKernelsStream_) {
649 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix << "You must call "
650 "setMatrix() with a nonnull input matrix before you may call "
651 "initialize() or compute().");
652 TEUCHOS_TEST_FOR_EXCEPTION(A_crs_.is_null(), std::logic_error, prefix << "A_ is nonnull, but "
653 "A_crs_ is null. Please report this bug to the Ifpack2 developers.");
654 // At this point, the matrix MUST be fillComplete.
655 TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_->isFillComplete(), std::runtime_error,
656 "If you call this "
657 "method, the matrix must be fill complete. It is not.");
658 } else {
659 for (int i = 0; i < num_streams_; i++) {
660 TEUCHOS_TEST_FOR_EXCEPTION(A_crs_v_[i].is_null(), std::runtime_error, prefix << "You must call "
661 "setMatrices() with a nonnull input matrix before you may call "
662 "initialize() or compute().");
663 // At this point, the matrix MUST be fillComplete.
664 TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_v_[i]->isFillComplete(), std::runtime_error,
665 "If you call this "
666 "method, the matrix must be fill complete. It is not.");
667 }
668 }
669
670 if (!isInitialized_) {
671 initialize();
672 }
673 TEUCHOS_TEST_FOR_EXCEPTION(!isInitialized_, std::logic_error, prefix << "initialize() should have "
674 "been called by this point, but isInitialized_ is false. "
675 "Please report this bug to the Ifpack2 developers.");
676
677// NOTE (Nov-09-2022):
678// For Cuda >= 11.3 (using cusparseSpSV), always call symbolic during compute
679// even when matrix values are changed with the same sparsity pattern.
680// For Cuda >= 12.1 has a new cusparseSpSV_updateMatrix function just for updating the
681// values that is substantially faster.
682// This would all be much better handled via a KokkosSparse::sptrsv_numeric(...)
683// that could hide the Cuda implementation details.
684#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && (CUDA_VERSION >= 11030)
685 if constexpr (std::is_same_v<typename crs_matrix_type::execution_space, Kokkos::Cuda>) {
686 if (this->isKokkosKernelsSptrsv_) {
687 if (Teuchos::nonnull(kh_) && !isKokkosKernelsStream_) {
688 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_, true);
689 auto Alocal = A_crs->getLocalMatrixDevice();
690 auto val = Alocal.values;
691 auto ptr = Alocal.graph.row_map;
692 auto ind = Alocal.graph.entries;
693 KokkosSparse::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
694 } else if (kh_v_nonnull_) {
695 for (int i = 0; i < num_streams_; i++) {
696 auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_v_[i], true);
697 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
698 auto val_i = Alocal_i.values;
699 auto ptr_i = Alocal_i.graph.row_map;
700 auto ind_i = Alocal_i.graph.entries;
701 KokkosSparse::sptrsv_symbolic(exec_space_instances_[i], kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
702 }
703 }
704 }
705 }
706#endif
707
708 if (!isComputed_) { // Only compute if not computed before
709 if (Teuchos::nonnull(htsImpl_))
710 htsImpl_->compute(*A_crs_, out_);
711
712 isComputed_ = true;
713 ++numCompute_;
714 }
715}
716
717template <class MatrixType>
719 apply(const Tpetra::MultiVector<scalar_type, local_ordinal_type,
721 Tpetra::MultiVector<scalar_type, local_ordinal_type,
723 Teuchos::ETransp mode,
724 scalar_type alpha,
725 scalar_type beta) const {
726 using Teuchos::RCP;
727 using Teuchos::rcp;
728 using Teuchos::rcpFromRef;
729 typedef scalar_type ST;
730 typedef Teuchos::ScalarTraits<ST> STS;
731 const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::apply: ";
732
733 if (!out_.is_null()) {
734 *out_ << ">>> DEBUG " << prefix;
735 if (!isKokkosKernelsStream_) {
736 if (A_crs_.is_null()) {
737 *out_ << "A_crs_ is null!" << std::endl;
738 } else {
739 Teuchos::RCP<const crs_matrix_type> A_crs =
740 Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_);
741 const std::string uplo = this->uplo_;
742 const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" : (mode == Teuchos::TRANS ? "T" : "N");
743 const std::string diag = this->diag_;
744 *out_ << "uplo=\"" << uplo
745 << "\", trans=\"" << trans
746 << "\", diag=\"" << diag << "\"" << std::endl;
747 }
748 } else {
749 for (int i = 0; i < num_streams_; i++) {
750 if (A_crs_v_[i].is_null()) {
751 *out_ << "A_crs_v_[" << i << "]"
752 << " is null!" << std::endl;
753 } else {
754 const std::string uplo = this->uplo_;
755 const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" : (mode == Teuchos::TRANS ? "T" : "N");
756 const std::string diag = this->diag_;
757 *out_ << "A_crs_v_[" << i << "]: "
758 << "uplo=\"" << uplo
759 << "\", trans=\"" << trans
760 << "\", diag=\"" << diag << "\"" << std::endl;
761 }
762 }
763 }
764 }
765
766 TEUCHOS_TEST_FOR_EXCEPTION(!isComputed(), std::runtime_error, prefix << "If compute() has not yet "
767 "been called, or if you have changed the matrix via setMatrix(), you must "
768 "call compute() before you may call this method.");
769 // If isComputed() is true, it's impossible for the matrix to be
770 // null, or for it not to be a Tpetra::CrsMatrix.
771 if (!isKokkosKernelsStream_) {
772 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::logic_error, prefix << "A_ is null. "
773 "Please report this bug to the Ifpack2 developers.");
774 TEUCHOS_TEST_FOR_EXCEPTION(A_crs_.is_null(), std::logic_error, prefix << "A_crs_ is null. "
775 "Please report this bug to the Ifpack2 developers.");
776 // However, it _is_ possible that the user called resumeFill() on
777 // the matrix, after calling compute(). This is NOT allowed.
778 TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_->isFillComplete(), std::runtime_error,
779 "If you call this "
780 "method, the matrix must be fill complete. It is not. This means that "
781 " you must have called resumeFill() on the matrix before calling apply(). "
782 "This is NOT allowed. Note that this class may use the matrix's data in "
783 "place without copying it. Thus, you cannot change the matrix and expect "
784 "the solver to stay the same. If you have changed the matrix, first call "
785 "fillComplete() on it, then call compute() on this object, before you call"
786 " apply(). You do NOT need to call setMatrix, as long as the matrix "
787 "itself (that is, its address in memory) is the same.");
788 } else {
789 for (int i = 0; i < num_streams_; i++) {
790 TEUCHOS_TEST_FOR_EXCEPTION(A_crs_v_[i].is_null(), std::logic_error, prefix << "A_crs_ is null. "
791 "Please report this bug to the Ifpack2 developers.");
792 // However, it _is_ possible that the user called resumeFill() on
793 // the matrix, after calling compute(). This is NOT allowed.
794 TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_v_[i]->isFillComplete(), std::runtime_error,
795 "If you call this "
796 "method, the matrix must be fill complete. It is not. This means that "
797 " you must have called resumeFill() on the matrix before calling apply(). "
798 "This is NOT allowed. Note that this class may use the matrix's data in "
799 "place without copying it. Thus, you cannot change the matrix and expect "
800 "the solver to stay the same. If you have changed the matrix, first call "
801 "fillComplete() on it, then call compute() on this object, before you call"
802 " apply(). You do NOT need to call setMatrix, as long as the matrix "
803 "itself (that is, its address in memory) is the same.");
804 }
805 }
806
807 RCP<const MV> X_cur;
808 RCP<MV> Y_cur;
809
810 if (!isKokkosKernelsStream_) {
811 auto G = A_crs_->getGraph();
812 TEUCHOS_TEST_FOR_EXCEPTION(G.is_null(), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
813 "but A_crs_'s RowGraph G is null. "
814 "Please report this bug to the Ifpack2 developers.");
815 auto importer = G->getImporter();
816 auto exporter = G->getExporter();
817
818 if (!importer.is_null()) {
819 if (X_colMap_.is_null() || X_colMap_->getNumVectors() != X.getNumVectors()) {
820 X_colMap_ = rcp(new MV(importer->getTargetMap(), X.getNumVectors()));
821 } else {
822 X_colMap_->putScalar(STS::zero());
823 }
824 // See discussion of Github Issue #672 for why the Import needs to
825 // use the ZERO CombineMode. The case where the Export is
826 // nontrivial is likely never exercised.
827 X_colMap_->doImport(X, *importer, Tpetra::ZERO);
828 }
829 X_cur = importer.is_null() ? rcpFromRef(X) : Teuchos::rcp_const_cast<const MV>(X_colMap_);
830
831 if (!exporter.is_null()) {
832 if (Y_rowMap_.is_null() || Y_rowMap_->getNumVectors() != Y.getNumVectors()) {
833 Y_rowMap_ = rcp(new MV(exporter->getSourceMap(), Y.getNumVectors()));
834 } else {
835 Y_rowMap_->putScalar(STS::zero());
836 }
837 Y_rowMap_->doExport(Y, *importer, Tpetra::ADD);
838 }
839 Y_cur = exporter.is_null() ? rcpFromRef(Y) : Y_rowMap_;
840 } else {
841 // Currently assume X and Y are local vectors (same sizes as A_crs_).
842 // Should do a better job here!!!
843 X_cur = rcpFromRef(X);
844 Y_cur = rcpFromRef(Y);
845 }
846
847 localApply(*X_cur, *Y_cur, mode, alpha, beta);
848
849 if (!isKokkosKernelsStream_) {
850 auto G = A_crs_->getGraph();
851 auto exporter = G->getExporter();
852 if (!exporter.is_null()) {
853 Y.putScalar(STS::zero());
854 Y.doExport(*Y_cur, *exporter, Tpetra::ADD);
855 }
856 }
857 ++numApply_;
858}
859
860template <class MatrixType>
862 localTriangularSolve(const MV& Y,
863 MV& X,
864 const Teuchos::ETransp mode) const {
865 using Teuchos::CONJ_TRANS;
866 using Teuchos::NO_TRANS;
867 using Teuchos::TRANS;
868 const char tfecfFuncName[] = "localTriangularSolve: ";
869
870 if (!isKokkosKernelsStream_) {
871 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!A_crs_->isFillComplete(), std::runtime_error,
872 "The matrix is not fill complete.");
873 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A_crs_->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
874 "The matrix is neither upper triangular or lower triangular. "
875 "You may only call this method if the matrix is triangular. "
876 "Remember that this is a local (per MPI process) property, and that "
877 "Tpetra only knows how to do a local (per process) triangular solve.");
878 } else {
879 for (int i = 0; i < num_streams_; i++) {
880 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!A_crs_v_[i]->isFillComplete(), std::runtime_error,
881 "The matrix is not fill complete.");
882 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A_crs_v_[i]->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
883 "The matrix is neither upper triangular or lower triangular. "
884 "You may only call this method if the matrix is triangular. "
885 "Remember that this is a local (per MPI process) property, and that "
886 "Tpetra only knows how to do a local (per process) triangular solve.");
887 }
888 }
889 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!X.isConstantStride() || !Y.isConstantStride(), std::invalid_argument,
890 "X and Y must be constant stride.");
891 using STS = Teuchos::ScalarTraits<scalar_type>;
892 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(STS::isComplex && mode == TRANS, std::logic_error,
893 "This method does "
894 "not currently support non-conjugated transposed solve (mode == "
895 "Teuchos::TRANS) for complex scalar types.");
896
897 const std::string uplo = this->uplo_;
898 const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" : (mode == Teuchos::TRANS ? "T" : "N");
899 const size_t numVecs = std::min(X.getNumVectors(), Y.getNumVectors());
900
901 if (Teuchos::nonnull(kh_) && this->isKokkosKernelsSptrsv_ && trans == "N") {
902 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(this->A_);
903 auto A_lclk = A_crs->getLocalMatrixDevice();
904 auto ptr = A_lclk.graph.row_map;
905 auto ind = A_lclk.graph.entries;
906 auto val = A_lclk.values;
907
908 for (size_t j = 0; j < numVecs; ++j) {
909 auto X_j = X.getVectorNonConst(j);
910 auto Y_j = Y.getVector(j);
911 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
912 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
913 auto X_lcl_1d = Kokkos::subview(X_lcl, Kokkos::ALL(), 0);
914 auto Y_lcl_1d = Kokkos::subview(Y_lcl, Kokkos::ALL(), 0);
915 KokkosSparse::sptrsv_solve(kh_.getRawPtr(), ptr, ind, val, Y_lcl_1d, X_lcl_1d);
916 // TODO is this fence needed...
917 typename k_handle::HandleExecSpace().fence();
918 }
919 } // End using regular interface of Kokkos Kernels Sptrsv
920 else if (kh_v_nonnull_ && this->isKokkosKernelsSptrsv_ && trans == "N") {
921 std::vector<lno_row_view_t> ptr_v(num_streams_);
922 std::vector<lno_nonzero_view_t> ind_v(num_streams_);
923 std::vector<scalar_nonzero_view_t> val_v(num_streams_);
924 std::vector<k_handle*> KernelHandle_rawptr_v_(num_streams_);
925 for (size_t j = 0; j < numVecs; ++j) {
926 auto X_j = X.getVectorNonConst(j);
927 auto Y_j = Y.getVector(j);
928 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
929 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
930 auto X_lcl_1d = Kokkos::subview(X_lcl, Kokkos::ALL(), 0);
931 auto Y_lcl_1d = Kokkos::subview(Y_lcl, Kokkos::ALL(), 0);
932 std::vector<decltype(X_lcl_1d)> x_v(num_streams_);
933 std::vector<decltype(Y_lcl_1d)> y_v(num_streams_);
934 local_ordinal_type stream_begin = 0;
935 local_ordinal_type stream_end;
936 for (int i = 0; i < num_streams_; i++) {
937 auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_v_[i]);
938 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
939 ptr_v[i] = Alocal_i.graph.row_map;
940 ind_v[i] = Alocal_i.graph.entries;
941 val_v[i] = Alocal_i.values;
942 stream_end = stream_begin + Alocal_i.numRows();
943 x_v[i] = Kokkos::subview(X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
944 y_v[i] = Kokkos::subview(Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
945 KernelHandle_rawptr_v_[i] = kh_v_[i].getRawPtr();
946 stream_begin = stream_end;
947 }
948 Kokkos::fence();
949 KokkosSparse::Experimental::sptrsv_solve_streams(exec_space_instances_, KernelHandle_rawptr_v_,
950 ptr_v, ind_v, val_v, y_v, x_v);
951 for (int i = 0; i < num_streams_; i++) exec_space_instances_[i].fence();
952 // Kokkos::fence();
953 }
954 } // End using stream interface of Kokkos Kernels Sptrsv
955 else {
956 const std::string diag = this->diag_;
957 // NOTE (mfh 20 Aug 2017): KokkosSparse::trsv currently is a
958 // sequential, host-only code. See
959 // https://github.com/kokkos/kokkos-kernels/issues/48.
960
961 auto A_lcl = this->A_crs_->getLocalMatrixHost();
962
963 if (X.isConstantStride() && Y.isConstantStride()) {
964 auto X_lcl = X.getLocalViewHost(Tpetra::Access::ReadWrite);
965 auto Y_lcl = Y.getLocalViewHost(Tpetra::Access::ReadOnly);
966 KokkosSparse::trsv(uplo.c_str(), trans.c_str(), diag.c_str(),
967 A_lcl, Y_lcl, X_lcl);
968 } else {
969 for (size_t j = 0; j < numVecs; ++j) {
970 auto X_j = X.getVectorNonConst(j);
971 auto Y_j = Y.getVector(j);
972 auto X_lcl = X_j->getLocalViewHost(Tpetra::Access::ReadWrite);
973 auto Y_lcl = Y_j->getLocalViewHost(Tpetra::Access::ReadOnly);
974 KokkosSparse::trsv(uplo.c_str(), trans.c_str(),
975 diag.c_str(), A_lcl, Y_lcl, X_lcl);
976 }
977 }
978 }
979}
980
981template <class MatrixType>
982void LocalSparseTriangularSolver<MatrixType>::
983 localApply(const MV& X,
984 MV& Y,
985 const Teuchos::ETransp mode,
986 const scalar_type& alpha,
987 const scalar_type& beta) const {
988 if (mode == Teuchos::NO_TRANS && Teuchos::nonnull(htsImpl_) &&
989 htsImpl_->isComputed()) {
990 htsImpl_->localApply(X, Y, mode, alpha, beta);
991 return;
992 }
993
994 using Teuchos::RCP;
995 typedef scalar_type ST;
996 typedef Teuchos::ScalarTraits<ST> STS;
997
998 if (beta == STS::zero()) {
999 if (alpha == STS::zero()) {
1000 Y.putScalar(STS::zero()); // Y := 0 * Y (ignore contents of Y)
1001 } else { // alpha != 0
1002 this->localTriangularSolve(X, Y, mode);
1003 if (alpha != STS::one()) {
1004 Y.scale(alpha);
1005 }
1006 }
1007 } else { // beta != 0
1008 if (alpha == STS::zero()) {
1009 Y.scale(beta); // Y := beta * Y
1010 } else { // alpha != 0
1011 MV Y_tmp(Y, Teuchos::Copy);
1012 this->localTriangularSolve(X, Y_tmp, mode); // Y_tmp := M * X
1013 Y.update(alpha, Y_tmp, beta); // Y := beta * Y + alpha * Y_tmp
1014 }
1015 }
1016}
1017
1018template <class MatrixType>
1020 getNumInitialize() const {
1021 return numInitialize_;
1022}
1023
1024template <class MatrixType>
1026 getNumCompute() const {
1027 return numCompute_;
1028}
1029
1030template <class MatrixType>
1032 getNumApply() const {
1033 return numApply_;
1034}
1035
1036template <class MatrixType>
1037double
1039 getInitializeTime() const {
1040 return initializeTime_;
1041}
1042
1043template <class MatrixType>
1044double
1046 getComputeTime() const {
1047 return computeTime_;
1048}
1049
1050template <class MatrixType>
1051double
1053 getApplyTime() const {
1054 return applyTime_;
1055}
1056
1057template <class MatrixType>
1058std::string
1060 description() const {
1061 std::ostringstream os;
1062
1063 // Output is a valid YAML dictionary in flow style. If you don't
1064 // like everything on a single line, you should call describe()
1065 // instead.
1066 os << "\"Ifpack2::LocalSparseTriangularSolver\": {";
1067 if (this->getObjectLabel() != "") {
1068 os << "Label: \"" << this->getObjectLabel() << "\", ";
1069 }
1070 os << "Initialized: " << (isInitialized() ? "true" : "false") << ", "
1071 << "Computed: " << (isComputed() ? "true" : "false") << ", ";
1072
1073 if (isKokkosKernelsSptrsv_) os << "KK-SPTRSV, ";
1074 if (isKokkosKernelsStream_) os << "KK-SolveStream, ";
1075
1076 if (A_.is_null()) {
1077 os << "Matrix: null";
1078 } else {
1079 os << "Matrix dimensions: ["
1080 << A_->getGlobalNumRows() << ", "
1081 << A_->getGlobalNumCols() << "]"
1082 << ", Number of nonzeros: " << A_->getGlobalNumEntries();
1083 }
1084
1085 if (Teuchos::nonnull(htsImpl_))
1086 os << ", HTS computed: " << (htsImpl_->isComputed() ? "true" : "false");
1087 os << "}";
1088 return os.str();
1089}
1090
1091template <class MatrixType>
1093 describe(Teuchos::FancyOStream& out,
1094 const Teuchos::EVerbosityLevel verbLevel) const {
1095 using std::endl;
1096 // Default verbosity level is VERB_LOW, which prints only on Process
1097 // 0 of the matrix's communicator.
1098 const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
1099
1100 if (vl != Teuchos::VERB_NONE) {
1101 // Print only on Process 0 in the matrix's communicator. If the
1102 // matrix is null, though, we have to get the communicator from
1103 // somewhere, so we ask Tpetra for its default communicator. If
1104 // MPI is enabled, this wraps MPI_COMM_WORLD or a clone thereof.
1105 auto comm = A_.is_null() ? Tpetra::getDefaultComm() : A_->getComm();
1106
1107 // Users aren't supposed to do anything with the matrix on
1108 // processes where its communicator is null.
1109 if (!comm.is_null() && comm->getRank() == 0) {
1110 // By convention, describe() should always begin with a tab.
1111 Teuchos::OSTab tab0(out);
1112 // Output is in YAML format. We have to escape the class name,
1113 // because it has a colon.
1114 out << "\"Ifpack2::LocalSparseTriangularSolver\":" << endl;
1115 Teuchos::OSTab tab1(out);
1116 out << "Scalar: " << Teuchos::TypeNameTraits<scalar_type>::name() << endl
1117 << "LocalOrdinal: " << Teuchos::TypeNameTraits<local_ordinal_type>::name() << endl
1118 << "GlobalOrdinal: " << Teuchos::TypeNameTraits<global_ordinal_type>::name() << endl
1119 << "Node: " << Teuchos::TypeNameTraits<node_type>::name() << endl;
1120 }
1121 }
1122}
1123
1124template <class MatrixType>
1125Teuchos::RCP<const typename LocalSparseTriangularSolver<MatrixType>::map_type>
1127 getDomainMap() const {
1128 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error,
1129 "Ifpack2::LocalSparseTriangularSolver::getDomainMap: "
1130 "The matrix is null. Please call setMatrix() with a nonnull input "
1131 "before calling this method.");
1132 return A_->getDomainMap();
1133}
1134
1135template <class MatrixType>
1136Teuchos::RCP<const typename LocalSparseTriangularSolver<MatrixType>::map_type>
1138 getRangeMap() const {
1139 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error,
1140 "Ifpack2::LocalSparseTriangularSolver::getRangeMap: "
1141 "The matrix is null. Please call setMatrix() with a nonnull input "
1142 "before calling this method.");
1143 return A_->getRangeMap();
1144}
1145
1146template <class MatrixType>
1148 setMatrix(const Teuchos::RCP<const row_matrix_type>& A) {
1149 const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrix: ";
1150
1151 // If the pointer didn't change, do nothing. This is reasonable
1152 // because users are supposed to call this method with the same
1153 // object over all participating processes, and pointer identity
1154 // implies object identity.
1155 if (A.getRawPtr() != A_.getRawPtr() || isInternallyChanged_) {
1156 // Check in serial or one-process mode if the matrix is square.
1157 TEUCHOS_TEST_FOR_EXCEPTION(!A.is_null() && A->getComm()->getSize() == 1 &&
1158 A->getLocalNumRows() != A->getLocalNumCols(),
1159 std::runtime_error, prefix << "If A's communicator only contains one "
1160 "process, then A must be square. Instead, you provided a matrix A with "
1161 << A->getLocalNumRows() << " rows and " << A->getLocalNumCols() << " columns.");
1162
1163 // It's legal for A to be null; in that case, you may not call
1164 // initialize() until calling setMatrix() with a nonnull input.
1165 // Regardless, setting the matrix invalidates the preconditioner.
1166 isInitialized_ = false;
1167 isComputed_ = false;
1168
1169 if (A.is_null()) {
1170 A_crs_ = Teuchos::null;
1171 A_ = Teuchos::null;
1172 } else { // A is not null
1173 Teuchos::RCP<const crs_matrix_type> A_crs =
1174 Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
1175 TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument, prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
1176 A_crs_ = A_crs;
1177 A_ = A;
1178 }
1179
1180 if (Teuchos::nonnull(htsImpl_))
1181 htsImpl_->reset();
1182 } // pointers are not the same
1183}
1184
1185template <class MatrixType>
1187 setStreamInfo(const bool& isKokkosKernelsStream, const int& num_streams,
1188 const std::vector<HandleExecSpace>& exec_space_instances) {
1189 isKokkosKernelsStream_ = isKokkosKernelsStream;
1190 num_streams_ = num_streams;
1191 exec_space_instances_ = exec_space_instances;
1192 A_crs_v_ = std::vector<Teuchos::RCP<crs_matrix_type>>(num_streams_);
1193}
1194
1195template <class MatrixType>
1197 setMatrices(const std::vector<Teuchos::RCP<crs_matrix_type>>& A_crs_v) {
1198 const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrixWithStreams: ";
1199
1200 for (int i = 0; i < num_streams_; i++) {
1201 // If the pointer didn't change, do nothing. This is reasonable
1202 // because users are supposed to call this method with the same
1203 // object over all participating processes, and pointer identity
1204 // implies object identity.
1205 if (A_crs_v[i].getRawPtr() != A_crs_v_[i].getRawPtr() || isInternallyChanged_) {
1206 // Check in serial or one-process mode if the matrix is square.
1207 TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_v[i].is_null() && A_crs_v[i]->getComm()->getSize() == 1 &&
1208 A_crs_v[i]->getLocalNumRows() != A_crs_v[i]->getLocalNumCols(),
1209 std::runtime_error, prefix << "If A's communicator only contains one "
1210 "process, then A must be square. Instead, you provided a matrix A with "
1211 << A_crs_v[i]->getLocalNumRows() << " rows and " << A_crs_v[i]->getLocalNumCols() << " columns.");
1212
1213 // It's legal for A to be null; in that case, you may not call
1214 // initialize() until calling setMatrix() with a nonnull input.
1215 // Regardless, setting the matrix invalidates the preconditioner.
1216 isInitialized_ = false;
1217 isComputed_ = false;
1218
1219 if (A_crs_v[i].is_null()) {
1220 A_crs_v_[i] = Teuchos::null;
1221 } else { // A is not null
1222 Teuchos::RCP<crs_matrix_type> A_crs =
1223 Teuchos::rcp_dynamic_cast<crs_matrix_type>(A_crs_v[i]);
1224 TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument, prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
1225 A_crs_v_[i] = A_crs;
1226 }
1227 } // pointers are not the same
1228 }
1229}
1230
1231} // namespace Ifpack2
1232
1233#define IFPACK2_LOCALSPARSETRIANGULARSOLVER_INSTANT(S, LO, GO, N) \
1234 template class Ifpack2::LocalSparseTriangularSolver<Tpetra::RowMatrix<S, LO, GO, N>>;
1235
1236#endif // IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
"Preconditioner" that solves local sparse triangular systems.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:53
bool isComputed() const
Return true if compute() has been called.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:191
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with given verbosity to the given output stream.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1093
std::string description() const
A one-line description of this object.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1060
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Specialization of Tpetra::Map used by this class.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:69
LocalSparseTriangularSolver()
Constructor that takes no input matrix.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:296
MatrixType::local_ordinal_type local_ordinal_type
Type of the local indices of the input matrix.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:60
void setParameters(const Teuchos::ParameterList &params)
Set this object's parameters.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:348
double getComputeTime() const
Return the time spent in compute().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1046
void initialize()
"Symbolic" phase of setup
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:392
MatrixType::global_ordinal_type global_ordinal_type
Type of the global indices of the input matrix.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:62
int getNumCompute() const
Return the number of calls to compute().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1026
double getInitializeTime() const
Return the time spent in initialize().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1039
Teuchos::RCP< const map_type > getDomainMap() const
The domain of this operator.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1127
MatrixType::scalar_type scalar_type
Scalar type of the entries of the input matrix.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:56
MatrixType::node_type node_type
Node type of the input matrix.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:64
int getNumInitialize() const
Return the number of calls to initialize().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1020
void setStreamInfo(const bool &isKokkosKernelsStream, const int &num_streams, const std::vector< HandleExecSpace > &exec_space_instances)
Set this triangular solver's stream information.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1187
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Specialization of Tpetra::CrsMatrix used by this class.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:77
Teuchos::RCP< const map_type > getRangeMap() const
The range of this operator.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1138
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Set this preconditioner's matrix.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1148
void setMatrices(const std::vector< Teuchos::RCP< crs_matrix_type > > &A_crs_v)
Set this preconditioner's matrices (used by stream interface of triangular solve).
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1197
virtual ~LocalSparseTriangularSolver()
Destructor (virtual for memory safety).
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:332
double getApplyTime() const
Return the time spent in apply().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1053
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, and put the result in Y.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:719
int getNumApply() const
Return the number of calls to apply().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1032
void compute()
"Numeric" phase of setup
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:642
TRANS
Ifpack2 implementation details.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40