Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_LocalSparseTriangularSolver_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
11#define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
12
13#include <sstream> // ostringstream
14#include <stdexcept> // runtime_error
15
16#include "Ifpack2_LocalSparseTriangularSolver_decl.hpp"
17#include "Tpetra_CrsMatrix.hpp"
18#include "Tpetra_Core.hpp"
19#include "Teuchos_StandardParameterEntryValidators.hpp"
20#include "Tpetra_Details_determineLocalTriangularStructure.hpp"
21#include "KokkosSparse_sptrsv.hpp"
22#include "KokkosSparse_trsv.hpp"
23
24#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
25#include "shylu_hts.hpp"
26#endif
27
28namespace Ifpack2 {
29
30namespace Details {
31
32#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
33
34inline void cusparse_error_throw(cusparseStatus_t cusparseStatus, const char* name,
35 const char* file, const int line) {
36 std::ostringstream out;
37#if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION)
38 out << name << " error( " << cusparseGetErrorName(cusparseStatus) << "): " << cusparseGetErrorString(cusparseStatus);
39#else
40 out << name << " error( ";
41 switch (cusparseStatus) {
42 case CUSPARSE_STATUS_NOT_INITIALIZED:
43 out << "CUSPARSE_STATUS_NOT_INITIALIZED): cusparse handle was not "
44 "created correctly.";
45 break;
46 case CUSPARSE_STATUS_ALLOC_FAILED:
47 out << "CUSPARSE_STATUS_ALLOC_FAILED): you might tried to allocate too "
48 "much memory";
49 break;
50 case CUSPARSE_STATUS_INVALID_VALUE: out << "CUSPARSE_STATUS_INVALID_VALUE)"; break;
51 case CUSPARSE_STATUS_ARCH_MISMATCH: out << "CUSPARSE_STATUS_ARCH_MISMATCH)"; break;
52 case CUSPARSE_STATUS_MAPPING_ERROR: out << "CUSPARSE_STATUS_MAPPING_ERROR)"; break;
53 case CUSPARSE_STATUS_EXECUTION_FAILED: out << "CUSPARSE_STATUS_EXECUTION_FAILED)"; break;
54 case CUSPARSE_STATUS_INTERNAL_ERROR: out << "CUSPARSE_STATUS_INTERNAL_ERROR)"; break;
55 case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: out << "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED)"; break;
56 case CUSPARSE_STATUS_ZERO_PIVOT: out << "CUSPARSE_STATUS_ZERO_PIVOT)"; break;
57 default: out << "unrecognized error code): this is bad!"; break;
58 }
59#endif // CUSPARSE_VERSION
60 if (file) {
61 out << " " << file << ":" << line;
62 }
63 throw std::runtime_error(out.str());
64}
65
66inline void cusparse_safe_call(cusparseStatus_t cusparseStatus, const char* name, const char* file = nullptr,
67 const int line = 0) {
68 if (CUSPARSE_STATUS_SUCCESS != cusparseStatus) {
69 cusparse_error_throw(cusparseStatus, name, file, line);
70 }
71}
72
73#define IFPACK2_DETAILS_CUSPARSE_SAFE_CALL(call) \
74 Ifpack2::Details::cusparse_safe_call(call, #call, __FILE__, __LINE__)
75
76#endif // defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
77
78struct TrisolverType {
79 enum Enum {
80 Internal,
81 HTS,
82 KSPTRSV
83 };
84
85 static void loadPLTypeOption(Teuchos::Array<std::string>& type_strs, Teuchos::Array<Enum>& type_enums) {
86 type_strs.resize(3);
87 type_strs[0] = "Internal";
88 type_strs[1] = "HTS";
89 type_strs[2] = "KSPTRSV";
90 type_enums.resize(3);
91 type_enums[0] = Internal;
92 type_enums[1] = HTS;
93 type_enums[2] = KSPTRSV;
94 }
95};
96} // namespace Details
97
98template <class MatrixType>
99class LocalSparseTriangularSolver<MatrixType>::HtsImpl {
100 public:
101 typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> crs_matrix_type;
102
103 void reset() {
104#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
105 Timpl_ = Teuchos::null;
106 levelset_block_size_ = 1;
107#endif
108 }
109
110 void setParameters(const Teuchos::ParameterList& pl) {
111 (void)pl;
112#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
113 const char* block_size_s = "trisolver: block size";
114 if (pl.isParameter(block_size_s)) {
115 TEUCHOS_TEST_FOR_EXCEPT_MSG(!pl.isType<int>(block_size_s),
116 "The parameter \"" << block_size_s << "\" must be of type int.");
117 levelset_block_size_ = pl.get<int>(block_size_s);
118 }
119 if (levelset_block_size_ < 1)
120 levelset_block_size_ = 1;
121#endif
122 }
123
124 // HTS has the phases symbolic+numeric, numeric, and apply. Hence the first
125 // call to compute() will trigger the symbolic+numeric phase, and subsequent
126 // calls (with the same Timpl_) will trigger the numeric phase. In the call to
127 // initialize(), essentially nothing happens.
128 void initialize(const crs_matrix_type& /* unused */) {
129#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
130 reset();
131 transpose_ = conjugate_ = false;
132#endif
133 }
134
135 void compute(const crs_matrix_type& T_in, const Teuchos::RCP<Teuchos::FancyOStream>& out) {
136 (void)T_in;
137 (void)out;
138#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
139 using Teuchos::ArrayRCP;
140
141 auto rowptr = T_in.getLocalRowPtrsHost();
142 auto colidx = T_in.getLocalIndicesHost();
143 auto val = T_in.getLocalValuesHost(Tpetra::Access::ReadOnly);
144 Kokkos::fence();
145
146 Teuchos::RCP<HtsCrsMatrix> T_hts = Teuchos::rcpWithDealloc(
147 HTST::make_CrsMatrix(rowptr.size() - 1,
148 rowptr.data(), colidx.data(),
149 // For std/Kokkos::complex.
150 reinterpret_cast<const scalar_type*>(val.data()),
151 transpose_, conjugate_),
152 HtsCrsMatrixDeleter());
153
154 if (Teuchos::nonnull(Timpl_)) {
155 // Reuse the nonzero pattern.
156 HTST::reprocess_numeric(Timpl_.get(), T_hts.get());
157 } else {
158 // Build from scratch.
159 if (T_in.getCrsGraph().is_null()) {
160 if (Teuchos::nonnull(out))
161 *out << "HTS compute failed because T_in.getCrsGraph().is_null().\n";
162 return;
163 }
164 if (!T_in.getCrsGraph()->isSorted()) {
165 if (Teuchos::nonnull(out))
166 *out << "HTS compute failed because ! T_in.getCrsGraph().isSorted().\n";
167 return;
168 }
169 if (!T_in.isStorageOptimized()) {
170 if (Teuchos::nonnull(out))
171 *out << "HTS compute failed because ! T_in.isStorageOptimized().\n";
172 return;
173 }
174
175 typename HTST::PreprocessArgs args;
176 args.T = T_hts.get();
177 args.max_nrhs = 1;
178#ifdef _OPENMP
179 args.nthreads = omp_get_max_threads();
180#else
181 args.nthreads = 1;
182#endif
183 args.save_for_reprocess = true;
184 typename HTST::Options opts;
185 opts.levelset_block_size = levelset_block_size_;
186 args.options = &opts;
187
188 try {
189 Timpl_ = Teuchos::rcpWithDealloc(HTST::preprocess(args), TImplDeleter());
190 } catch (const std::exception& e) {
191 if (Teuchos::nonnull(out))
192 *out << "HTS preprocess threw: " << e.what() << "\n";
193 }
194 }
195#endif
196 }
197
198 // HTS may not be able to handle a matrix, so query whether compute()
199 // succeeded.
200 bool isComputed() {
201#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
202 return Teuchos::nonnull(Timpl_);
203#else
204 return false;
205#endif
206 }
207
208 // Y := beta * Y + alpha * (M * X)
209 void localApply(const MV& X, MV& Y,
210 const Teuchos::ETransp /* mode */,
211 const scalar_type& alpha, const scalar_type& beta) const {
212 (void)X;
213 (void)Y;
214 (void)alpha;
215 (void)beta;
216#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
217 const auto& X_view = X.getLocalViewHost(Tpetra::Access::ReadOnly);
218 const auto& Y_view = Y.getLocalViewHost(Tpetra::Access::ReadWrite);
219
220 // Only does something if #rhs > current capacity.
221 HTST::reset_max_nrhs(Timpl_.get(), X_view.extent(1));
222 // Switch alpha and beta because of HTS's opposite convention.
223 HTST::solve_omp(Timpl_.get(),
224 // For std/Kokkos::complex.
225 reinterpret_cast<const scalar_type*>(X_view.data()),
226 X_view.extent(1),
227 // For std/Kokkos::complex.
228 reinterpret_cast<scalar_type*>(Y_view.data()),
229 beta, alpha);
230#endif
231 }
232
233 private:
234#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
235 typedef ::Experimental::HTS<local_ordinal_type, size_t, scalar_type> HTST;
236 typedef typename HTST::Impl TImpl;
237 typedef typename HTST::CrsMatrix HtsCrsMatrix;
238
239 struct TImplDeleter {
240 void free(TImpl* impl) {
241 HTST::delete_Impl(impl);
242 }
243 };
244
245 struct HtsCrsMatrixDeleter {
246 void free(HtsCrsMatrix* T) {
247 HTST::delete_CrsMatrix(T);
248 }
249 };
250
251 Teuchos::RCP<TImpl> Timpl_;
252 bool transpose_, conjugate_;
253 int levelset_block_size_;
254#endif
255};
256
257template <class MatrixType>
259 LocalSparseTriangularSolver(const Teuchos::RCP<const row_matrix_type>& A)
260 : A_(A) {
261 initializeState();
262
263 if (!A.is_null()) {
264 Teuchos::RCP<const crs_matrix_type> A_crs =
265 Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
266 TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument,
267 "Ifpack2::LocalSparseTriangularSolver constructor: "
268 "The input matrix A is not a Tpetra::CrsMatrix.");
269 A_crs_ = A_crs;
270 }
271}
272
273template <class MatrixType>
275 LocalSparseTriangularSolver(const Teuchos::RCP<const row_matrix_type>& A,
276 const Teuchos::RCP<Teuchos::FancyOStream>& out)
277 : A_(A)
278 , out_(out) {
279 initializeState();
280 if (!out_.is_null()) {
281 *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
282 << std::endl;
283 }
284
285 if (!A.is_null()) {
286 Teuchos::RCP<const crs_matrix_type> A_crs =
287 Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
288 TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument,
289 "Ifpack2::LocalSparseTriangularSolver constructor: "
290 "The input matrix A is not a Tpetra::CrsMatrix.");
291 A_crs_ = A_crs;
292 }
293}
294
295template <class MatrixType>
300
301template <class MatrixType>
303 LocalSparseTriangularSolver(const bool /* unused */, const Teuchos::RCP<Teuchos::FancyOStream>& out)
304 : out_(out) {
305 initializeState();
306 if (!out_.is_null()) {
307 *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
308 << std::endl;
309 }
310}
311
312template <class MatrixType>
314 isInitialized_ = false;
315 isComputed_ = false;
316 reverseStorage_ = false;
317 isInternallyChanged_ = false;
318 numInitialize_ = 0;
319 numCompute_ = 0;
320 numApply_ = 0;
321 initializeTime_ = 0.0;
322 computeTime_ = 0.0;
323 applyTime_ = 0.0;
324 isKokkosKernelsSptrsv_ = false;
325 isKokkosKernelsStream_ = false;
326 num_streams_ = 0;
327 uplo_ = "N";
328 diag_ = "N";
329}
330
331template <class MatrixType>
334 if (!isKokkosKernelsStream_) {
335 if (Teuchos::nonnull(kh_)) {
336 kh_->destroy_sptrsv_handle();
337 }
338 } else {
339 for (size_t i = 0; i < kh_v_.size(); i++) {
340 if (Teuchos::nonnull(kh_v_[i])) {
341 kh_v_[i]->destroy_sptrsv_handle();
342 }
343 }
344 }
345}
346
347template <class MatrixType>
349 setParameters(const Teuchos::ParameterList& pl) {
350 using Teuchos::Array;
351 using Teuchos::ParameterList;
352 using Teuchos::RCP;
353
354 Details::TrisolverType::Enum trisolverType = Details::TrisolverType::Internal;
355 do {
356 static const char typeName[] = "trisolver: type";
357
358 if (!pl.isType<std::string>(typeName)) break;
359
360 // Map std::string <-> TrisolverType::Enum.
361 Array<std::string> trisolverTypeStrs;
362 Array<Details::TrisolverType::Enum> trisolverTypeEnums;
363 Details::TrisolverType::loadPLTypeOption(trisolverTypeStrs, trisolverTypeEnums);
364 Teuchos::StringToIntegralParameterEntryValidator<Details::TrisolverType::Enum>
365 s2i(trisolverTypeStrs(), trisolverTypeEnums(), typeName, false);
366
367 trisolverType = s2i.getIntegralValue(pl.get<std::string>(typeName));
368 } while (0);
369
370 if (trisolverType == Details::TrisolverType::HTS) {
371 htsImpl_ = Teuchos::rcp(new HtsImpl());
372 htsImpl_->setParameters(pl);
373 }
374
375 if (trisolverType == Details::TrisolverType::KSPTRSV) {
376 this->isKokkosKernelsSptrsv_ = true;
377 } else {
378 this->isKokkosKernelsSptrsv_ = false;
379 }
380
381 if (pl.isParameter("trisolver: reverse U"))
382 reverseStorage_ = pl.get<bool>("trisolver: reverse U");
383
384 TEUCHOS_TEST_FOR_EXCEPTION(reverseStorage_ && (trisolverType == Details::TrisolverType::HTS || trisolverType == Details::TrisolverType::KSPTRSV),
385 std::logic_error,
386 "Ifpack2::LocalSparseTriangularSolver::setParameters: "
387 "You are not allowed to enable both HTS or KSPTRSV and the \"trisolver: reverse U\" "
388 "options. See GitHub issue #2647.");
389}
390
391template <class MatrixType>
393 initialize() {
394 using Tpetra::Details::determineLocalTriangularStructure;
395
396 using local_matrix_type = typename crs_matrix_type::local_matrix_device_type;
397 using LO = local_ordinal_type;
398
399 const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::initialize: ";
400 if (!out_.is_null()) {
401 *out_ << ">>> DEBUG " << prefix << std::endl;
402 }
403
404 if (!isKokkosKernelsStream_) {
405 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix << "You must call "
406 "setMatrix() with a nonnull input matrix before you may call "
407 "initialize() or compute().");
408 if (A_crs_.is_null()) {
409 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_);
410 TEUCHOS_TEST_FOR_EXCEPTION(A_crs.get() == nullptr, std::invalid_argument,
411 prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
412 A_crs_ = A_crs;
413 }
414 auto G = A_crs_->getGraph();
415 TEUCHOS_TEST_FOR_EXCEPTION(G.is_null(), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
416 "but A_crs_'s RowGraph G is null. "
417 "Please report this bug to the Ifpack2 developers.");
418 // At this point, the graph MUST be fillComplete. The "initialize"
419 // (symbolic) part of setup only depends on the graph structure, so
420 // the matrix itself need not be fill complete.
421 TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::runtime_error,
422 "If you call this method, "
423 "the matrix's graph must be fill complete. It is not.");
424
425 // mfh 30 Apr 2018: See GitHub Issue #2658.
426 constexpr bool ignoreMapsForTriStructure = true;
427 auto lclTriStructure = [&] {
428 auto lclMatrix = A_crs_->getLocalMatrixDevice();
429 auto lclRowMap = A_crs_->getRowMap()->getLocalMap();
430 auto lclColMap = A_crs_->getColMap()->getLocalMap();
431 auto lclTriStruct =
432 determineLocalTriangularStructure(lclMatrix.graph,
433 lclRowMap,
434 lclColMap,
435 ignoreMapsForTriStructure);
436 const LO lclNumRows = lclRowMap.getLocalNumElements();
437 this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
438 this->uplo_ = lclTriStruct.couldBeLowerTriangular ? "L" : (lclTriStruct.couldBeUpperTriangular ? "U" : "N");
439 return lclTriStruct;
440 }();
441
442 if (reverseStorage_ && lclTriStructure.couldBeUpperTriangular &&
443 htsImpl_.is_null()) {
444 // Reverse the storage for an upper triangular matrix
445 auto Alocal = A_crs_->getLocalMatrixDevice();
446 auto ptr = Alocal.graph.row_map;
447 auto ind = Alocal.graph.entries;
448 auto val = Alocal.values;
449
450 auto numRows = Alocal.numRows();
451 auto numCols = Alocal.numCols();
452 auto numNnz = Alocal.nnz();
453
454 typename decltype(ptr)::non_const_type newptr("ptr", ptr.extent(0));
455 typename decltype(ind)::non_const_type newind("ind", ind.extent(0));
456 decltype(val) newval("val", val.extent(0));
457
458 // FIXME: The code below assumes UVM
459 typename crs_matrix_type::execution_space().fence();
460 newptr(0) = 0;
461 for (local_ordinal_type row = 0, rowStart = 0; row < numRows; ++row) {
462 auto A_r = Alocal.row(numRows - 1 - row);
463
464 auto numEnt = A_r.length;
465 for (local_ordinal_type k = 0; k < numEnt; ++k) {
466 newind(rowStart + k) = numCols - 1 - A_r.colidx(numEnt - 1 - k);
467 newval(rowStart + k) = A_r.value(numEnt - 1 - k);
468 }
469 rowStart += numEnt;
470 newptr(row + 1) = rowStart;
471 }
472 typename crs_matrix_type::execution_space().fence();
473
474 // Reverse maps
475 Teuchos::RCP<map_type> newRowMap, newColMap;
476 {
477 // Reverse row map
478 auto rowMap = A_->getRowMap();
479 auto numElems = rowMap->getLocalNumElements();
480 auto rowElems = rowMap->getLocalElementList();
481
482 Teuchos::Array<global_ordinal_type> newRowElems(rowElems.size());
483 for (size_t i = 0; i < numElems; i++)
484 newRowElems[i] = rowElems[numElems - 1 - i];
485
486 newRowMap = Teuchos::rcp(new map_type(rowMap->getGlobalNumElements(), newRowElems, rowMap->getIndexBase(), rowMap->getComm()));
487 }
488 {
489 // Reverse column map
490 auto colMap = A_->getColMap();
491 auto numElems = colMap->getLocalNumElements();
492 auto colElems = colMap->getLocalElementList();
493
494 Teuchos::Array<global_ordinal_type> newColElems(colElems.size());
495 for (size_t i = 0; i < numElems; i++)
496 newColElems[i] = colElems[numElems - 1 - i];
497
498 newColMap = Teuchos::rcp(new map_type(colMap->getGlobalNumElements(), newColElems, colMap->getIndexBase(), colMap->getComm()));
499 }
500
501 // Construct new matrix
502 local_matrix_type newLocalMatrix("Upermuted", numRows, numCols, numNnz, newval, newptr, newind);
503
504 A_crs_ = Teuchos::rcp(new crs_matrix_type(newLocalMatrix, newRowMap, newColMap, A_crs_->getDomainMap(), A_crs_->getRangeMap()));
505
506 isInternallyChanged_ = true;
507
508 // FIXME (mfh 18 Apr 2019) Recomputing this is unnecessary, but I
509 // didn't want to break any invariants, especially considering
510 // that this branch is likely poorly tested.
511 auto newLclTriStructure =
512 determineLocalTriangularStructure(newLocalMatrix.graph,
513 newRowMap->getLocalMap(),
514 newColMap->getLocalMap(),
515 ignoreMapsForTriStructure);
516 const LO newLclNumRows = newRowMap->getLocalNumElements();
517 this->diag_ = (newLclTriStructure.diagCount < newLclNumRows) ? "U" : "N";
518 this->uplo_ = newLclTriStructure.couldBeLowerTriangular ? "L" : (newLclTriStructure.couldBeUpperTriangular ? "U" : "N");
519 }
520 } else {
521 bool prev_ambiguous = false;
522 bool all_ambiguous = true;
523 for (int i = 0; i < num_streams_; i++) {
524 TEUCHOS_TEST_FOR_EXCEPTION(A_crs_v_[i].is_null(), std::runtime_error, prefix << "You must call "
525 "setMatrix() with a nonnull input matrix before you may call "
526 "initialize() or compute().");
527 auto G = A_crs_v_[i]->getGraph();
528 TEUCHOS_TEST_FOR_EXCEPTION(G.is_null(), std::logic_error, prefix << "A_crs_ are nonnull, "
529 "but A_crs_'s RowGraph G is null. "
530 "Please report this bug to the Ifpack2 developers.");
531 // At this point, the graph MUST be fillComplete. The "initialize"
532 // (symbolic) part of setup only depends on the graph structure, so
533 // the matrix itself need not be fill complete.
534 TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::runtime_error,
535 "If you call this method, "
536 "the matrix's graph must be fill complete. It is not.");
537
538 // mfh 30 Apr 2018: See GitHub Issue #2658.
539 constexpr bool ignoreMapsForTriStructure = true;
540 std::string prev_uplo = this->uplo_;
541 std::string prev_diag = this->diag_;
542 auto lclMatrix = A_crs_v_[i]->getLocalMatrixDevice();
543 auto lclRowMap = A_crs_v_[i]->getRowMap()->getLocalMap();
544 auto lclColMap = A_crs_v_[i]->getColMap()->getLocalMap();
545 auto lclTriStruct =
546 determineLocalTriangularStructure(lclMatrix.graph,
547 lclRowMap,
548 lclColMap,
549 ignoreMapsForTriStructure);
550 const LO lclNumRows = lclRowMap.getLocalNumElements();
551 this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
552 const bool could_be_lower = lclTriStruct.couldBeLowerTriangular;
553 const bool could_be_upper = lclTriStruct.couldBeUpperTriangular;
554 if (could_be_lower && could_be_upper) {
555 // Ambiguous, but that's OK if at least one stream is unabiguous
556 this->uplo_ = prev_uplo;
557 prev_ambiguous = true;
558 } else {
559 this->uplo_ = could_be_lower ? "L" : (could_be_upper ? "U" : "N");
560 if (this->uplo_ != "N" && prev_uplo == "N" && prev_ambiguous) {
561 prev_uplo = this->uplo_;
562 }
563 prev_ambiguous = false;
564 }
565 all_ambiguous &= prev_ambiguous;
566 if (i > 0) {
567 TEUCHOS_TEST_FOR_EXCEPTION((this->diag_ != prev_diag) || (this->uplo_ != prev_uplo),
568 std::logic_error, prefix << "A_crs_'s structures in streams "
569 "are different. Please report this bug to the Ifpack2 developers.");
570 }
571 }
572 // If all streams were ambiguous, just call it "L"
573 if (all_ambiguous) {
574 this->uplo_ = "L";
575 }
576 }
577
578 if (Teuchos::nonnull(htsImpl_)) {
579 htsImpl_->initialize(*A_crs_);
580 isInternallyChanged_ = true;
581 }
582
583 const bool ksptrsv_valid_uplo = (this->uplo_ != "N");
584 kh_v_nonnull_ = false;
585 if (this->isKokkosKernelsSptrsv_ && ksptrsv_valid_uplo && this->diag_ != "U") {
586 if (!isKokkosKernelsStream_) {
587 kh_ = Teuchos::rcp(new k_handle());
588 const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
589
590 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_, true);
591 auto Alocal = A_crs->getLocalMatrixDevice();
592 auto ptr = Alocal.graph.row_map;
593 auto ind = Alocal.graph.entries;
594 auto val = Alocal.values;
595
596 auto numRows = Alocal.numRows();
597 kh_->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows, is_lower_tri);
598 KokkosSparse::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
599 } else {
600 kh_v_ = std::vector<Teuchos::RCP<k_handle>>(num_streams_);
601 for (int i = 0; i < num_streams_; i++) {
602 kh_v_[i] = Teuchos::rcp(new k_handle());
603 auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_v_[i], true);
604 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
605 auto ptr_i = Alocal_i.graph.row_map;
606 auto ind_i = Alocal_i.graph.entries;
607 auto val_i = Alocal_i.values;
608
609 auto numRows_i = Alocal_i.numRows();
610
611 const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
612 kh_v_[i]->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows_i, is_lower_tri);
613 KokkosSparse::sptrsv_symbolic(kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
614 }
615 kh_v_nonnull_ = true;
616 }
617 }
618
619 isInitialized_ = true;
620 ++numInitialize_;
621}
622
623template <class MatrixType>
624KokkosSparse::Experimental::SPTRSVAlgorithm
626#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
627 // CuSparse only supports int type ordinals
628 // and scalar types of float, double, float complex and double complex
629 if constexpr (std::is_same<Kokkos::Cuda, HandleExecSpace>::value &&
630 std::is_same<int, local_ordinal_type>::value &&
631 (std::is_same<scalar_type, float>::value ||
632 std::is_same<scalar_type, double>::value ||
633 std::is_same<impl_scalar_type, Kokkos::complex<float>>::value ||
634 std::is_same<impl_scalar_type, Kokkos::complex<double>>::value)) {
635 return KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE;
636 }
637#endif
638 return KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
639}
640
641template <class MatrixType>
643 compute() {
644 const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::compute: ";
645 if (!out_.is_null()) {
646 *out_ << ">>> DEBUG " << prefix << std::endl;
647 }
648
649 if (!isKokkosKernelsStream_) {
650 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix << "You must call "
651 "setMatrix() with a nonnull input matrix before you may call "
652 "initialize() or compute().");
653 TEUCHOS_TEST_FOR_EXCEPTION(A_crs_.is_null(), std::logic_error, prefix << "A_ is nonnull, but "
654 "A_crs_ is null. Please report this bug to the Ifpack2 developers.");
655 // At this point, the matrix MUST be fillComplete.
656 TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_->isFillComplete(), std::runtime_error,
657 "If you call this "
658 "method, the matrix must be fill complete. It is not.");
659 } else {
660 for (int i = 0; i < num_streams_; i++) {
661 TEUCHOS_TEST_FOR_EXCEPTION(A_crs_v_[i].is_null(), std::runtime_error, prefix << "You must call "
662 "setMatrices() with a nonnull input matrix before you may call "
663 "initialize() or compute().");
664 // At this point, the matrix MUST be fillComplete.
665 TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_v_[i]->isFillComplete(), std::runtime_error,
666 "If you call this "
667 "method, the matrix must be fill complete. It is not.");
668 }
669 }
670
671 if (!isInitialized_) {
672 initialize();
673 }
674 TEUCHOS_TEST_FOR_EXCEPTION(!isInitialized_, std::logic_error, prefix << "initialize() should have "
675 "been called by this point, but isInitialized_ is false. "
676 "Please report this bug to the Ifpack2 developers.");
677
678// NOTE (Nov-09-2022):
679// For Cuda >= 11.3 (using cusparseSpSV), always call symbolic during compute
680// even when matrix values are changed with the same sparsity pattern.
681// For Cuda >= 12.1 has a new cusparseSpSV_updateMatrix function just for updating the
682// values that is substantially faster.
683// This would all be much better handled via a KokkosSparse::sptrsv_numeric(...)
684// that could hide the Cuda implementation details.
685#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && (CUDA_VERSION >= 11030)
686 if constexpr (std::is_same_v<typename crs_matrix_type::execution_space, Kokkos::Cuda>) {
687 if (this->isKokkosKernelsSptrsv_) {
688 if (Teuchos::nonnull(kh_) && !isKokkosKernelsStream_) {
689 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_, true);
690 auto Alocal = A_crs->getLocalMatrixDevice();
691 auto val = Alocal.values;
692 auto ptr = Alocal.graph.row_map;
693 auto ind = Alocal.graph.entries;
694 KokkosSparse::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
695 } else if (kh_v_nonnull_) {
696 for (int i = 0; i < num_streams_; i++) {
697 auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_v_[i], true);
698 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
699 auto val_i = Alocal_i.values;
700 auto ptr_i = Alocal_i.graph.row_map;
701 auto ind_i = Alocal_i.graph.entries;
702 KokkosSparse::sptrsv_symbolic(exec_space_instances_[i], kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
703 }
704 }
705 }
706 }
707#endif
708
709 if (!isComputed_) { // Only compute if not computed before
710 if (Teuchos::nonnull(htsImpl_))
711 htsImpl_->compute(*A_crs_, out_);
712
713 isComputed_ = true;
714 ++numCompute_;
715 }
716}
717
718template <class MatrixType>
720 apply(const Tpetra::MultiVector<scalar_type, local_ordinal_type,
722 Tpetra::MultiVector<scalar_type, local_ordinal_type,
724 Teuchos::ETransp mode,
725 scalar_type alpha,
726 scalar_type beta) const {
727 using Teuchos::RCP;
728 using Teuchos::rcp;
729 using Teuchos::rcpFromRef;
730 typedef scalar_type ST;
731 typedef Teuchos::ScalarTraits<ST> STS;
732 const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::apply: ";
733
734 if (!out_.is_null()) {
735 *out_ << ">>> DEBUG " << prefix;
736 if (!isKokkosKernelsStream_) {
737 if (A_crs_.is_null()) {
738 *out_ << "A_crs_ is null!" << std::endl;
739 } else {
740 Teuchos::RCP<const crs_matrix_type> A_crs =
741 Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_);
742 const std::string uplo = this->uplo_;
743 const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" : (mode == Teuchos::TRANS ? "T" : "N");
744 const std::string diag = this->diag_;
745 *out_ << "uplo=\"" << uplo
746 << "\", trans=\"" << trans
747 << "\", diag=\"" << diag << "\"" << std::endl;
748 }
749 } else {
750 for (int i = 0; i < num_streams_; i++) {
751 if (A_crs_v_[i].is_null()) {
752 *out_ << "A_crs_v_[" << i << "]"
753 << " is null!" << std::endl;
754 } else {
755 const std::string uplo = this->uplo_;
756 const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" : (mode == Teuchos::TRANS ? "T" : "N");
757 const std::string diag = this->diag_;
758 *out_ << "A_crs_v_[" << i << "]: "
759 << "uplo=\"" << uplo
760 << "\", trans=\"" << trans
761 << "\", diag=\"" << diag << "\"" << std::endl;
762 }
763 }
764 }
765 }
766
767 TEUCHOS_TEST_FOR_EXCEPTION(!isComputed(), std::runtime_error, prefix << "If compute() has not yet "
768 "been called, or if you have changed the matrix via setMatrix(), you must "
769 "call compute() before you may call this method.");
770 // If isComputed() is true, it's impossible for the matrix to be
771 // null, or for it not to be a Tpetra::CrsMatrix.
772 if (!isKokkosKernelsStream_) {
773 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::logic_error, prefix << "A_ is null. "
774 "Please report this bug to the Ifpack2 developers.");
775 TEUCHOS_TEST_FOR_EXCEPTION(A_crs_.is_null(), std::logic_error, prefix << "A_crs_ is null. "
776 "Please report this bug to the Ifpack2 developers.");
777 // However, it _is_ possible that the user called resumeFill() on
778 // the matrix, after calling compute(). This is NOT allowed.
779 TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_->isFillComplete(), std::runtime_error,
780 "If you call this "
781 "method, the matrix must be fill complete. It is not. This means that "
782 " you must have called resumeFill() on the matrix before calling apply(). "
783 "This is NOT allowed. Note that this class may use the matrix's data in "
784 "place without copying it. Thus, you cannot change the matrix and expect "
785 "the solver to stay the same. If you have changed the matrix, first call "
786 "fillComplete() on it, then call compute() on this object, before you call"
787 " apply(). You do NOT need to call setMatrix, as long as the matrix "
788 "itself (that is, its address in memory) is the same.");
789 } else {
790 for (int i = 0; i < num_streams_; i++) {
791 TEUCHOS_TEST_FOR_EXCEPTION(A_crs_v_[i].is_null(), std::logic_error, prefix << "A_crs_ is null. "
792 "Please report this bug to the Ifpack2 developers.");
793 // However, it _is_ possible that the user called resumeFill() on
794 // the matrix, after calling compute(). This is NOT allowed.
795 TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_v_[i]->isFillComplete(), std::runtime_error,
796 "If you call this "
797 "method, the matrix must be fill complete. It is not. This means that "
798 " you must have called resumeFill() on the matrix before calling apply(). "
799 "This is NOT allowed. Note that this class may use the matrix's data in "
800 "place without copying it. Thus, you cannot change the matrix and expect "
801 "the solver to stay the same. If you have changed the matrix, first call "
802 "fillComplete() on it, then call compute() on this object, before you call"
803 " apply(). You do NOT need to call setMatrix, as long as the matrix "
804 "itself (that is, its address in memory) is the same.");
805 }
806 }
807
808 RCP<const MV> X_cur;
809 RCP<MV> Y_cur;
810
811 if (!isKokkosKernelsStream_) {
812 auto G = A_crs_->getGraph();
813 TEUCHOS_TEST_FOR_EXCEPTION(G.is_null(), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
814 "but A_crs_'s RowGraph G is null. "
815 "Please report this bug to the Ifpack2 developers.");
816 auto importer = G->getImporter();
817 auto exporter = G->getExporter();
818
819 if (!importer.is_null()) {
820 if (X_colMap_.is_null() || X_colMap_->getNumVectors() != X.getNumVectors()) {
821 X_colMap_ = rcp(new MV(importer->getTargetMap(), X.getNumVectors()));
822 } else {
823 X_colMap_->putScalar(STS::zero());
824 }
825 // See discussion of Github Issue #672 for why the Import needs to
826 // use the ZERO CombineMode. The case where the Export is
827 // nontrivial is likely never exercised.
828 X_colMap_->doImport(X, *importer, Tpetra::ZERO);
829 }
830 X_cur = importer.is_null() ? rcpFromRef(X) : Teuchos::rcp_const_cast<const MV>(X_colMap_);
831
832 if (!exporter.is_null()) {
833 if (Y_rowMap_.is_null() || Y_rowMap_->getNumVectors() != Y.getNumVectors()) {
834 Y_rowMap_ = rcp(new MV(exporter->getSourceMap(), Y.getNumVectors()));
835 } else {
836 Y_rowMap_->putScalar(STS::zero());
837 }
838 Y_rowMap_->doExport(Y, *importer, Tpetra::ADD);
839 }
840 Y_cur = exporter.is_null() ? rcpFromRef(Y) : Y_rowMap_;
841 } else {
842 // Currently assume X and Y are local vectors (same sizes as A_crs_).
843 // Should do a better job here!!!
844 X_cur = rcpFromRef(X);
845 Y_cur = rcpFromRef(Y);
846 }
847
848 localApply(*X_cur, *Y_cur, mode, alpha, beta);
849
850 if (!isKokkosKernelsStream_) {
851 auto G = A_crs_->getGraph();
852 auto exporter = G->getExporter();
853 if (!exporter.is_null()) {
854 Y.putScalar(STS::zero());
855 Y.doExport(*Y_cur, *exporter, Tpetra::ADD);
856 }
857 }
858 ++numApply_;
859}
860
861template <class MatrixType>
863 localTriangularSolve(const MV& Y,
864 MV& X,
865 const Teuchos::ETransp mode) const {
866 using Teuchos::CONJ_TRANS;
867 using Teuchos::NO_TRANS;
868 using Teuchos::TRANS;
869 const char tfecfFuncName[] = "localTriangularSolve: ";
870
871 if (!isKokkosKernelsStream_) {
872 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!A_crs_->isFillComplete(), std::runtime_error,
873 "The matrix is not fill complete.");
874 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A_crs_->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
875 "The matrix is neither upper triangular or lower triangular. "
876 "You may only call this method if the matrix is triangular. "
877 "Remember that this is a local (per MPI process) property, and that "
878 "Tpetra only knows how to do a local (per process) triangular solve.");
879 } else {
880 for (int i = 0; i < num_streams_; i++) {
881 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!A_crs_v_[i]->isFillComplete(), std::runtime_error,
882 "The matrix is not fill complete.");
883 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A_crs_v_[i]->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
884 "The matrix is neither upper triangular or lower triangular. "
885 "You may only call this method if the matrix is triangular. "
886 "Remember that this is a local (per MPI process) property, and that "
887 "Tpetra only knows how to do a local (per process) triangular solve.");
888 }
889 }
890 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!X.isConstantStride() || !Y.isConstantStride(), std::invalid_argument,
891 "X and Y must be constant stride.");
892 using STS = Teuchos::ScalarTraits<scalar_type>;
893 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(STS::isComplex && mode == TRANS, std::logic_error,
894 "This method does "
895 "not currently support non-conjugated transposed solve (mode == "
896 "Teuchos::TRANS) for complex scalar types.");
897
898 const std::string uplo = this->uplo_;
899 const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" : (mode == Teuchos::TRANS ? "T" : "N");
900 const size_t numVecs = std::min(X.getNumVectors(), Y.getNumVectors());
901
902 if (Teuchos::nonnull(kh_) && this->isKokkosKernelsSptrsv_ && trans == "N") {
903 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(this->A_);
904 auto A_lclk = A_crs->getLocalMatrixDevice();
905 auto ptr = A_lclk.graph.row_map;
906 auto ind = A_lclk.graph.entries;
907 auto val = A_lclk.values;
908
909 for (size_t j = 0; j < numVecs; ++j) {
910 auto X_j = X.getVectorNonConst(j);
911 auto Y_j = Y.getVector(j);
912 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
913 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
914 auto X_lcl_1d = Kokkos::subview(X_lcl, Kokkos::ALL(), 0);
915 auto Y_lcl_1d = Kokkos::subview(Y_lcl, Kokkos::ALL(), 0);
916 KokkosSparse::sptrsv_solve(kh_.getRawPtr(), ptr, ind, val, Y_lcl_1d, X_lcl_1d);
917 // TODO is this fence needed...
918 typename k_handle::HandleExecSpace().fence();
919 }
920 } // End using regular interface of Kokkos Kernels Sptrsv
921 else if (kh_v_nonnull_ && this->isKokkosKernelsSptrsv_ && trans == "N") {
922 std::vector<lno_row_view_t> ptr_v(num_streams_);
923 std::vector<lno_nonzero_view_t> ind_v(num_streams_);
924 std::vector<scalar_nonzero_view_t> val_v(num_streams_);
925 std::vector<k_handle*> KernelHandle_rawptr_v_(num_streams_);
926 for (size_t j = 0; j < numVecs; ++j) {
927 auto X_j = X.getVectorNonConst(j);
928 auto Y_j = Y.getVector(j);
929 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
930 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
931 auto X_lcl_1d = Kokkos::subview(X_lcl, Kokkos::ALL(), 0);
932 auto Y_lcl_1d = Kokkos::subview(Y_lcl, Kokkos::ALL(), 0);
933 std::vector<decltype(X_lcl_1d)> x_v(num_streams_);
934 std::vector<decltype(Y_lcl_1d)> y_v(num_streams_);
935 local_ordinal_type stream_begin = 0;
936 local_ordinal_type stream_end;
937 for (int i = 0; i < num_streams_; i++) {
938 auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_v_[i]);
939 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
940 ptr_v[i] = Alocal_i.graph.row_map;
941 ind_v[i] = Alocal_i.graph.entries;
942 val_v[i] = Alocal_i.values;
943 stream_end = stream_begin + Alocal_i.numRows();
944 x_v[i] = Kokkos::subview(X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
945 y_v[i] = Kokkos::subview(Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
946 KernelHandle_rawptr_v_[i] = kh_v_[i].getRawPtr();
947 stream_begin = stream_end;
948 }
949 Kokkos::fence();
950 KokkosSparse::Experimental::sptrsv_solve_streams(exec_space_instances_, KernelHandle_rawptr_v_,
951 ptr_v, ind_v, val_v, y_v, x_v);
952 for (int i = 0; i < num_streams_; i++) exec_space_instances_[i].fence();
953 // Kokkos::fence();
954 }
955 } // End using stream interface of Kokkos Kernels Sptrsv
956 else {
957 const std::string diag = this->diag_;
958 // NOTE (mfh 20 Aug 2017): KokkosSparse::trsv currently is a
959 // sequential, host-only code. See
960 // https://github.com/kokkos/kokkos-kernels/issues/48.
961
962 auto A_lcl = this->A_crs_->getLocalMatrixHost();
963
964 if (X.isConstantStride() && Y.isConstantStride()) {
965 auto X_lcl = X.getLocalViewHost(Tpetra::Access::ReadWrite);
966 auto Y_lcl = Y.getLocalViewHost(Tpetra::Access::ReadOnly);
967 KokkosSparse::trsv(uplo.c_str(), trans.c_str(), diag.c_str(),
968 A_lcl, Y_lcl, X_lcl);
969 } else {
970 for (size_t j = 0; j < numVecs; ++j) {
971 auto X_j = X.getVectorNonConst(j);
972 auto Y_j = Y.getVector(j);
973 auto X_lcl = X_j->getLocalViewHost(Tpetra::Access::ReadWrite);
974 auto Y_lcl = Y_j->getLocalViewHost(Tpetra::Access::ReadOnly);
975 KokkosSparse::trsv(uplo.c_str(), trans.c_str(),
976 diag.c_str(), A_lcl, Y_lcl, X_lcl);
977 }
978 }
979 }
980}
981
982template <class MatrixType>
983void LocalSparseTriangularSolver<MatrixType>::
984 localApply(const MV& X,
985 MV& Y,
986 const Teuchos::ETransp mode,
987 const scalar_type& alpha,
988 const scalar_type& beta) const {
989 if (mode == Teuchos::NO_TRANS && Teuchos::nonnull(htsImpl_) &&
990 htsImpl_->isComputed()) {
991 htsImpl_->localApply(X, Y, mode, alpha, beta);
992 return;
993 }
994
995 using Teuchos::RCP;
996 typedef scalar_type ST;
997 typedef Teuchos::ScalarTraits<ST> STS;
998
999 if (beta == STS::zero()) {
1000 if (alpha == STS::zero()) {
1001 Y.putScalar(STS::zero()); // Y := 0 * Y (ignore contents of Y)
1002 } else { // alpha != 0
1003 this->localTriangularSolve(X, Y, mode);
1004 if (alpha != STS::one()) {
1005 Y.scale(alpha);
1006 }
1007 }
1008 } else { // beta != 0
1009 if (alpha == STS::zero()) {
1010 Y.scale(beta); // Y := beta * Y
1011 } else { // alpha != 0
1012 MV Y_tmp(Y, Teuchos::Copy);
1013 this->localTriangularSolve(X, Y_tmp, mode); // Y_tmp := M * X
1014 Y.update(alpha, Y_tmp, beta); // Y := beta * Y + alpha * Y_tmp
1015 }
1016 }
1017}
1018
1019template <class MatrixType>
1021 getNumInitialize() const {
1022 return numInitialize_;
1023}
1024
1025template <class MatrixType>
1027 getNumCompute() const {
1028 return numCompute_;
1029}
1030
1031template <class MatrixType>
1033 getNumApply() const {
1034 return numApply_;
1035}
1036
1037template <class MatrixType>
1038double
1040 getInitializeTime() const {
1041 return initializeTime_;
1042}
1043
1044template <class MatrixType>
1045double
1047 getComputeTime() const {
1048 return computeTime_;
1049}
1050
1051template <class MatrixType>
1052double
1054 getApplyTime() const {
1055 return applyTime_;
1056}
1057
1058template <class MatrixType>
1059std::string
1061 description() const {
1062 std::ostringstream os;
1063
1064 // Output is a valid YAML dictionary in flow style. If you don't
1065 // like everything on a single line, you should call describe()
1066 // instead.
1067 os << "\"Ifpack2::LocalSparseTriangularSolver\": {";
1068 if (this->getObjectLabel() != "") {
1069 os << "Label: \"" << this->getObjectLabel() << "\", ";
1070 }
1071 os << "Initialized: " << (isInitialized() ? "true" : "false") << ", "
1072 << "Computed: " << (isComputed() ? "true" : "false") << ", ";
1073
1074 if (isKokkosKernelsSptrsv_) os << "KK-SPTRSV, ";
1075 if (isKokkosKernelsStream_) os << "KK-SolveStream, ";
1076
1077 if (A_.is_null()) {
1078 os << "Matrix: null";
1079 } else {
1080 os << "Matrix dimensions: ["
1081 << A_->getGlobalNumRows() << ", "
1082 << A_->getGlobalNumCols() << "]"
1083 << ", Number of nonzeros: " << A_->getGlobalNumEntries();
1084 }
1085
1086 if (Teuchos::nonnull(htsImpl_))
1087 os << ", HTS computed: " << (htsImpl_->isComputed() ? "true" : "false");
1088 os << "}";
1089 return os.str();
1090}
1091
1092template <class MatrixType>
1094 describe(Teuchos::FancyOStream& out,
1095 const Teuchos::EVerbosityLevel verbLevel) const {
1096 using std::endl;
1097 // Default verbosity level is VERB_LOW, which prints only on Process
1098 // 0 of the matrix's communicator.
1099 const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
1100
1101 if (vl != Teuchos::VERB_NONE) {
1102 // Print only on Process 0 in the matrix's communicator. If the
1103 // matrix is null, though, we have to get the communicator from
1104 // somewhere, so we ask Tpetra for its default communicator. If
1105 // MPI is enabled, this wraps MPI_COMM_WORLD or a clone thereof.
1106 auto comm = A_.is_null() ? Tpetra::getDefaultComm() : A_->getComm();
1107
1108 // Users aren't supposed to do anything with the matrix on
1109 // processes where its communicator is null.
1110 if (!comm.is_null() && comm->getRank() == 0) {
1111 // By convention, describe() should always begin with a tab.
1112 Teuchos::OSTab tab0(out);
1113 // Output is in YAML format. We have to escape the class name,
1114 // because it has a colon.
1115 out << "\"Ifpack2::LocalSparseTriangularSolver\":" << endl;
1116 Teuchos::OSTab tab1(out);
1117 out << "Scalar: " << Teuchos::TypeNameTraits<scalar_type>::name() << endl
1118 << "LocalOrdinal: " << Teuchos::TypeNameTraits<local_ordinal_type>::name() << endl
1119 << "GlobalOrdinal: " << Teuchos::TypeNameTraits<global_ordinal_type>::name() << endl
1120 << "Node: " << Teuchos::TypeNameTraits<node_type>::name() << endl;
1121 }
1122 }
1123}
1124
1125template <class MatrixType>
1126Teuchos::RCP<const typename LocalSparseTriangularSolver<MatrixType>::map_type>
1128 getDomainMap() const {
1129 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error,
1130 "Ifpack2::LocalSparseTriangularSolver::getDomainMap: "
1131 "The matrix is null. Please call setMatrix() with a nonnull input "
1132 "before calling this method.");
1133 return A_->getDomainMap();
1134}
1135
1136template <class MatrixType>
1137Teuchos::RCP<const typename LocalSparseTriangularSolver<MatrixType>::map_type>
1139 getRangeMap() const {
1140 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error,
1141 "Ifpack2::LocalSparseTriangularSolver::getRangeMap: "
1142 "The matrix is null. Please call setMatrix() with a nonnull input "
1143 "before calling this method.");
1144 return A_->getRangeMap();
1145}
1146
1147template <class MatrixType>
1149 setMatrix(const Teuchos::RCP<const row_matrix_type>& A) {
1150 const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrix: ";
1151
1152 // If the pointer didn't change, do nothing. This is reasonable
1153 // because users are supposed to call this method with the same
1154 // object over all participating processes, and pointer identity
1155 // implies object identity.
1156 if (A.getRawPtr() != A_.getRawPtr() || isInternallyChanged_) {
1157 // Check in serial or one-process mode if the matrix is square.
1158 TEUCHOS_TEST_FOR_EXCEPTION(!A.is_null() && A->getComm()->getSize() == 1 &&
1159 A->getLocalNumRows() != A->getLocalNumCols(),
1160 std::runtime_error, prefix << "If A's communicator only contains one "
1161 "process, then A must be square. Instead, you provided a matrix A with "
1162 << A->getLocalNumRows() << " rows and " << A->getLocalNumCols() << " columns.");
1163
1164 // It's legal for A to be null; in that case, you may not call
1165 // initialize() until calling setMatrix() with a nonnull input.
1166 // Regardless, setting the matrix invalidates the preconditioner.
1167 isInitialized_ = false;
1168 isComputed_ = false;
1169
1170 if (A.is_null()) {
1171 A_crs_ = Teuchos::null;
1172 A_ = Teuchos::null;
1173 } else { // A is not null
1174 Teuchos::RCP<const crs_matrix_type> A_crs =
1175 Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
1176 TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument, prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
1177 A_crs_ = A_crs;
1178 A_ = A;
1179 }
1180
1181 if (Teuchos::nonnull(htsImpl_))
1182 htsImpl_->reset();
1183 } // pointers are not the same
1184}
1185
1186template <class MatrixType>
1188 setStreamInfo(const bool& isKokkosKernelsStream, const int& num_streams,
1189 const std::vector<HandleExecSpace>& exec_space_instances) {
1190 isKokkosKernelsStream_ = isKokkosKernelsStream;
1191 num_streams_ = num_streams;
1192 exec_space_instances_ = exec_space_instances;
1193 A_crs_v_ = std::vector<Teuchos::RCP<crs_matrix_type>>(num_streams_);
1194}
1195
1196template <class MatrixType>
1198 setMatrices(const std::vector<Teuchos::RCP<crs_matrix_type>>& A_crs_v) {
1199 const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrixWithStreams: ";
1200
1201 for (int i = 0; i < num_streams_; i++) {
1202 // If the pointer didn't change, do nothing. This is reasonable
1203 // because users are supposed to call this method with the same
1204 // object over all participating processes, and pointer identity
1205 // implies object identity.
1206 if (A_crs_v[i].getRawPtr() != A_crs_v_[i].getRawPtr() || isInternallyChanged_) {
1207 // Check in serial or one-process mode if the matrix is square.
1208 TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_v[i].is_null() && A_crs_v[i]->getComm()->getSize() == 1 &&
1209 A_crs_v[i]->getLocalNumRows() != A_crs_v[i]->getLocalNumCols(),
1210 std::runtime_error, prefix << "If A's communicator only contains one "
1211 "process, then A must be square. Instead, you provided a matrix A with "
1212 << A_crs_v[i]->getLocalNumRows() << " rows and " << A_crs_v[i]->getLocalNumCols() << " columns.");
1213
1214 // It's legal for A to be null; in that case, you may not call
1215 // initialize() until calling setMatrix() with a nonnull input.
1216 // Regardless, setting the matrix invalidates the preconditioner.
1217 isInitialized_ = false;
1218 isComputed_ = false;
1219
1220 if (A_crs_v[i].is_null()) {
1221 A_crs_v_[i] = Teuchos::null;
1222 } else { // A is not null
1223 Teuchos::RCP<crs_matrix_type> A_crs =
1224 Teuchos::rcp_dynamic_cast<crs_matrix_type>(A_crs_v[i]);
1225 TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument, prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
1226 A_crs_v_[i] = A_crs;
1227 }
1228 } // pointers are not the same
1229 }
1230}
1231
1232} // namespace Ifpack2
1233
1234#define IFPACK2_LOCALSPARSETRIANGULARSOLVER_INSTANT(S, LO, GO, N) \
1235 template class Ifpack2::LocalSparseTriangularSolver<Tpetra::RowMatrix<S, LO, GO, N>>;
1236
1237#endif // IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
"Preconditioner" that solves local sparse triangular systems.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:54
bool isComputed() const
Return true if compute() has been called.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:192
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with given verbosity to the given output stream.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1094
std::string description() const
A one-line description of this object.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1061
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Specialization of Tpetra::Map used by this class.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:70
LocalSparseTriangularSolver()
Constructor that takes no input matrix.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:297
MatrixType::local_ordinal_type local_ordinal_type
Type of the local indices of the input matrix.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:61
void setParameters(const Teuchos::ParameterList &params)
Set this object's parameters.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:349
double getComputeTime() const
Return the time spent in compute().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1047
void initialize()
"Symbolic" phase of setup
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:393
MatrixType::global_ordinal_type global_ordinal_type
Type of the global indices of the input matrix.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:63
int getNumCompute() const
Return the number of calls to compute().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1027
double getInitializeTime() const
Return the time spent in initialize().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1040
Teuchos::RCP< const map_type > getDomainMap() const
The domain of this operator.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1128
MatrixType::scalar_type scalar_type
Scalar type of the entries of the input matrix.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:57
MatrixType::node_type node_type
Node type of the input matrix.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:65
int getNumInitialize() const
Return the number of calls to initialize().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1021
void setStreamInfo(const bool &isKokkosKernelsStream, const int &num_streams, const std::vector< HandleExecSpace > &exec_space_instances)
Set this triangular solver's stream information.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1188
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Specialization of Tpetra::CrsMatrix used by this class.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:78
Teuchos::RCP< const map_type > getRangeMap() const
The range of this operator.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1139
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Set this preconditioner's matrix.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1149
void setMatrices(const std::vector< Teuchos::RCP< crs_matrix_type > > &A_crs_v)
Set this preconditioner's matrices (used by stream interface of triangular solve).
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1198
virtual ~LocalSparseTriangularSolver()
Destructor (virtual for memory safety).
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:333
double getApplyTime() const
Return the time spent in apply().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1054
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, and put the result in Y.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:720
int getNumApply() const
Return the number of calls to apply().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1033
void compute()
"Numeric" phase of setup
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:643
TRANS
Ifpack2 implementation details.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40