Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_RILUK_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_CRSRILUK_DEF_HPP
11#define IFPACK2_CRSRILUK_DEF_HPP
12
14#include "Ifpack2_LocalFilter.hpp"
15#include "Tpetra_CrsMatrix.hpp"
16#include "Teuchos_StandardParameterEntryValidators.hpp"
17#include "Ifpack2_LocalSparseTriangularSolver.hpp"
18#include "Ifpack2_Details_getParamTryingTypes.hpp"
19#include "Ifpack2_Details_getCrsMatrix.hpp"
21#include "Kokkos_Sort.hpp"
22#include "KokkosSparse_spiluk.hpp"
23#include "KokkosSparse_Utils.hpp"
24#include "KokkosKernels_Sorting.hpp"
25#include "KokkosSparse_IOUtils.hpp"
26
27namespace Ifpack2 {
28
29namespace Details {
30struct IlukImplType {
31 enum Enum {
32 Serial,
33 KSPILUK
34 };
35
36 static void loadPLTypeOption(Teuchos::Array<std::string>& type_strs, Teuchos::Array<Enum>& type_enums) {
37 type_strs.resize(2);
38 type_strs[0] = "Serial";
39 type_strs[1] = "KSPILUK";
40 type_enums.resize(2);
41 type_enums[0] = Serial;
42 type_enums[1] = KSPILUK;
43 }
44};
45} // namespace Details
46
47template <class MatrixType>
48RILUK<MatrixType>::RILUK(const Teuchos::RCP<const row_matrix_type>& Matrix_in, const Teuchos::RCP<const coord_type>& Matrix_in_coordinates)
49 : A_(Matrix_in)
50 , A_coordinates_(Matrix_in_coordinates)
51 , LevelOfFill_(0)
52 , Overalloc_(2.)
53 , isAllocated_(false)
54 , isInitialized_(false)
55 , isComputed_(false)
56 , numInitialize_(0)
57 , numCompute_(0)
58 , numApply_(0)
59 , initializeTime_(0.0)
60 , computeTime_(0.0)
61 , applyTime_(0.0)
62 , RelaxValue_(Teuchos::ScalarTraits<magnitude_type>::zero())
63 , Athresh_(Teuchos::ScalarTraits<magnitude_type>::zero())
64 , Rthresh_(Teuchos::ScalarTraits<magnitude_type>::one())
65 , isKokkosKernelsSpiluk_(false)
66 , isKokkosKernelsStream_(false)
67 , num_streams_(0)
68 , hasStreamReordered_(false)
69 , hasStreamsWithRCB_(false) {
70 allocateSolvers();
71}
72
73template <class MatrixType>
74RILUK<MatrixType>::RILUK(const Teuchos::RCP<const crs_matrix_type>& Matrix_in, const Teuchos::RCP<const coord_type>& Matrix_in_coordinates)
75 : A_(Matrix_in)
76 , A_coordinates_(Matrix_in_coordinates)
77 , LevelOfFill_(0)
78 , Overalloc_(2.)
79 , isAllocated_(false)
80 , isInitialized_(false)
81 , isComputed_(false)
82 , numInitialize_(0)
83 , numCompute_(0)
84 , numApply_(0)
85 , initializeTime_(0.0)
86 , computeTime_(0.0)
87 , applyTime_(0.0)
88 , RelaxValue_(Teuchos::ScalarTraits<magnitude_type>::zero())
89 , Athresh_(Teuchos::ScalarTraits<magnitude_type>::zero())
90 , Rthresh_(Teuchos::ScalarTraits<magnitude_type>::one())
91 , isKokkosKernelsSpiluk_(false)
92 , isKokkosKernelsStream_(false)
93 , num_streams_(0)
94 , hasStreamReordered_(false)
95 , hasStreamsWithRCB_(false) {
96 allocateSolvers();
97}
98
99template <class MatrixType>
101 if (!isKokkosKernelsStream_) {
102 if (Teuchos::nonnull(KernelHandle_)) {
103 KernelHandle_->destroy_spiluk_handle();
104 }
105 } else {
106 for (size_t i = 0; i < KernelHandle_v_.size(); i++) {
107 if (Teuchos::nonnull(KernelHandle_v_[i])) {
108 KernelHandle_v_[i]->destroy_spiluk_handle();
109 }
110 }
111 }
112}
113
114template <class MatrixType>
116 L_solver_ = Teuchos::rcp(new LocalSparseTriangularSolver<row_matrix_type>());
117 L_solver_->setObjectLabel("lower");
118 U_solver_ = Teuchos::rcp(new LocalSparseTriangularSolver<row_matrix_type>());
119 U_solver_->setObjectLabel("upper");
120}
121
122template <class MatrixType>
123void RILUK<MatrixType>::setMatrix(const Teuchos::RCP<const row_matrix_type>& A) {
124 // It's legal for A to be null; in that case, you may not call
125 // initialize() until calling setMatrix() with a nonnull input.
126 // Regardless, setting the matrix invalidates any previous
127 // factorization.
128 if (A.getRawPtr() != A_.getRawPtr()) {
129 isAllocated_ = false;
130 isInitialized_ = false;
131 isComputed_ = false;
132 A_local_ = Teuchos::null;
133 Graph_ = Teuchos::null;
134
135 // The sparse triangular solvers get a triangular factor as their
136 // input matrix. The triangular factors L_ and U_ are getting
137 // reset, so we reset the solvers' matrices to null. Do that
138 // before setting L_ and U_ to null, so that latter step actually
139 // frees the factors.
140 if (!L_solver_.is_null()) {
141 L_solver_->setMatrix(Teuchos::null);
142 }
143 if (!U_solver_.is_null()) {
144 U_solver_->setMatrix(Teuchos::null);
145 }
146
147 L_ = Teuchos::null;
148 U_ = Teuchos::null;
149 D_ = Teuchos::null;
150 A_ = A;
151 }
152}
153
154template <class MatrixType>
155void RILUK<MatrixType>::setCoord(const Teuchos::RCP<const coord_type>& A_coordinates) {
156 if (A_coordinates.getRawPtr() != A_coordinates_.getRawPtr()) {
157 A_coordinates_ = A_coordinates;
158 }
159}
160
161template <class MatrixType>
164 TEUCHOS_TEST_FOR_EXCEPTION(
165 L_.is_null(), std::runtime_error,
166 "Ifpack2::RILUK::getL: The L factor "
167 "is null. Please call initialize() (and preferably also compute()) "
168 "before calling this method. If the input matrix has not yet been set, "
169 "you must first call setMatrix() with a nonnull input matrix before you "
170 "may call initialize() or compute().");
171 return *L_;
172}
173
174template <class MatrixType>
175const Tpetra::Vector<typename RILUK<MatrixType>::scalar_type,
180 TEUCHOS_TEST_FOR_EXCEPTION(
181 D_.is_null(), std::runtime_error,
182 "Ifpack2::RILUK::getD: The D factor "
183 "(of diagonal entries) is null. Please call initialize() (and "
184 "preferably also compute()) before calling this method. If the input "
185 "matrix has not yet been set, you must first call setMatrix() with a "
186 "nonnull input matrix before you may call initialize() or compute().");
187 return *D_;
188}
189
190template <class MatrixType>
193 TEUCHOS_TEST_FOR_EXCEPTION(
194 U_.is_null(), std::runtime_error,
195 "Ifpack2::RILUK::getU: The U factor "
196 "is null. Please call initialize() (and preferably also compute()) "
197 "before calling this method. If the input matrix has not yet been set, "
198 "you must first call setMatrix() with a nonnull input matrix before you "
199 "may call initialize() or compute().");
200 return *U_;
201}
202
203template <class MatrixType>
205 TEUCHOS_TEST_FOR_EXCEPTION(
206 A_.is_null(), std::runtime_error,
207 "Ifpack2::RILUK::getNodeSmootherComplexity: "
208 "The input matrix A is null. Please call setMatrix() with a nonnull "
209 "input matrix, then call compute(), before calling this method.");
210 // RILUK methods cost roughly one apply + the nnz in the upper+lower triangles
211 if (!L_.is_null() && !U_.is_null())
212 return A_->getLocalNumEntries() + L_->getLocalNumEntries() + U_->getLocalNumEntries();
213 else
214 return 0;
215}
216
217template <class MatrixType>
218Teuchos::RCP<const Tpetra::Map<typename RILUK<MatrixType>::local_ordinal_type,
222 TEUCHOS_TEST_FOR_EXCEPTION(
223 A_.is_null(), std::runtime_error,
224 "Ifpack2::RILUK::getDomainMap: "
225 "The matrix is null. Please call setMatrix() with a nonnull input "
226 "before calling this method.");
227
228 // FIXME (mfh 25 Jan 2014) Shouldn't this just come from A_?
229 TEUCHOS_TEST_FOR_EXCEPTION(
230 Graph_.is_null(), std::runtime_error,
231 "Ifpack2::RILUK::getDomainMap: "
232 "The computed graph is null. Please call initialize() before calling "
233 "this method.");
234 return Graph_->getL_Graph()->getDomainMap();
235}
236
237template <class MatrixType>
238Teuchos::RCP<const Tpetra::Map<typename RILUK<MatrixType>::local_ordinal_type,
242 TEUCHOS_TEST_FOR_EXCEPTION(
243 A_.is_null(), std::runtime_error,
244 "Ifpack2::RILUK::getRangeMap: "
245 "The matrix is null. Please call setMatrix() with a nonnull input "
246 "before calling this method.");
247
248 // FIXME (mfh 25 Jan 2014) Shouldn't this just come from A_?
249 TEUCHOS_TEST_FOR_EXCEPTION(
250 Graph_.is_null(), std::runtime_error,
251 "Ifpack2::RILUK::getRangeMap: "
252 "The computed graph is null. Please call initialize() before calling "
253 "this method.");
254 return Graph_->getL_Graph()->getRangeMap();
255}
256
257template <class MatrixType>
259 using Teuchos::null;
260 using Teuchos::rcp;
261
262 if (!isAllocated_) {
263 if (!isKokkosKernelsStream_) {
264 // Deallocate any existing storage. This avoids storing 2x
265 // memory, since RCP op= does not deallocate until after the
266 // assignment.
267 L_ = null;
268 U_ = null;
269 D_ = null;
270
271 // Allocate Matrix using ILUK graphs
272 L_ = rcp(new crs_matrix_type(Graph_->getL_Graph()));
273 U_ = rcp(new crs_matrix_type(Graph_->getU_Graph()));
274 L_->setAllToScalar(STS::zero()); // Zero out L and U matrices
275 U_->setAllToScalar(STS::zero());
276
277 // FIXME (mfh 24 Jan 2014) This assumes domain == range Map for L and U.
278 L_->fillComplete();
279 U_->fillComplete();
280 D_ = rcp(new vec_type(Graph_->getL_Graph()->getRowMap()));
281 } else {
282 L_v_ = std::vector<Teuchos::RCP<crs_matrix_type> >(num_streams_, null);
283 U_v_ = std::vector<Teuchos::RCP<crs_matrix_type> >(num_streams_, null);
284 for (int i = 0; i < num_streams_; i++) {
285 L_v_[i] = rcp(new crs_matrix_type(Graph_v_[i]->getL_Graph()));
286 U_v_[i] = rcp(new crs_matrix_type(Graph_v_[i]->getU_Graph()));
287 L_v_[i]->setAllToScalar(STS::zero()); // Zero out L and U matrices
288 U_v_[i]->setAllToScalar(STS::zero());
289
290 L_v_[i]->fillComplete();
291 U_v_[i]->fillComplete();
292 }
293 }
294 }
295 isAllocated_ = true;
296}
298template <class MatrixType>
300 setParameters(const Teuchos::ParameterList& params) {
301 using Details::getParamTryingTypes;
302 using Teuchos::Array;
303 using Teuchos::ParameterList;
304 using Teuchos::RCP;
305 const char prefix[] = "Ifpack2::RILUK: ";
307 // Default values of the various parameters.
308 int fillLevel = 0;
309 magnitude_type absThresh = STM::zero();
310 magnitude_type relThresh = STM::one();
311 magnitude_type relaxValue = STM::zero();
312 double overalloc = 2.;
313 int nstreams = 0;
314
315 // "fact: iluk level-of-fill" parsing is more complicated, because
316 // we want to allow as many types as make sense. int is the native
317 // type, but we also want to accept double (for backwards
318 // compatibilty with ILUT). You can't cast arbitrary magnitude_type
319 // (e.g., Sacado::MP::Vector) to int, so we use float instead, to
320 // get coverage of the most common magnitude_type cases. Weirdly,
321 // there's an Ifpack2 test that sets the fill level as a
322 // global_ordinal_type.
323 {
324 const std::string paramName("fact: iluk level-of-fill");
325 getParamTryingTypes<int, int, global_ordinal_type, double, float>(fillLevel, params, paramName, prefix);
326 }
327 // For the other parameters, we prefer magnitude_type, but allow
328 // double for backwards compatibility.
329 {
330 const std::string paramName("fact: absolute threshold");
331 getParamTryingTypes<magnitude_type, magnitude_type, double>(absThresh, params, paramName, prefix);
332 }
333 {
334 const std::string paramName("fact: relative threshold");
335 getParamTryingTypes<magnitude_type, magnitude_type, double>(relThresh, params, paramName, prefix);
336 }
337 {
338 const std::string paramName("fact: relax value");
339 getParamTryingTypes<magnitude_type, magnitude_type, double>(relaxValue, params, paramName, prefix);
340 }
341 {
342 const std::string paramName("fact: iluk overalloc");
343 getParamTryingTypes<double, double>(overalloc, params, paramName, prefix);
344 }
345
346 // Parsing implementation type
347 Details::IlukImplType::Enum ilukimplType = Details::IlukImplType::Serial;
348 do {
349 static const char typeName[] = "fact: type";
350
351 if (!params.isType<std::string>(typeName)) break;
352
353 // Map std::string <-> IlukImplType::Enum.
354 Array<std::string> ilukimplTypeStrs;
355 Array<Details::IlukImplType::Enum> ilukimplTypeEnums;
356 Details::IlukImplType::loadPLTypeOption(ilukimplTypeStrs, ilukimplTypeEnums);
357 Teuchos::StringToIntegralParameterEntryValidator<Details::IlukImplType::Enum>
358 s2i(ilukimplTypeStrs(), ilukimplTypeEnums(), typeName, false);
359
360 ilukimplType = s2i.getIntegralValue(params.get<std::string>(typeName));
361 } while (0);
362
363 if (ilukimplType == Details::IlukImplType::KSPILUK) {
364 this->isKokkosKernelsSpiluk_ = true;
365 } else {
366 this->isKokkosKernelsSpiluk_ = false;
367 }
368
369 {
370 const std::string paramName("fact: kspiluk number-of-streams");
371 getParamTryingTypes<int, int, global_ordinal_type>(nstreams, params, paramName, prefix);
372 }
373
374 // Forward to trisolvers.
375 L_solver_->setParameters(params);
376 U_solver_->setParameters(params);
377
378 // "Commit" the values only after validating all of them. This
379 // ensures that there are no side effects if this routine throws an
380 // exception.
381
382 LevelOfFill_ = fillLevel;
383 Overalloc_ = overalloc;
384#ifdef KOKKOS_ENABLE_OPENMP
385 if constexpr (std::is_same_v<execution_space, Kokkos::OpenMP>) {
386 nstreams = std::min(nstreams, execution_space{}.concurrency());
387 }
388#endif
389 num_streams_ = nstreams;
390
391 if (num_streams_ >= 1) {
392 this->isKokkosKernelsStream_ = true;
393 // Will we do reordering in streams?
394 if (params.isParameter("fact: kspiluk reordering in streams"))
395 hasStreamReordered_ = params.get<bool>("fact: kspiluk reordering in streams");
396
397 if (params.isParameter("fact: kspiluk streams use rcb"))
398 hasStreamsWithRCB_ = params.get<bool>("fact: kspiluk streams use rcb");
399 } else {
400 this->isKokkosKernelsStream_ = false;
401 }
402
403 // mfh 28 Nov 2012: The previous code would not assign Athresh_,
404 // Rthresh_, or RelaxValue_, if the read-in value was -1. I don't
405 // know if keeping this behavior is correct, but I'll keep it just
406 // so as not to change previous behavior.
407
408 if (absThresh != -STM::one()) {
409 Athresh_ = absThresh;
411 if (relThresh != -STM::one()) {
412 Rthresh_ = relThresh;
413 }
414 if (relaxValue != -STM::one()) {
415 RelaxValue_ = relaxValue;
416 }
418
419template <class MatrixType>
420Teuchos::RCP<const typename RILUK<MatrixType>::row_matrix_type>
422 return Teuchos::rcp_implicit_cast<const row_matrix_type>(A_);
423}
424
425template <class MatrixType>
426Teuchos::RCP<const typename RILUK<MatrixType>::crs_matrix_type>
428 return Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_, true);
430
431template <class MatrixType>
432Teuchos::RCP<const typename RILUK<MatrixType>::coord_type>
434 return A_coordinates_;
435}
436
437template <class MatrixType>
438Teuchos::RCP<const typename RILUK<MatrixType>::row_matrix_type>
439RILUK<MatrixType>::makeLocalFilter(const Teuchos::RCP<const row_matrix_type>& A) {
440 using Teuchos::RCP;
441 using Teuchos::rcp;
442 using Teuchos::rcp_dynamic_cast;
443 using Teuchos::rcp_implicit_cast;
444
445 // If A_'s communicator only has one process, or if its column and
446 // row Maps are the same, then it is already local, so use it
447 // directly.
448 if (A->getRowMap()->getComm()->getSize() == 1 ||
449 A->getRowMap()->isSameAs(*(A->getColMap()))) {
450 return A;
451 }
452
453 // If A_ is already a LocalFilter, then use it directly. This
454 // should be the case if RILUK is being used through
455 // AdditiveSchwarz, for example.
456 RCP<const LocalFilter<row_matrix_type> > A_lf_r =
457 rcp_dynamic_cast<const LocalFilter<row_matrix_type> >(A);
458 if (!A_lf_r.is_null()) {
459 return rcp_implicit_cast<const row_matrix_type>(A_lf_r);
460 } else {
461 // A_'s communicator has more than one process, its row Map and
462 // its column Map differ, and A_ is not a LocalFilter. Thus, we
463 // have to wrap it in a LocalFilter.
464 return rcp(new LocalFilter<row_matrix_type>(A));
465 }
466}
467
468template <class MatrixType>
470 using Teuchos::Array;
471 using Teuchos::ArrayView;
472 using Teuchos::RCP;
473 using Teuchos::rcp;
474 using Teuchos::rcp_const_cast;
475 using Teuchos::rcp_dynamic_cast;
476 using Teuchos::rcp_implicit_cast;
477 typedef Tpetra::CrsGraph<local_ordinal_type,
479 node_type>
480 crs_graph_type;
481 typedef Tpetra::Map<local_ordinal_type,
483 node_type>
484 crs_map_type;
485 const char prefix[] = "Ifpack2::RILUK::initialize: ";
486
487 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix << "The matrix is null. Please "
488 "call setMatrix() with a nonnull input before calling this method.");
489 TEUCHOS_TEST_FOR_EXCEPTION(!A_->isFillComplete(), std::runtime_error, prefix << "The matrix is not "
490 "fill complete. You may not invoke initialize() or compute() with this "
491 "matrix until the matrix is fill complete. If your matrix is a "
492 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
493 "range Maps, if appropriate) before calling this method.");
494
495 Teuchos::Time timer("RILUK::initialize");
496 double startTime = timer.wallTime();
497 { // Start timing
498 Teuchos::TimeMonitor timeMon(timer);
499
500 // Calling initialize() means that the user asserts that the graph
501 // of the sparse matrix may have changed. We must not just reuse
502 // the previous graph in that case.
503 //
504 // Regarding setting isAllocated_ to false: Eventually, we may want
505 // some kind of clever memory reuse strategy, but it's always
506 // correct just to blow everything away and start over.
507 isInitialized_ = false;
508 isAllocated_ = false;
509 isComputed_ = false;
510 Graph_ = Teuchos::null;
511 Y_tmp_ = nullptr;
512 reordered_x_ = nullptr;
513 reordered_y_ = nullptr;
514
515 if (isKokkosKernelsStream_) {
516 Graph_v_ = std::vector<Teuchos::RCP<iluk_graph_type> >(num_streams_);
517 A_local_diagblks_v_ = std::vector<local_matrix_device_type>(num_streams_);
518 for (int i = 0; i < num_streams_; i++) {
519 Graph_v_[i] = Teuchos::null;
520 }
521 }
522
523 A_local_ = makeLocalFilter(A_);
524
525 TEUCHOS_TEST_FOR_EXCEPTION(
526 A_local_.is_null(), std::logic_error,
527 "Ifpack2::RILUK::initialize: "
528 "makeLocalFilter returned null; it failed to compute A_local. "
529 "Please report this bug to the Ifpack2 developers.");
530
531 // FIXME (mfh 24 Jan 2014, 26 Mar 2014) It would be more efficient
532 // to rewrite RILUK so that it works with any RowMatrix input, not
533 // just CrsMatrix. (That would require rewriting IlukGraph to
534 // handle a Tpetra::RowGraph.) However, to make it work for now,
535 // we just copy the input matrix if it's not a CrsMatrix.
536
537 {
538 A_local_crs_ = Details::getCrsMatrix(A_local_);
539 if (A_local_crs_.is_null()) {
540 local_ordinal_type numRows = A_local_->getLocalNumRows();
541 Array<size_t> entriesPerRow(numRows);
542 for (local_ordinal_type i = 0; i < numRows; i++) {
543 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
545 A_local_crs_nc_ =
546 rcp(new crs_matrix_type(A_local_->getRowMap(),
547 A_local_->getColMap(),
548 entriesPerRow()));
549 // copy entries into A_local_crs
550 nonconst_local_inds_host_view_type indices("indices", A_local_->getLocalMaxNumRowEntries());
551 nonconst_values_host_view_type values("values", A_local_->getLocalMaxNumRowEntries());
552 for (local_ordinal_type i = 0; i < numRows; i++) {
553 size_t numEntries = 0;
554 A_local_->getLocalRowCopy(i, indices, values, numEntries);
555 A_local_crs_nc_->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
556 }
557 A_local_crs_nc_->fillComplete(A_local_->getDomainMap(), A_local_->getRangeMap());
558 A_local_crs_ = rcp_const_cast<const crs_matrix_type>(A_local_crs_nc_);
559 }
560 if (!isKokkosKernelsStream_) {
561 Graph_ = rcp(new Ifpack2::IlukGraph<crs_graph_type, kk_handle_type>(A_local_crs_->getCrsGraph(),
562 LevelOfFill_, 0, Overalloc_));
563 } else {
564 std::vector<int> weights(num_streams_);
565 std::fill(weights.begin(), weights.end(), 1);
566 exec_space_instances_ = Kokkos::Experimental::partition_space(execution_space(), weights);
567
568 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
569 if (!hasStreamReordered_) {
570 if (hasStreamsWithRCB_) {
571 TEUCHOS_TEST_FOR_EXCEPTION(A_coordinates_.is_null(), std::runtime_error, prefix << "The coordinates associated with rows of the input matrix is null while RILUK uses streams with RCB. Please call setCoord() with a nonnull input before calling this method.");
572 auto A_coordinates_lcl = A_coordinates_->getLocalViewDevice(Tpetra::Access::ReadOnly);
573 perm_rcb_ = perm_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_rcb_"), A_coordinates_lcl.extent(0));
574#if KOKKOS_VERSION >= 50100
575 reverse_perm_rcb_ = perm_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "reverse_perm_rcb_"), A_coordinates_lcl.extent(0));
576#endif
577 coors_rcb_ = coors_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "coors_rcb_"), A_coordinates_lcl.extent(0), A_coordinates_lcl.extent(1));
578 Kokkos::deep_copy(coors_rcb_, A_coordinates_lcl);
579#if KOKKOS_VERSION >= 50100
580 local_ordinal_type n_levels = static_cast<local_ordinal_type>(std::log2(static_cast<double>(num_streams_)) + 1);
581 partition_sizes_rcb_ = KokkosGraph::Experimental::recursive_coordinate_bisection(coors_rcb_, perm_rcb_, reverse_perm_rcb_, n_levels);
582 KokkosSparse::Experimental::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, perm_rcb_, reverse_perm_rcb_, partition_sizes_rcb_,
583 A_local_diagblks_v_);
584#else
585 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, coors_rcb_,
586 A_local_diagblks_v_, perm_rcb_);
587#endif
588 } else {
589#if KOKKOS_VERSION >= 50100
590 KokkosSparse::Experimental::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_);
591#else
592 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_);
593#endif
594 }
595 } else {
596#if KOKKOS_VERSION >= 50100
597 perm_v_ = KokkosSparse::Experimental::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_, true);
598#else
599 perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_, true);
600#endif
601 reverse_perm_v_.resize(perm_v_.size());
602 for (size_t istream = 0; istream < perm_v_.size(); ++istream) {
603 using perm_type = typename lno_nonzero_view_t::non_const_type;
604 const auto perm = perm_v_[istream];
605 const auto perm_length = perm.extent(0);
606 perm_type reverse_perm(
607 Kokkos::view_alloc(Kokkos::WithoutInitializing, "reverse_perm"),
608 perm_length);
609 Kokkos::parallel_for(
610 Kokkos::RangePolicy<execution_space>(exec_space_instances_[istream], 0, perm_length),
611 KOKKOS_LAMBDA(const local_ordinal_type ii) {
612 reverse_perm(perm(ii)) = ii;
613 });
614 reverse_perm_v_[istream] = reverse_perm;
615 }
616 }
617
618 A_local_diagblks_rowmap_v_ = std::vector<lno_row_view_t>(num_streams_);
619 A_local_diagblks_entries_v_ = std::vector<lno_nonzero_view_t>(num_streams_);
620 A_local_diagblks_values_v_ = std::vector<scalar_nonzero_view_t>(num_streams_);
621
622 for (int i = 0; i < num_streams_; i++) {
623 A_local_diagblks_rowmap_v_[i] = A_local_diagblks_v_[i].graph.row_map;
624 A_local_diagblks_entries_v_[i] = A_local_diagblks_v_[i].graph.entries;
625 A_local_diagblks_values_v_[i] = A_local_diagblks_v_[i].values;
626
627 Teuchos::RCP<const crs_map_type> A_local_diagblks_RowMap = rcp(new crs_map_type(A_local_diagblks_v_[i].numRows(),
628 A_local_diagblks_v_[i].numRows(),
629 A_local_crs_->getRowMap()->getComm()));
630 Teuchos::RCP<const crs_map_type> A_local_diagblks_ColMap = rcp(new crs_map_type(A_local_diagblks_v_[i].numCols(),
631 A_local_diagblks_v_[i].numCols(),
632 A_local_crs_->getColMap()->getComm()));
633 Teuchos::RCP<crs_matrix_type> A_local_diagblks = rcp(new crs_matrix_type(A_local_diagblks_RowMap,
634 A_local_diagblks_ColMap,
635 A_local_diagblks_v_[i]));
636 Graph_v_[i] = rcp(new Ifpack2::IlukGraph<crs_graph_type, kk_handle_type>(A_local_diagblks->getCrsGraph(),
637 LevelOfFill_, 0, Overalloc_));
638 }
639 }
640 }
641
642 if (this->isKokkosKernelsSpiluk_) {
643 if (!isKokkosKernelsStream_) {
644 this->KernelHandle_ = Teuchos::rcp(new kk_handle_type());
645 KernelHandle_->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
646 A_local_->getLocalNumRows(),
647 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1),
648 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1));
649 Graph_->initialize(KernelHandle_); // this calls spiluk_symbolic
650 } else {
651 KernelHandle_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(num_streams_);
652 for (int i = 0; i < num_streams_; i++) {
653 KernelHandle_v_[i] = Teuchos::rcp(new kk_handle_type());
654 KernelHandle_v_[i]->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
655 A_local_diagblks_v_[i].numRows(),
656 2 * A_local_diagblks_v_[i].nnz() * (LevelOfFill_ + 1),
657 2 * A_local_diagblks_v_[i].nnz() * (LevelOfFill_ + 1));
658 Graph_v_[i]->initialize(KernelHandle_v_[i]); // this calls spiluk_symbolic
659 }
660 }
661 } else {
662 Graph_->initialize();
663 }
664
665 allocate_L_and_U();
666 checkOrderingConsistency(*A_local_);
667 if (!isKokkosKernelsStream_) {
668 L_solver_->setMatrix(L_);
669 } else {
670 L_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
671 L_solver_->setMatrices(L_v_);
672 }
673 L_solver_->initialize();
674
675 if (!isKokkosKernelsStream_) {
676 U_solver_->setMatrix(U_);
677 } else {
678 U_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
679 U_solver_->setMatrices(U_v_);
680 }
681 U_solver_->initialize();
682
683 // Do not call initAllValues. compute() always calls initAllValues to
684 // fill L and U with possibly new numbers. initialize() is concerned
685 // only with the nonzero pattern.
686 } // Stop timing
687
688 isInitialized_ = true;
689 ++numInitialize_;
690 initializeTime_ += (timer.wallTime() - startTime);
691}
692
693template <class MatrixType>
694void RILUK<MatrixType>::
695 checkOrderingConsistency(const row_matrix_type& A) {
696 // First check that the local row map ordering is the same as the local portion of the column map.
697 // The extraction of the strictly lower/upper parts of A, as well as the factorization,
698 // implicitly assume that this is the case.
699 Teuchos::ArrayView<const global_ordinal_type> rowGIDs = A.getRowMap()->getLocalElementList();
700 Teuchos::ArrayView<const global_ordinal_type> colGIDs = A.getColMap()->getLocalElementList();
701 bool gidsAreConsistentlyOrdered = true;
702 global_ordinal_type indexOfInconsistentGID = 0;
703 for (global_ordinal_type i = 0; i < rowGIDs.size(); ++i) {
704 if (rowGIDs[i] != colGIDs[i]) {
705 gidsAreConsistentlyOrdered = false;
706 indexOfInconsistentGID = i;
707 break;
708 }
709 }
710 TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered == false, std::runtime_error,
711 "The ordering of the local GIDs in the row and column maps is not the same"
712 << std::endl
713 << "at index " << indexOfInconsistentGID
714 << ". Consistency is required, as all calculations are done with"
715 << std::endl
716 << "local indexing.");
717}
718
719template <class MatrixType>
720void RILUK<MatrixType>::
721 initAllValues(const row_matrix_type& A) {
722 using Teuchos::ArrayRCP;
723 using Teuchos::Comm;
724 using Teuchos::ptr;
725 using Teuchos::RCP;
726 using Teuchos::REDUCE_SUM;
727 using Teuchos::reduceAll;
728 typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
729
730 size_t NumIn = 0, NumL = 0, NumU = 0;
731 bool DiagFound = false;
732 size_t NumNonzeroDiags = 0;
733 size_t MaxNumEntries = A.getGlobalMaxNumRowEntries();
734
735 // Allocate temporary space for extracting the strictly
736 // lower and upper parts of the matrix A.
737 nonconst_local_inds_host_view_type InI("InI", MaxNumEntries);
738 Teuchos::Array<local_ordinal_type> LI(MaxNumEntries);
739 Teuchos::Array<local_ordinal_type> UI(MaxNumEntries);
740 nonconst_values_host_view_type InV("InV", MaxNumEntries);
741 Teuchos::Array<scalar_type> LV(MaxNumEntries);
742 Teuchos::Array<scalar_type> UV(MaxNumEntries);
743
744 // Check if values should be inserted or replaced
745 const bool ReplaceValues = L_->isStaticGraph() || L_->isLocallyIndexed();
746
747 L_->resumeFill();
748 U_->resumeFill();
749 if (ReplaceValues) {
750 L_->setAllToScalar(STS::zero()); // Zero out L and U matrices
751 U_->setAllToScalar(STS::zero());
752 }
753
754 D_->putScalar(STS::zero()); // Set diagonal values to zero
755 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
756
757 RCP<const map_type> rowMap = L_->getRowMap();
758
759 // First we copy the user's matrix into L and U, regardless of fill level.
760 // It is important to note that L and U are populated using local indices.
761 // This means that if the row map GIDs are not monotonically increasing
762 // (i.e., permuted or gappy), then the strictly lower (upper) part of the
763 // matrix is not the one that you would get if you based L (U) on GIDs.
764 // This is ok, as the *order* of the GIDs in the rowmap is a better
765 // expression of the user's intent than the GIDs themselves.
766
767 Teuchos::ArrayView<const global_ordinal_type> nodeGIDs = rowMap->getLocalElementList();
768 for (size_t myRow = 0; myRow < A.getLocalNumRows(); ++myRow) {
769 local_ordinal_type local_row = myRow;
770
771 // TODO JJH 4April2014 An optimization is to use getLocalRowView. Not all matrices support this,
772 // we'd need to check via the Tpetra::RowMatrix method supportsRowViews().
773 A.getLocalRowCopy(local_row, InI, InV, NumIn); // Get Values and Indices
774
775 // Split into L and U (we don't assume that indices are ordered).
776
777 NumL = 0;
778 NumU = 0;
779 DiagFound = false;
780
781 for (size_t j = 0; j < NumIn; ++j) {
782 const local_ordinal_type k = InI[j];
783
784 if (k == local_row) {
785 DiagFound = true;
786 // Store perturbed diagonal in Tpetra::Vector D_
787 DV(local_row) += Rthresh_ * InV[j] + IFPACK2_SGN(InV[j]) * Athresh_;
788 } else if (k < 0) { // Out of range
789 TEUCHOS_TEST_FOR_EXCEPTION(
790 true, std::runtime_error,
791 "Ifpack2::RILUK::initAllValues: current "
792 "GID k = "
793 << k << " < 0. I'm not sure why this is an error; it is "
794 "probably an artifact of the undocumented assumptions of the "
795 "original implementation (likely copied and pasted from Ifpack). "
796 "Nevertheless, the code I found here insisted on this being an error "
797 "state, so I will throw an exception here.");
798 } else if (k < local_row) {
799 LI[NumL] = k;
800 LV[NumL] = InV[j];
801 NumL++;
802 } else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
803 UI[NumU] = k;
804 UV[NumU] = InV[j];
805 NumU++;
806 }
807 }
808
809 // Check in things for this row of L and U
810
811 if (DiagFound) {
812 ++NumNonzeroDiags;
813 } else {
814 DV(local_row) = Athresh_;
815 }
816
817 if (NumL) {
818 if (ReplaceValues) {
819 L_->replaceLocalValues(local_row, LI(0, NumL), LV(0, NumL));
820 } else {
821 // FIXME JJH 24April2014 Is this correct? I believe this case is when there aren't already values
822 // FIXME in this row in the column locations corresponding to UI.
823 L_->insertLocalValues(local_row, LI(0, NumL), LV(0, NumL));
824 }
825 }
826
827 if (NumU) {
828 if (ReplaceValues) {
829 U_->replaceLocalValues(local_row, UI(0, NumU), UV(0, NumU));
830 } else {
831 // FIXME JJH 24April2014 Is this correct? I believe this case is when there aren't already values
832 // FIXME in this row in the column locations corresponding to UI.
833 U_->insertLocalValues(local_row, UI(0, NumU), UV(0, NumU));
834 }
835 }
836 }
837
838 // At this point L and U have the values of A in the structure of L
839 // and U, and diagonal vector D
840
841 isInitialized_ = true;
842}
843
844template <class MatrixType>
845void RILUK<MatrixType>::compute_serial() {
846 // Fill L and U with numbers. This supports nonzero pattern reuse by calling
847 // initialize() once and then compute() multiple times.
848 initAllValues(*A_local_);
849
850 // MinMachNum should be officially defined, for now pick something a little
851 // bigger than IEEE underflow value
852
853 const scalar_type MinDiagonalValue = STS::rmin();
854 const scalar_type MaxDiagonalValue = STS::one() / MinDiagonalValue;
855
856 size_t NumIn, NumL, NumU;
857
858 // Get Maximum Row length
859 const size_t MaxNumEntries =
860 L_->getLocalMaxNumRowEntries() + U_->getLocalMaxNumRowEntries() + 1;
861
862 Teuchos::Array<local_ordinal_type> InI(MaxNumEntries); // Allocate temp space
863 Teuchos::Array<scalar_type> InV(MaxNumEntries);
864 size_t num_cols = U_->getColMap()->getLocalNumElements();
865 Teuchos::Array<int> colflag(num_cols, -1);
866
867 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
868
869 // Now start the factorization.
870
871 using IST = typename row_matrix_type::impl_scalar_type;
872 for (size_t i = 0; i < L_->getLocalNumRows(); ++i) {
873 local_ordinal_type local_row = i;
874 // Need some integer workspace and pointers
875 size_t NumUU;
876 local_inds_host_view_type UUI;
877 values_host_view_type UUV;
878
879 // Fill InV, InI with current row of L, D and U combined
880
881 NumIn = MaxNumEntries;
882 nonconst_local_inds_host_view_type InI_v(InI.data(), MaxNumEntries);
883 nonconst_values_host_view_type InV_v(reinterpret_cast<IST*>(InV.data()), MaxNumEntries);
884
885 L_->getLocalRowCopy(local_row, InI_v, InV_v, NumL);
886
887 InV[NumL] = DV(i); // Put in diagonal
888 InI[NumL] = local_row;
889
890 nonconst_local_inds_host_view_type InI_sub(InI.data() + NumL + 1, MaxNumEntries - NumL - 1);
891 nonconst_values_host_view_type InV_sub(reinterpret_cast<IST*>(InV.data()) + NumL + 1, MaxNumEntries - NumL - 1);
892
893 U_->getLocalRowCopy(local_row, InI_sub, InV_sub, NumU);
894 NumIn = NumL + NumU + 1;
895
896 // Set column flags
897 for (size_t j = 0; j < NumIn; ++j) {
898 colflag[InI[j]] = j;
899 }
900
901 scalar_type diagmod = STS::zero(); // Off-diagonal accumulator
902
903 for (size_t jj = 0; jj < NumL; ++jj) {
904 local_ordinal_type j = InI[jj];
905 IST multiplier = InV[jj]; // current_mults++;
906
907 InV[jj] *= static_cast<scalar_type>(DV(j));
908
909 U_->getLocalRowView(j, UUI, UUV); // View of row above
910 NumUU = UUI.size();
911
912 if (RelaxValue_ == STM::zero()) {
913 for (size_t k = 0; k < NumUU; ++k) {
914 const int kk = colflag[UUI[k]];
915 // FIXME (mfh 23 Dec 2013) Wait a second, we just set
916 // colflag above using size_t (which is generally unsigned),
917 // but now we're querying it using int (which is signed).
918 if (kk > -1) {
919 InV[kk] -= static_cast<scalar_type>(multiplier * UUV[k]);
920 }
921 }
922
923 } else {
924 for (size_t k = 0; k < NumUU; ++k) {
925 // FIXME (mfh 23 Dec 2013) Wait a second, we just set
926 // colflag above using size_t (which is generally unsigned),
927 // but now we're querying it using int (which is signed).
928 const int kk = colflag[UUI[k]];
929 if (kk > -1) {
930 InV[kk] -= static_cast<scalar_type>(multiplier * UUV[k]);
931 } else {
932 diagmod -= static_cast<scalar_type>(multiplier * UUV[k]);
933 }
934 }
935 }
936 }
937
938 if (NumL) {
939 // Replace current row of L
940 L_->replaceLocalValues(local_row, InI(0, NumL), InV(0, NumL));
941 }
942
943 DV(i) = InV[NumL]; // Extract Diagonal value
944
945 if (RelaxValue_ != STM::zero()) {
946 DV(i) += RelaxValue_ * diagmod; // Add off diagonal modifications
947 }
948
949 if (STS::magnitude(DV(i)) > STS::magnitude(MaxDiagonalValue)) {
950 if (STS::real(DV(i)) < STM::zero()) {
951 DV(i) = -MinDiagonalValue;
952 } else {
953 DV(i) = MinDiagonalValue;
954 }
955 } else {
956 DV(i) = static_cast<impl_scalar_type>(STS::one()) / DV(i); // Invert diagonal value
957 }
958
959 for (size_t j = 0; j < NumU; ++j) {
960 InV[NumL + 1 + j] *= static_cast<scalar_type>(DV(i)); // Scale U by inverse of diagonal
961 }
962
963 if (NumU) {
964 // Replace current row of L and U
965 U_->replaceLocalValues(local_row, InI(NumL + 1, NumU), InV(NumL + 1, NumU));
966 }
967
968 // Reset column flags
969 for (size_t j = 0; j < NumIn; ++j) {
970 colflag[InI[j]] = -1;
971 }
972 }
973
974 // The domain of L and the range of U are exactly their own row maps
975 // (there is no communication). The domain of U and the range of L
976 // must be the same as those of the original matrix, However if the
977 // original matrix is a VbrMatrix, these two latter maps are
978 // translation from a block map to a point map.
979 // FIXME (mfh 23 Dec 2013) Do we know that the column Map of L_ is
980 // always one-to-one?
981 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
982 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
983
984 // If L_solver_ or U_solver store modified factors internally, we need to reset those
985 L_solver_->setMatrix(L_);
986 L_solver_->compute(); // NOTE: Only do compute if the pointer changed. Otherwise, do nothing
987 U_solver_->setMatrix(U_);
988 U_solver_->compute(); // NOTE: Only do compute if the pointer changed. Otherwise, do nothing
989}
990
991template <class MatrixType>
992void RILUK<MatrixType>::compute_kkspiluk() {
993 L_->resumeFill();
994 U_->resumeFill();
995
996 L_->setAllToScalar(STS::zero()); // Zero out L and U matrices
997 U_->setAllToScalar(STS::zero());
998
999 using row_map_type = typename crs_matrix_type::local_matrix_device_type::row_map_type;
1000 auto lclL = L_->getLocalMatrixDevice();
1001 row_map_type L_rowmap = lclL.graph.row_map;
1002 auto L_entries = lclL.graph.entries;
1003 auto L_values = lclL.values;
1004
1005 auto lclU = U_->getLocalMatrixDevice();
1006 row_map_type U_rowmap = lclU.graph.row_map;
1007 auto U_entries = lclU.graph.entries;
1008 auto U_values = lclU.values;
1009
1010 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1011 KokkosSparse::spiluk_numeric(KernelHandle_.getRawPtr(), LevelOfFill_,
1012 lclMtx.graph.row_map, lclMtx.graph.entries, lclMtx.values,
1013 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values);
1014
1015 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
1016 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
1017
1018 L_solver_->compute();
1019 U_solver_->compute();
1020}
1021
1022template <class MatrixType>
1023void RILUK<MatrixType>::compute_kkspiluk_stream() {
1024 std::vector<lno_row_view_t> L_rowmap_v(num_streams_);
1025 std::vector<lno_nonzero_view_t> L_entries_v(num_streams_);
1026 std::vector<scalar_nonzero_view_t> L_values_v(num_streams_);
1027 std::vector<lno_row_view_t> U_rowmap_v(num_streams_);
1028 std::vector<lno_nonzero_view_t> U_entries_v(num_streams_);
1029 std::vector<scalar_nonzero_view_t> U_values_v(num_streams_);
1030 std::vector<kk_handle_type*> KernelHandle_rawptr_v_(num_streams_);
1031 for (int i = 0; i < num_streams_; i++) {
1032 L_v_[i]->resumeFill();
1033 U_v_[i]->resumeFill();
1034
1035 L_v_[i]->setAllToScalar(STS::zero()); // Zero out L and U matrices
1036 U_v_[i]->setAllToScalar(STS::zero());
1037
1038 auto lclL = L_v_[i]->getLocalMatrixDevice();
1039 L_rowmap_v[i] = lclL.graph.row_map;
1040 L_entries_v[i] = lclL.graph.entries;
1041 L_values_v[i] = lclL.values;
1042
1043 auto lclU = U_v_[i]->getLocalMatrixDevice();
1044 U_rowmap_v[i] = lclU.graph.row_map;
1045 U_entries_v[i] = lclU.graph.entries;
1046 U_values_v[i] = lclU.values;
1047 KernelHandle_rawptr_v_[i] = KernelHandle_v_[i].getRawPtr();
1048 }
1049
1050 if (hasStreamReordered_) {
1051 // NOTE: For now, only do the value copying if RCM reordering is enabled in streams.
1052 // Otherwise, value copying was taken care prior to entering this function.
1053 // TODO: revisit this later.
1054 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1055 // A_local_diagblks was already setup during initialize, just copy the corresponding
1056 // values from A_local_crs_ in parallel now.
1057 using TeamPolicy = Kokkos::TeamPolicy<execution_space>;
1058 const auto A_nrows = lclMtx.numRows();
1059 auto rows_per_block = ((A_nrows % num_streams_) == 0)
1060 ? (A_nrows / num_streams_)
1061 : (A_nrows / num_streams_ + 1);
1062 for (int i = 0; i < num_streams_; i++) {
1063 const auto start_row_offset = i * rows_per_block;
1064 auto rowptrs = A_local_diagblks_rowmap_v_[i];
1065 auto colindices = A_local_diagblks_entries_v_[i];
1066 auto values = A_local_diagblks_values_v_[i];
1067 const bool reordered = hasStreamReordered_;
1068 typename lno_nonzero_view_t::non_const_type reverse_perm = hasStreamReordered_ ? reverse_perm_v_[i] : typename lno_nonzero_view_t::non_const_type{};
1069 TeamPolicy pol(exec_space_instances_[i], A_local_diagblks_rowmap_v_[i].extent(0) - 1, Kokkos::AUTO);
1070 Kokkos::parallel_for(
1071 pol, KOKKOS_LAMBDA(const typename TeamPolicy::member_type& team) {
1072 const auto irow = team.league_rank();
1073 const auto irow_A = start_row_offset + (reordered ? reverse_perm(irow) : irow);
1074 const auto A_local_crs_row = lclMtx.rowConst(irow_A);
1075 const auto begin_row = rowptrs(irow);
1076 const auto num_entries = rowptrs(irow + 1) - begin_row;
1077 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, num_entries), [&](const int j) {
1078 const auto colidx = colindices(begin_row + j);
1079 const auto colidx_A = start_row_offset + (reordered ? reverse_perm(colidx) : colidx);
1080 // Find colidx in A_local_crs_row
1081 const int offset = KokkosSparse::findRelOffset(
1082 &A_local_crs_row.colidx(0), A_local_crs_row.length, colidx_A, 0, false);
1083 values(begin_row + j) = A_local_crs_row.value(offset);
1084 });
1085 });
1086 }
1087 }
1088
1089 KokkosSparse::Experimental::spiluk_numeric_streams(exec_space_instances_, KernelHandle_rawptr_v_, LevelOfFill_,
1090 A_local_diagblks_rowmap_v_, A_local_diagblks_entries_v_, A_local_diagblks_values_v_,
1091 L_rowmap_v, L_entries_v, L_values_v,
1092 U_rowmap_v, U_entries_v, U_values_v);
1093
1094 for (int i = 0; i < num_streams_; i++) {
1095 L_v_[i]->fillComplete();
1096 U_v_[i]->fillComplete();
1097 }
1098
1099 L_solver_->compute();
1100 U_solver_->compute();
1101}
1102
1103template <class MatrixType>
1105 using Teuchos::Array;
1106 using Teuchos::ArrayView;
1107 using Teuchos::RCP;
1108 using Teuchos::rcp;
1109 using Teuchos::rcp_const_cast;
1110 using Teuchos::rcp_dynamic_cast;
1111 const char prefix[] = "Ifpack2::RILUK::compute: ";
1112
1113 // initialize() checks this too, but it's easier for users if the
1114 // error shows them the name of the method that they actually
1115 // called, rather than the name of some internally called method.
1116 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix << "The matrix is null. Please "
1117 "call setMatrix() with a nonnull input before calling this method.");
1118 TEUCHOS_TEST_FOR_EXCEPTION(!A_->isFillComplete(), std::runtime_error, prefix << "The matrix is not "
1119 "fill complete. You may not invoke initialize() or compute() with this "
1120 "matrix until the matrix is fill complete. If your matrix is a "
1121 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
1122 "range Maps, if appropriate) before calling this method.");
1123
1124 if (!isInitialized()) {
1125 initialize(); // Don't count this in the compute() time
1126 }
1127
1128 Teuchos::Time timer("RILUK::compute");
1129
1130 // Start timing
1131 Teuchos::TimeMonitor timeMon(timer);
1132 double startTime = timer.wallTime();
1133
1134 isComputed_ = false;
1135
1136 if (!this->isKokkosKernelsSpiluk_) {
1137 compute_serial();
1138 } else {
1139 // Make sure values in A is picked up even in case of pattern reuse
1140 if (!A_local_crs_nc_.is_null()) {
1141 // copy entries into A_local_crs
1142 A_local_crs_nc_->resumeFill();
1143 local_ordinal_type numRows = A_local_->getLocalNumRows();
1144 nonconst_local_inds_host_view_type indices("indices", A_local_->getLocalMaxNumRowEntries());
1145 nonconst_values_host_view_type values("values", A_local_->getLocalMaxNumRowEntries());
1146 for (local_ordinal_type i = 0; i < numRows; i++) {
1147 size_t numEntries = 0;
1148 A_local_->getLocalRowCopy(i, indices, values, numEntries);
1149 A_local_crs_nc_->replaceLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
1150 }
1151 A_local_crs_nc_->fillComplete(A_local_->getDomainMap(), A_local_->getRangeMap());
1152 }
1153
1154 if (!isKokkosKernelsStream_) {
1155 compute_kkspiluk();
1156 } else {
1157 // If streams are on, we potentially have to refresh A_local_diagblks_values_v_
1158 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1159 if (!hasStreamsWithRCB_) {
1160#if KOKKOS_VERSION >= 50100
1161 KokkosSparse::Experimental::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_, hasStreamReordered_);
1162#else
1163 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_, hasStreamReordered_);
1164#endif
1165 } else {
1166#if KOKKOS_VERSION >= 50100
1167 KokkosSparse::Experimental::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, perm_rcb_, reverse_perm_rcb_, partition_sizes_rcb_,
1168 A_local_diagblks_v_);
1169#else
1170 auto A_coordinates_lcl = A_coordinates_->getLocalViewDevice(Tpetra::Access::ReadOnly);
1171 Kokkos::deep_copy(coors_rcb_, A_coordinates_lcl);
1172 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, coors_rcb_,
1173 A_local_diagblks_v_, perm_rcb_);
1174#endif
1175 }
1176 for (int i = 0; i < num_streams_; i++) {
1177 A_local_diagblks_values_v_[i] = A_local_diagblks_v_[i].values;
1178 }
1179
1180 compute_kkspiluk_stream();
1181 }
1182 }
1183
1184 isComputed_ = true;
1185 ++numCompute_;
1186 computeTime_ += (timer.wallTime() - startTime);
1187}
1188
1189namespace Impl {
1190template <typename MV, typename Map>
1191void resetMultiVecIfNeeded(std::unique_ptr<MV>& mv_ptr, const Map& map, const size_t numVectors, bool initialize) {
1192 if (!mv_ptr || mv_ptr->getNumVectors() != numVectors) {
1193 mv_ptr.reset(new MV(map, numVectors, initialize));
1194 }
1195}
1196} // namespace Impl
1197
1198template <class MatrixType>
1200 apply(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1201 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1202 Teuchos::ETransp mode,
1203 scalar_type alpha,
1204 scalar_type beta) const {
1205 using Teuchos::RCP;
1206 using Teuchos::rcpFromRef;
1207
1208 TEUCHOS_TEST_FOR_EXCEPTION(
1209 A_.is_null(), std::runtime_error,
1210 "Ifpack2::RILUK::apply: The matrix is "
1211 "null. Please call setMatrix() with a nonnull input, then initialize() "
1212 "and compute(), before calling this method.");
1213 TEUCHOS_TEST_FOR_EXCEPTION(
1214 !isComputed(), std::runtime_error,
1215 "Ifpack2::RILUK::apply: If you have not yet called compute(), "
1216 "you must call compute() before calling this method.");
1217 TEUCHOS_TEST_FOR_EXCEPTION(
1218 X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
1219 "Ifpack2::RILUK::apply: X and Y do not have the same number of columns. "
1220 "X.getNumVectors() = "
1221 << X.getNumVectors()
1222 << " != Y.getNumVectors() = " << Y.getNumVectors() << ".");
1223 TEUCHOS_TEST_FOR_EXCEPTION(
1224 STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
1225 "Ifpack2::RILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1226 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1227 "fixed. There is a FIXME in this file about this very issue.");
1229 if (!isKokkosKernelsStream_) {
1230 const magnitude_type D_nrm1 = D_->norm1();
1231 TEUCHOS_TEST_FOR_EXCEPTION(STM::isnaninf(D_nrm1), std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf.");
1232 }
1233 Teuchos::Array<magnitude_type> norms(X.getNumVectors());
1234 X.norm1(norms());
1235 bool good = true;
1236 for (size_t j = 0; j < X.getNumVectors(); ++j) {
1237 if (STM::isnaninf(norms[j])) {
1238 good = false;
1239 break;
1240 }
1241 }
1242 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the input X is NaN or Inf.");
1243 }
1244
1245 const scalar_type one = STS::one();
1246 const scalar_type zero = STS::zero();
1247
1248 Teuchos::Time timer("RILUK::apply");
1249 double startTime = timer.wallTime();
1250 { // Start timing
1251 Teuchos::TimeMonitor timeMon(timer);
1252 if (alpha == one && beta == zero) {
1253 if (isKokkosKernelsSpiluk_ && isKokkosKernelsStream_ && (hasStreamReordered_ || hasStreamsWithRCB_)) {
1254 Impl::resetMultiVecIfNeeded(reordered_x_, X.getMap(), X.getNumVectors(), false);
1255 Impl::resetMultiVecIfNeeded(reordered_y_, Y.getMap(), Y.getNumVectors(), false);
1256 Kokkos::fence();
1257
1258 for (size_t j = 0; j < X.getNumVectors(); j++) {
1259 auto X_j = X.getVector(j);
1260 auto ReorderedX_j = reordered_x_->getVectorNonConst(j);
1261 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1262 auto ReorderedX_lcl = ReorderedX_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1263 if (hasStreamReordered_) {
1264 local_ordinal_type stream_begin = 0;
1265 local_ordinal_type stream_end;
1266 for (int i = 0; i < num_streams_; i++) {
1267 auto perm_i = perm_v_[i];
1268 stream_end = stream_begin + perm_i.extent(0);
1269 auto X_lcl_sub = Kokkos::subview(X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1270 auto ReorderedX_lcl_sub = Kokkos::subview(ReorderedX_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1271 Kokkos::parallel_for(
1272 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0, static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(const int& ii) {
1273 ReorderedX_lcl_sub(perm_i(ii)) = X_lcl_sub(ii);
1274 });
1275 stream_begin = stream_end;
1276 }
1277 } else { // hasStreamsWithRCB_
1278 auto perm = perm_rcb_;
1279 Kokkos::parallel_for(
1280 Kokkos::RangePolicy<execution_space>(0, static_cast<int>(X_lcl.extent(0))), KOKKOS_LAMBDA(const int& ii) {
1281 ReorderedX_lcl(perm(ii), 0) = X_lcl(ii, 0);
1282 });
1283 }
1284 }
1285 Kokkos::fence();
1286
1287 if (mode == Teuchos::NO_TRANS) { // Solve L (U Y) = X for Y.
1288 // Solve L Y = X for Y.
1289 L_solver_->apply(*reordered_x_, Y, mode);
1290 // Solve U Y = Y for Y.
1291 U_solver_->apply(Y, *reordered_y_, mode);
1292 } else { // Solve U^P (L^P Y) = X for Y (where P is * or T).
1293 // Solve U^P Y = X for Y.
1294 U_solver_->apply(*reordered_x_, Y, mode);
1295 // Solve L^P Y = Y for Y.
1296 L_solver_->apply(Y, *reordered_y_, mode);
1297 }
1298
1299 for (size_t j = 0; j < Y.getNumVectors(); j++) {
1300 auto Y_j = Y.getVectorNonConst(j);
1301 auto ReorderedY_j = reordered_y_->getVector(j);
1302 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1303 auto ReorderedY_lcl = ReorderedY_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1304 if (hasStreamReordered_) {
1305 local_ordinal_type stream_begin = 0;
1306 local_ordinal_type stream_end;
1307 for (int i = 0; i < num_streams_; i++) {
1308 auto perm_i = perm_v_[i];
1309 stream_end = stream_begin + perm_i.extent(0);
1310 auto Y_lcl_sub = Kokkos::subview(Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1311 auto ReorderedY_lcl_sub = Kokkos::subview(ReorderedY_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1312 Kokkos::parallel_for(
1313 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0, static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(const int& ii) {
1314 Y_lcl_sub(ii) = ReorderedY_lcl_sub(perm_i(ii));
1315 });
1316 stream_begin = stream_end;
1317 }
1318 } else { // hasStreamsWithRCB_
1319 auto perm = perm_rcb_;
1320 Kokkos::parallel_for(
1321 Kokkos::RangePolicy<execution_space>(0, static_cast<int>(Y_lcl.extent(0))), KOKKOS_LAMBDA(const int& ii) {
1322 Y_lcl(ii, 0) = ReorderedY_lcl(perm(ii), 0);
1323 });
1324 }
1325 }
1326 Kokkos::fence();
1327 } else {
1328 if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
1329#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1330 // NOTE (Nov-15-2022):
1331 // This is a workaround for Cuda >= 11.3 (using cusparseSpSV)
1332 // since cusparseSpSV_solve() does not support in-place computation
1333 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(), false);
1334
1335 // Start by solving L Y_tmp = X for Y_tmp.
1336 L_solver_->apply(X, *Y_tmp_, mode);
1337
1338 if (!this->isKokkosKernelsSpiluk_) {
1339 // Solve D Y = Y. The operation lets us do this in place in Y, so we can
1340 // write "solve D Y = Y for Y."
1341 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1342 }
1343
1344 U_solver_->apply(*Y_tmp_, Y, mode); // Solve U Y = Y_tmp.
1345#else
1346 // Start by solving L Y = X for Y.
1347 L_solver_->apply(X, Y, mode);
1348
1349 if (!this->isKokkosKernelsSpiluk_) {
1350 // Solve D Y = Y. The operation lets us do this in place in Y, so we can
1351 // write "solve D Y = Y for Y."
1352 Y.elementWiseMultiply(one, *D_, Y, zero);
1353 }
1354
1355 U_solver_->apply(Y, Y, mode); // Solve U Y = Y.
1356#endif
1357 } else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
1358#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1359 // NOTE (Nov-15-2022):
1360 // This is a workaround for Cuda >= 11.3 (using cusparseSpSV)
1361 // since cusparseSpSV_solve() does not support in-place computation
1362 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(), false);
1363
1364 // Start by solving U^P Y_tmp = X for Y_tmp.
1365 U_solver_->apply(X, *Y_tmp_, mode);
1366
1367 if (!this->isKokkosKernelsSpiluk_) {
1368 // Solve D^P Y = Y.
1369 //
1370 // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we
1371 // need to do an elementwise multiply with the conjugate of
1372 // D_, not just with D_ itself.
1373 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1374 }
1375
1376 L_solver_->apply(*Y_tmp_, Y, mode); // Solve L^P Y = Y_tmp.
1377#else
1378 // Start by solving U^P Y = X for Y.
1379 U_solver_->apply(X, Y, mode);
1380
1381 if (!this->isKokkosKernelsSpiluk_) {
1382 // Solve D^P Y = Y.
1383 //
1384 // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we
1385 // need to do an elementwise multiply with the conjugate of
1386 // D_, not just with D_ itself.
1387 Y.elementWiseMultiply(one, *D_, Y, zero);
1388 }
1389
1390 L_solver_->apply(Y, Y, mode); // Solve L^P Y = Y.
1391#endif
1392 }
1393 }
1394 } else { // alpha != 1 or beta != 0
1395 if (alpha == zero) {
1396 // The special case for beta == 0 ensures that if Y contains Inf
1397 // or NaN values, we replace them with 0 (following BLAS
1398 // convention), rather than multiplying them by 0 to get NaN.
1399 if (beta == zero) {
1400 Y.putScalar(zero);
1401 } else {
1402 Y.scale(beta);
1403 }
1404 } else { // alpha != zero
1405 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(), false);
1406 apply(X, *Y_tmp_, mode);
1407 Y.update(alpha, *Y_tmp_, beta);
1408 }
1409 }
1410 } // end timing
1411
1413 Teuchos::Array<magnitude_type> norms(Y.getNumVectors());
1414 Y.norm1(norms());
1415 bool good = true;
1416 for (size_t j = 0; j < Y.getNumVectors(); ++j) {
1417 if (STM::isnaninf(norms[j])) {
1418 good = false;
1419 break;
1420 }
1421 }
1422 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the output Y is NaN or Inf.");
1423 }
1424
1425 ++numApply_;
1426 applyTime_ += (timer.wallTime() - startTime);
1427}
1428
1429// VINH comment out since multiply() is not needed anywhere
1430// template<class MatrixType>
1431// void RILUK<MatrixType>::
1432// multiply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1433// Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1434// const Teuchos::ETransp mode) const
1435//{
1436// const scalar_type zero = STS::zero ();
1437// const scalar_type one = STS::one ();
1438//
1439// if (mode != Teuchos::NO_TRANS) {
1440// U_->apply (X, Y, mode); //
1441// Y.update (one, X, one); // Y = Y + X (account for implicit unit diagonal)
1442//
1443// // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we need
1444// // to do an elementwise multiply with the conjugate of D_, not
1445// // just with D_ itself.
1446// Y.elementWiseMultiply (one, *D_, Y, zero); // y = D*y (D_ has inverse of diagonal)
1447//
1448// MV Y_tmp (Y, Teuchos::Copy); // Need a temp copy of Y
1449// L_->apply (Y_tmp, Y, mode);
1450// Y.update (one, Y_tmp, one); // (account for implicit unit diagonal)
1451// }
1452// else {
1453// L_->apply (X, Y, mode);
1454// Y.update (one, X, one); // Y = Y + X (account for implicit unit diagonal)
1455// Y.elementWiseMultiply (one, *D_, Y, zero); // y = D*y (D_ has inverse of diagonal)
1456// MV Y_tmp (Y, Teuchos::Copy); // Need a temp copy of Y1
1457// U_->apply (Y_tmp, Y, mode);
1458// Y.update (one, Y_tmp, one); // (account for implicit unit diagonal)
1459// }
1460// }
1461
1462template <class MatrixType>
1464 std::ostringstream os;
1465
1466 // Output is a valid YAML dictionary in flow style. If you don't
1467 // like everything on a single line, you should call describe()
1468 // instead.
1469 os << "\"Ifpack2::RILUK\": {";
1470 os << "Initialized: " << (isInitialized() ? "true" : "false") << ", "
1471 << "Computed: " << (isComputed() ? "true" : "false") << ", ";
1472
1473 os << "Level-of-fill: " << getLevelOfFill() << ", ";
1474
1475 if (isKokkosKernelsSpiluk_) os << "KK-SPILUK, ";
1476 if (isKokkosKernelsStream_) os << "KK-Stream, ";
1477
1478 if (A_.is_null()) {
1479 os << "Matrix: null";
1480 } else {
1481 os << "Global matrix dimensions: ["
1482 << A_->getGlobalNumRows() << ", " << A_->getGlobalNumCols() << "]"
1483 << ", Global nnz: " << A_->getGlobalNumEntries();
1484 }
1485
1486 if (!L_solver_.is_null()) os << ", " << L_solver_->description();
1487 if (!U_solver_.is_null()) os << ", " << U_solver_->description();
1488
1489 os << "}";
1490 return os.str();
1491}
1492
1493} // namespace Ifpack2
1494
1495#define IFPACK2_RILUK_INSTANT(S, LO, GO, N) \
1496 template class Ifpack2::RILUK<Tpetra::RowMatrix<S, LO, GO, N> >;
1497
1498#endif
Declaration of Ifpack2::Details::Behavior, a class that describes Ifpack2's run-time behavior.
Declaration of MDF interface.
static bool debug()
Whether Ifpack2 is in debug mode.
Definition Ifpack2_Details_Behavior.cpp:31
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition Ifpack2_IlukGraph.hpp:67
Access only local rows and columns of a sparse matrix.
Definition Ifpack2_LocalFilter_decl.hpp:128
"Preconditioner" that solves local sparse triangular systems.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:54
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition Ifpack2_RILUK_decl.hpp:218
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition Ifpack2_RILUK_def.hpp:469
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition Ifpack2_RILUK_decl.hpp:255
virtual ~RILUK()
Destructor (declared virtual for memory safety).
Definition Ifpack2_RILUK_def.hpp:100
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition Ifpack2_RILUK_def.hpp:1200
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition Ifpack2_RILUK_decl.hpp:233
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition Ifpack2_RILUK_def.hpp:241
const crs_matrix_type & getU() const
Return the U factor of the ILU factorization.
Definition Ifpack2_RILUK_def.hpp:192
std::string description() const
A one-line description of this object.
Definition Ifpack2_RILUK_def.hpp:1463
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition Ifpack2_RILUK_def.hpp:123
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition Ifpack2_RILUK_decl.hpp:221
Teuchos::RCP< const coord_type > getCoord() const
Get the coordinates associated with the input matrix's rows.
Definition Ifpack2_RILUK_def.hpp:433
const Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > & getD() const
Return the diagonal entries of the ILU factorization.
Definition Ifpack2_RILUK_def.hpp:179
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition Ifpack2_RILUK_decl.hpp:230
void setCoord(const Teuchos::RCP< const coord_type > &A_coordinates)
Set the matrix rows' coordinates.
Definition Ifpack2_RILUK_def.hpp:155
Teuchos::RCP< const row_matrix_type > getMatrix() const
Get the input matrix.
Definition Ifpack2_RILUK_def.hpp:421
Teuchos::RCP< const crs_matrix_type > getCrsMatrix() const
Return the input matrix A as a Tpetra::CrsMatrix, if possible; else throws.
Definition Ifpack2_RILUK_def.hpp:427
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition Ifpack2_RILUK_def.hpp:221
void setParameters(const Teuchos::ParameterList &params)
Definition Ifpack2_RILUK_def.hpp:300
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition Ifpack2_RILUK_decl.hpp:224
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition Ifpack2_RILUK_decl.hpp:227
const crs_matrix_type & getL() const
Return the L factor of the ILU factorization.
Definition Ifpack2_RILUK_def.hpp:163
node_type::execution_space execution_space
The Kokkos execution space of the input MatrixType.
Definition Ifpack2_RILUK_decl.hpp:239
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition Ifpack2_RILUK_def.hpp:204
void compute()
Compute the (numeric) incomplete factorization.
Definition Ifpack2_RILUK_def.hpp:1104
static Teuchos::RCP< const row_matrix_type > makeLocalFilter(const Teuchos::RCP< const row_matrix_type > &A)
Return A, wrapped in a LocalFilter, if necessary.
Definition Ifpack2_RILUK_def.hpp:439
Ifpack2 implementation details.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40