Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_RILUK_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_CRSRILUK_DEF_HPP
11#define IFPACK2_CRSRILUK_DEF_HPP
12
14#include "Ifpack2_LocalFilter.hpp"
15#include "Tpetra_CrsMatrix.hpp"
16#include "Teuchos_StandardParameterEntryValidators.hpp"
17#include "Ifpack2_LocalSparseTriangularSolver.hpp"
18#include "Ifpack2_Details_getParamTryingTypes.hpp"
19#include "Ifpack2_Details_getCrsMatrix.hpp"
20#include "Kokkos_Sort.hpp"
21#include "KokkosSparse_Utils.hpp"
22#include "KokkosKernels_Sorting.hpp"
23#include "KokkosSparse_IOUtils.hpp"
24
25namespace Ifpack2 {
26
27namespace Details {
28struct IlukImplType {
29 enum Enum {
30 Serial,
31 KSPILUK
32 };
33
34 static void loadPLTypeOption(Teuchos::Array<std::string>& type_strs, Teuchos::Array<Enum>& type_enums) {
35 type_strs.resize(2);
36 type_strs[0] = "Serial";
37 type_strs[1] = "KSPILUK";
38 type_enums.resize(2);
39 type_enums[0] = Serial;
40 type_enums[1] = KSPILUK;
41 }
42};
43} // namespace Details
44
45template <class MatrixType>
46RILUK<MatrixType>::RILUK(const Teuchos::RCP<const row_matrix_type>& Matrix_in, const Teuchos::RCP<const coord_type>& Matrix_in_coordinates)
47 : A_(Matrix_in)
48 , A_coordinates_(Matrix_in_coordinates)
49 , LevelOfFill_(0)
50 , Overalloc_(2.)
51 , isAllocated_(false)
52 , isInitialized_(false)
53 , isComputed_(false)
54 , numInitialize_(0)
55 , numCompute_(0)
56 , numApply_(0)
57 , initializeTime_(0.0)
58 , computeTime_(0.0)
59 , applyTime_(0.0)
60 , RelaxValue_(Teuchos::ScalarTraits<magnitude_type>::zero())
61 , Athresh_(Teuchos::ScalarTraits<magnitude_type>::zero())
62 , Rthresh_(Teuchos::ScalarTraits<magnitude_type>::one())
63 , isKokkosKernelsSpiluk_(false)
64 , isKokkosKernelsStream_(false)
65 , num_streams_(0)
66 , hasStreamReordered_(false)
67 , hasStreamsWithRCB_(false) {
68 allocateSolvers();
69}
70
71template <class MatrixType>
72RILUK<MatrixType>::RILUK(const Teuchos::RCP<const crs_matrix_type>& Matrix_in, const Teuchos::RCP<const coord_type>& Matrix_in_coordinates)
73 : A_(Matrix_in)
74 , A_coordinates_(Matrix_in_coordinates)
75 , LevelOfFill_(0)
76 , Overalloc_(2.)
77 , isAllocated_(false)
78 , isInitialized_(false)
79 , isComputed_(false)
80 , numInitialize_(0)
81 , numCompute_(0)
82 , numApply_(0)
83 , initializeTime_(0.0)
84 , computeTime_(0.0)
85 , applyTime_(0.0)
86 , RelaxValue_(Teuchos::ScalarTraits<magnitude_type>::zero())
87 , Athresh_(Teuchos::ScalarTraits<magnitude_type>::zero())
88 , Rthresh_(Teuchos::ScalarTraits<magnitude_type>::one())
89 , isKokkosKernelsSpiluk_(false)
90 , isKokkosKernelsStream_(false)
91 , num_streams_(0)
92 , hasStreamReordered_(false)
93 , hasStreamsWithRCB_(false) {
94 allocateSolvers();
95}
96
97template <class MatrixType>
99 if (!isKokkosKernelsStream_) {
100 if (Teuchos::nonnull(KernelHandle_)) {
101 KernelHandle_->destroy_spiluk_handle();
102 }
103 } else {
104 for (size_t i = 0; i < KernelHandle_v_.size(); i++) {
105 if (Teuchos::nonnull(KernelHandle_v_[i])) {
106 KernelHandle_v_[i]->destroy_spiluk_handle();
107 }
108 }
109 }
110}
111
112template <class MatrixType>
114 L_solver_ = Teuchos::rcp(new LocalSparseTriangularSolver<row_matrix_type>());
115 L_solver_->setObjectLabel("lower");
116 U_solver_ = Teuchos::rcp(new LocalSparseTriangularSolver<row_matrix_type>());
117 U_solver_->setObjectLabel("upper");
118}
119
120template <class MatrixType>
121void RILUK<MatrixType>::setMatrix(const Teuchos::RCP<const row_matrix_type>& A) {
122 // It's legal for A to be null; in that case, you may not call
123 // initialize() until calling setMatrix() with a nonnull input.
124 // Regardless, setting the matrix invalidates any previous
125 // factorization.
126 if (A.getRawPtr() != A_.getRawPtr()) {
127 isAllocated_ = false;
128 isInitialized_ = false;
129 isComputed_ = false;
130 A_local_ = Teuchos::null;
131 Graph_ = Teuchos::null;
132
133 // The sparse triangular solvers get a triangular factor as their
134 // input matrix. The triangular factors L_ and U_ are getting
135 // reset, so we reset the solvers' matrices to null. Do that
136 // before setting L_ and U_ to null, so that latter step actually
137 // frees the factors.
138 if (!L_solver_.is_null()) {
139 L_solver_->setMatrix(Teuchos::null);
140 }
141 if (!U_solver_.is_null()) {
142 U_solver_->setMatrix(Teuchos::null);
143 }
144
145 L_ = Teuchos::null;
146 U_ = Teuchos::null;
147 D_ = Teuchos::null;
148 A_ = A;
149 }
150}
151
152template <class MatrixType>
153void RILUK<MatrixType>::setCoord(const Teuchos::RCP<const coord_type>& A_coordinates) {
154 if (A_coordinates.getRawPtr() != A_coordinates_.getRawPtr()) {
155 A_coordinates_ = A_coordinates;
156 }
157}
158
159template <class MatrixType>
162 TEUCHOS_TEST_FOR_EXCEPTION(
163 L_.is_null(), std::runtime_error,
164 "Ifpack2::RILUK::getL: The L factor "
165 "is null. Please call initialize() (and preferably also compute()) "
166 "before calling this method. If the input matrix has not yet been set, "
167 "you must first call setMatrix() with a nonnull input matrix before you "
168 "may call initialize() or compute().");
169 return *L_;
170}
171
172template <class MatrixType>
173const Tpetra::Vector<typename RILUK<MatrixType>::scalar_type,
178 TEUCHOS_TEST_FOR_EXCEPTION(
179 D_.is_null(), std::runtime_error,
180 "Ifpack2::RILUK::getD: The D factor "
181 "(of diagonal entries) is null. Please call initialize() (and "
182 "preferably also compute()) before calling this method. If the input "
183 "matrix has not yet been set, you must first call setMatrix() with a "
184 "nonnull input matrix before you may call initialize() or compute().");
185 return *D_;
186}
187
188template <class MatrixType>
191 TEUCHOS_TEST_FOR_EXCEPTION(
192 U_.is_null(), std::runtime_error,
193 "Ifpack2::RILUK::getU: The U factor "
194 "is null. Please call initialize() (and preferably also compute()) "
195 "before calling this method. If the input matrix has not yet been set, "
196 "you must first call setMatrix() with a nonnull input matrix before you "
197 "may call initialize() or compute().");
198 return *U_;
199}
200
201template <class MatrixType>
203 TEUCHOS_TEST_FOR_EXCEPTION(
204 A_.is_null(), std::runtime_error,
205 "Ifpack2::RILUK::getNodeSmootherComplexity: "
206 "The input matrix A is null. Please call setMatrix() with a nonnull "
207 "input matrix, then call compute(), before calling this method.");
208 // RILUK methods cost roughly one apply + the nnz in the upper+lower triangles
209 if (!L_.is_null() && !U_.is_null())
210 return A_->getLocalNumEntries() + L_->getLocalNumEntries() + U_->getLocalNumEntries();
211 else
212 return 0;
213}
214
215template <class MatrixType>
216Teuchos::RCP<const Tpetra::Map<typename RILUK<MatrixType>::local_ordinal_type,
220 TEUCHOS_TEST_FOR_EXCEPTION(
221 A_.is_null(), std::runtime_error,
222 "Ifpack2::RILUK::getDomainMap: "
223 "The matrix is null. Please call setMatrix() with a nonnull input "
224 "before calling this method.");
225
226 // FIXME (mfh 25 Jan 2014) Shouldn't this just come from A_?
227 TEUCHOS_TEST_FOR_EXCEPTION(
228 Graph_.is_null(), std::runtime_error,
229 "Ifpack2::RILUK::getDomainMap: "
230 "The computed graph is null. Please call initialize() before calling "
231 "this method.");
232 return Graph_->getL_Graph()->getDomainMap();
233}
234
235template <class MatrixType>
236Teuchos::RCP<const Tpetra::Map<typename RILUK<MatrixType>::local_ordinal_type,
240 TEUCHOS_TEST_FOR_EXCEPTION(
241 A_.is_null(), std::runtime_error,
242 "Ifpack2::RILUK::getRangeMap: "
243 "The matrix is null. Please call setMatrix() with a nonnull input "
244 "before calling this method.");
245
246 // FIXME (mfh 25 Jan 2014) Shouldn't this just come from A_?
247 TEUCHOS_TEST_FOR_EXCEPTION(
248 Graph_.is_null(), std::runtime_error,
249 "Ifpack2::RILUK::getRangeMap: "
250 "The computed graph is null. Please call initialize() before calling "
251 "this method.");
252 return Graph_->getL_Graph()->getRangeMap();
253}
254
255template <class MatrixType>
257 using Teuchos::null;
258 using Teuchos::rcp;
259
260 if (!isAllocated_) {
261 if (!isKokkosKernelsStream_) {
262 // Deallocate any existing storage. This avoids storing 2x
263 // memory, since RCP op= does not deallocate until after the
264 // assignment.
265 L_ = null;
266 U_ = null;
267 D_ = null;
268
269 // Allocate Matrix using ILUK graphs
270 L_ = rcp(new crs_matrix_type(Graph_->getL_Graph()));
271 U_ = rcp(new crs_matrix_type(Graph_->getU_Graph()));
272 L_->setAllToScalar(STS::zero()); // Zero out L and U matrices
273 U_->setAllToScalar(STS::zero());
274
275 // FIXME (mfh 24 Jan 2014) This assumes domain == range Map for L and U.
276 L_->fillComplete();
277 U_->fillComplete();
278 D_ = rcp(new vec_type(Graph_->getL_Graph()->getRowMap()));
279 } else {
280 L_v_ = std::vector<Teuchos::RCP<crs_matrix_type> >(num_streams_, null);
281 U_v_ = std::vector<Teuchos::RCP<crs_matrix_type> >(num_streams_, null);
282 for (int i = 0; i < num_streams_; i++) {
283 L_v_[i] = rcp(new crs_matrix_type(Graph_v_[i]->getL_Graph()));
284 U_v_[i] = rcp(new crs_matrix_type(Graph_v_[i]->getU_Graph()));
285 L_v_[i]->setAllToScalar(STS::zero()); // Zero out L and U matrices
286 U_v_[i]->setAllToScalar(STS::zero());
287
288 L_v_[i]->fillComplete();
289 U_v_[i]->fillComplete();
290 }
291 }
292 }
293 isAllocated_ = true;
294}
295
296template <class MatrixType>
298 setParameters(const Teuchos::ParameterList& params) {
299 using Details::getParamTryingTypes;
300 using Teuchos::Array;
301 using Teuchos::ParameterList;
302 using Teuchos::RCP;
303 const char prefix[] = "Ifpack2::RILUK: ";
304
305 // Default values of the various parameters.
306 int fillLevel = 0;
307 magnitude_type absThresh = STM::zero();
308 magnitude_type relThresh = STM::one();
309 magnitude_type relaxValue = STM::zero();
310 double overalloc = 2.;
311 int nstreams = 0;
312
313 // "fact: iluk level-of-fill" parsing is more complicated, because
314 // we want to allow as many types as make sense. int is the native
315 // type, but we also want to accept double (for backwards
316 // compatibilty with ILUT). You can't cast arbitrary magnitude_type
317 // (e.g., Sacado::MP::Vector) to int, so we use float instead, to
318 // get coverage of the most common magnitude_type cases. Weirdly,
319 // there's an Ifpack2 test that sets the fill level as a
320 // global_ordinal_type.
321 {
322 const std::string paramName("fact: iluk level-of-fill");
323 getParamTryingTypes<int, int, global_ordinal_type, double, float>(fillLevel, params, paramName, prefix);
324 }
325 // For the other parameters, we prefer magnitude_type, but allow
326 // double for backwards compatibility.
328 const std::string paramName("fact: absolute threshold");
329 getParamTryingTypes<magnitude_type, magnitude_type, double>(absThresh, params, paramName, prefix);
331 {
332 const std::string paramName("fact: relative threshold");
333 getParamTryingTypes<magnitude_type, magnitude_type, double>(relThresh, params, paramName, prefix);
334 }
335 {
336 const std::string paramName("fact: relax value");
337 getParamTryingTypes<magnitude_type, magnitude_type, double>(relaxValue, params, paramName, prefix);
338 }
339 {
340 const std::string paramName("fact: iluk overalloc");
341 getParamTryingTypes<double, double>(overalloc, params, paramName, prefix);
342 }
343
344 // Parsing implementation type
345 Details::IlukImplType::Enum ilukimplType = Details::IlukImplType::Serial;
346 do {
347 static const char typeName[] = "fact: type";
348
349 if (!params.isType<std::string>(typeName)) break;
350
351 // Map std::string <-> IlukImplType::Enum.
352 Array<std::string> ilukimplTypeStrs;
353 Array<Details::IlukImplType::Enum> ilukimplTypeEnums;
354 Details::IlukImplType::loadPLTypeOption(ilukimplTypeStrs, ilukimplTypeEnums);
355 Teuchos::StringToIntegralParameterEntryValidator<Details::IlukImplType::Enum>
356 s2i(ilukimplTypeStrs(), ilukimplTypeEnums(), typeName, false);
357
358 ilukimplType = s2i.getIntegralValue(params.get<std::string>(typeName));
359 } while (0);
360
361 if (ilukimplType == Details::IlukImplType::KSPILUK) {
362 this->isKokkosKernelsSpiluk_ = true;
363 } else {
364 this->isKokkosKernelsSpiluk_ = false;
365 }
366
367 {
368 const std::string paramName("fact: kspiluk number-of-streams");
369 getParamTryingTypes<int, int, global_ordinal_type>(nstreams, params, paramName, prefix);
370 }
371
372 // Forward to trisolvers.
373 L_solver_->setParameters(params);
374 U_solver_->setParameters(params);
375
376 // "Commit" the values only after validating all of them. This
377 // ensures that there are no side effects if this routine throws an
378 // exception.
379
380 LevelOfFill_ = fillLevel;
381 Overalloc_ = overalloc;
382#ifdef KOKKOS_ENABLE_OPENMP
383 if constexpr (std::is_same_v<execution_space, Kokkos::OpenMP>) {
384 nstreams = std::min(nstreams, execution_space{}.concurrency());
385 }
386#endif
387 num_streams_ = nstreams;
388
389 if (num_streams_ >= 1) {
390 this->isKokkosKernelsStream_ = true;
391 // Will we do reordering in streams?
392 if (params.isParameter("fact: kspiluk reordering in streams"))
393 hasStreamReordered_ = params.get<bool>("fact: kspiluk reordering in streams");
394
395 if (params.isParameter("fact: kspiluk streams use rcb"))
396 hasStreamsWithRCB_ = params.get<bool>("fact: kspiluk streams use rcb");
397 } else {
398 this->isKokkosKernelsStream_ = false;
399 }
400
401 // mfh 28 Nov 2012: The previous code would not assign Athresh_,
402 // Rthresh_, or RelaxValue_, if the read-in value was -1. I don't
403 // know if keeping this behavior is correct, but I'll keep it just
404 // so as not to change previous behavior.
405
406 if (absThresh != -STM::one()) {
407 Athresh_ = absThresh;
408 }
409 if (relThresh != -STM::one()) {
410 Rthresh_ = relThresh;
411 }
412 if (relaxValue != -STM::one()) {
413 RelaxValue_ = relaxValue;
414 }
415}
416
417template <class MatrixType>
418Teuchos::RCP<const typename RILUK<MatrixType>::row_matrix_type>
420 return Teuchos::rcp_implicit_cast<const row_matrix_type>(A_);
421}
422
423template <class MatrixType>
424Teuchos::RCP<const typename RILUK<MatrixType>::crs_matrix_type>
426 return Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_, true);
428
429template <class MatrixType>
430Teuchos::RCP<const typename RILUK<MatrixType>::coord_type>
432 return A_coordinates_;
433}
434
435template <class MatrixType>
436Teuchos::RCP<const typename RILUK<MatrixType>::row_matrix_type>
437RILUK<MatrixType>::makeLocalFilter(const Teuchos::RCP<const row_matrix_type>& A) {
438 using Teuchos::RCP;
439 using Teuchos::rcp;
440 using Teuchos::rcp_dynamic_cast;
441 using Teuchos::rcp_implicit_cast;
442
443 // If A_'s communicator only has one process, or if its column and
444 // row Maps are the same, then it is already local, so use it
445 // directly.
446 if (A->getRowMap()->getComm()->getSize() == 1 ||
447 A->getRowMap()->isSameAs(*(A->getColMap()))) {
448 return A;
449 }
450
451 // If A_ is already a LocalFilter, then use it directly. This
452 // should be the case if RILUK is being used through
453 // AdditiveSchwarz, for example.
454 RCP<const LocalFilter<row_matrix_type> > A_lf_r =
455 rcp_dynamic_cast<const LocalFilter<row_matrix_type> >(A);
456 if (!A_lf_r.is_null()) {
457 return rcp_implicit_cast<const row_matrix_type>(A_lf_r);
458 } else {
459 // A_'s communicator has more than one process, its row Map and
460 // its column Map differ, and A_ is not a LocalFilter. Thus, we
461 // have to wrap it in a LocalFilter.
462 return rcp(new LocalFilter<row_matrix_type>(A));
464}
465
466template <class MatrixType>
468 using Teuchos::Array;
469 using Teuchos::ArrayView;
470 using Teuchos::RCP;
471 using Teuchos::rcp;
472 using Teuchos::rcp_const_cast;
473 using Teuchos::rcp_dynamic_cast;
474 using Teuchos::rcp_implicit_cast;
475 typedef Tpetra::CrsGraph<local_ordinal_type,
477 node_type>
478 crs_graph_type;
479 typedef Tpetra::Map<local_ordinal_type,
481 node_type>
482 crs_map_type;
483 const char prefix[] = "Ifpack2::RILUK::initialize: ";
484
485 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix << "The matrix is null. Please "
486 "call setMatrix() with a nonnull input before calling this method.");
487 TEUCHOS_TEST_FOR_EXCEPTION(!A_->isFillComplete(), std::runtime_error, prefix << "The matrix is not "
488 "fill complete. You may not invoke initialize() or compute() with this "
489 "matrix until the matrix is fill complete. If your matrix is a "
490 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
491 "range Maps, if appropriate) before calling this method.");
492
493 Teuchos::Time timer("RILUK::initialize");
494 double startTime = timer.wallTime();
495 { // Start timing
496 Teuchos::TimeMonitor timeMon(timer);
497
498 // Calling initialize() means that the user asserts that the graph
499 // of the sparse matrix may have changed. We must not just reuse
500 // the previous graph in that case.
501 //
502 // Regarding setting isAllocated_ to false: Eventually, we may want
503 // some kind of clever memory reuse strategy, but it's always
504 // correct just to blow everything away and start over.
505 isInitialized_ = false;
506 isAllocated_ = false;
507 isComputed_ = false;
508 Graph_ = Teuchos::null;
509 Y_tmp_ = nullptr;
510 reordered_x_ = nullptr;
511 reordered_y_ = nullptr;
512
513 if (isKokkosKernelsStream_) {
514 Graph_v_ = std::vector<Teuchos::RCP<iluk_graph_type> >(num_streams_);
515 A_local_diagblks_v_ = std::vector<local_matrix_device_type>(num_streams_);
516 for (int i = 0; i < num_streams_; i++) {
517 Graph_v_[i] = Teuchos::null;
518 }
519 }
520
521 A_local_ = makeLocalFilter(A_);
522
523 TEUCHOS_TEST_FOR_EXCEPTION(
524 A_local_.is_null(), std::logic_error,
525 "Ifpack2::RILUK::initialize: "
526 "makeLocalFilter returned null; it failed to compute A_local. "
527 "Please report this bug to the Ifpack2 developers.");
528
529 // FIXME (mfh 24 Jan 2014, 26 Mar 2014) It would be more efficient
530 // to rewrite RILUK so that it works with any RowMatrix input, not
531 // just CrsMatrix. (That would require rewriting IlukGraph to
532 // handle a Tpetra::RowGraph.) However, to make it work for now,
533 // we just copy the input matrix if it's not a CrsMatrix.
534
535 {
536 A_local_crs_ = Details::getCrsMatrix(A_local_);
537 if (A_local_crs_.is_null()) {
538 local_ordinal_type numRows = A_local_->getLocalNumRows();
539 Array<size_t> entriesPerRow(numRows);
540 for (local_ordinal_type i = 0; i < numRows; i++) {
541 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
543 A_local_crs_nc_ =
544 rcp(new crs_matrix_type(A_local_->getRowMap(),
545 A_local_->getColMap(),
546 entriesPerRow()));
547 // copy entries into A_local_crs
548 nonconst_local_inds_host_view_type indices("indices", A_local_->getLocalMaxNumRowEntries());
549 nonconst_values_host_view_type values("values", A_local_->getLocalMaxNumRowEntries());
550 for (local_ordinal_type i = 0; i < numRows; i++) {
551 size_t numEntries = 0;
552 A_local_->getLocalRowCopy(i, indices, values, numEntries);
553 A_local_crs_nc_->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
554 }
555 A_local_crs_nc_->fillComplete(A_local_->getDomainMap(), A_local_->getRangeMap());
556 A_local_crs_ = rcp_const_cast<const crs_matrix_type>(A_local_crs_nc_);
557 }
558 if (!isKokkosKernelsStream_) {
559 Graph_ = rcp(new Ifpack2::IlukGraph<crs_graph_type, kk_handle_type>(A_local_crs_->getCrsGraph(),
560 LevelOfFill_, 0, Overalloc_));
561 } else {
562 std::vector<int> weights(num_streams_);
563 std::fill(weights.begin(), weights.end(), 1);
564 exec_space_instances_ = Kokkos::Experimental::partition_space(execution_space(), weights);
565
566 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
567 if (!hasStreamReordered_) {
568 if (hasStreamsWithRCB_) {
569 TEUCHOS_TEST_FOR_EXCEPTION(A_coordinates_.is_null(), std::runtime_error, prefix << "The coordinates associated with rows of the input matrix is null while RILUK uses streams with RCB. Please call setCoord() with a nonnull input before calling this method.");
570 auto A_coordinates_lcl = A_coordinates_->getLocalViewDevice(Tpetra::Access::ReadOnly);
571 perm_rcb_ = perm_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_rcb_"), A_coordinates_lcl.extent(0));
572 coors_rcb_ = coors_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "coors_rcb_"), A_coordinates_lcl.extent(0), A_coordinates_lcl.extent(1));
573 Kokkos::deep_copy(coors_rcb_, A_coordinates_lcl);
574 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, coors_rcb_,
575 A_local_diagblks_v_, perm_rcb_);
576 } else {
577 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_);
578 }
579 } else {
580 perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_, true);
581 reverse_perm_v_.resize(perm_v_.size());
582 for (size_t istream = 0; istream < perm_v_.size(); ++istream) {
583 using perm_type = typename lno_nonzero_view_t::non_const_type;
584 const auto perm = perm_v_[istream];
585 const auto perm_length = perm.extent(0);
586 perm_type reverse_perm(
587 Kokkos::view_alloc(Kokkos::WithoutInitializing, "reverse_perm"),
588 perm_length);
589 Kokkos::parallel_for(
590 Kokkos::RangePolicy<execution_space>(exec_space_instances_[istream], 0, perm_length),
591 KOKKOS_LAMBDA(const local_ordinal_type ii) {
592 reverse_perm(perm(ii)) = ii;
593 });
594 reverse_perm_v_[istream] = reverse_perm;
595 }
596 }
597
598 A_local_diagblks_rowmap_v_ = std::vector<lno_row_view_t>(num_streams_);
599 A_local_diagblks_entries_v_ = std::vector<lno_nonzero_view_t>(num_streams_);
600 A_local_diagblks_values_v_ = std::vector<scalar_nonzero_view_t>(num_streams_);
601
602 for (int i = 0; i < num_streams_; i++) {
603 A_local_diagblks_rowmap_v_[i] = A_local_diagblks_v_[i].graph.row_map;
604 A_local_diagblks_entries_v_[i] = A_local_diagblks_v_[i].graph.entries;
605 A_local_diagblks_values_v_[i] = A_local_diagblks_v_[i].values;
606
607 Teuchos::RCP<const crs_map_type> A_local_diagblks_RowMap = rcp(new crs_map_type(A_local_diagblks_v_[i].numRows(),
608 A_local_diagblks_v_[i].numRows(),
609 A_local_crs_->getRowMap()->getComm()));
610 Teuchos::RCP<const crs_map_type> A_local_diagblks_ColMap = rcp(new crs_map_type(A_local_diagblks_v_[i].numCols(),
611 A_local_diagblks_v_[i].numCols(),
612 A_local_crs_->getColMap()->getComm()));
613 Teuchos::RCP<crs_matrix_type> A_local_diagblks = rcp(new crs_matrix_type(A_local_diagblks_RowMap,
614 A_local_diagblks_ColMap,
615 A_local_diagblks_v_[i]));
616 Graph_v_[i] = rcp(new Ifpack2::IlukGraph<crs_graph_type, kk_handle_type>(A_local_diagblks->getCrsGraph(),
617 LevelOfFill_, 0, Overalloc_));
618 }
619 }
620 }
621
622 if (this->isKokkosKernelsSpiluk_) {
623 if (!isKokkosKernelsStream_) {
624 this->KernelHandle_ = Teuchos::rcp(new kk_handle_type());
625 KernelHandle_->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
626 A_local_->getLocalNumRows(),
627 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1),
628 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1));
629 Graph_->initialize(KernelHandle_); // this calls spiluk_symbolic
630 } else {
631 KernelHandle_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(num_streams_);
632 for (int i = 0; i < num_streams_; i++) {
633 KernelHandle_v_[i] = Teuchos::rcp(new kk_handle_type());
634 KernelHandle_v_[i]->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
635 A_local_diagblks_v_[i].numRows(),
636 2 * A_local_diagblks_v_[i].nnz() * (LevelOfFill_ + 1),
637 2 * A_local_diagblks_v_[i].nnz() * (LevelOfFill_ + 1));
638 Graph_v_[i]->initialize(KernelHandle_v_[i]); // this calls spiluk_symbolic
639 }
640 }
641 } else {
642 Graph_->initialize();
643 }
644
645 allocate_L_and_U();
646 checkOrderingConsistency(*A_local_);
647 if (!isKokkosKernelsStream_) {
648 L_solver_->setMatrix(L_);
649 } else {
650 L_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
651 L_solver_->setMatrices(L_v_);
652 }
653 L_solver_->initialize();
654
655 if (!isKokkosKernelsStream_) {
656 U_solver_->setMatrix(U_);
657 } else {
658 U_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
659 U_solver_->setMatrices(U_v_);
660 }
661 U_solver_->initialize();
662
663 // Do not call initAllValues. compute() always calls initAllValues to
664 // fill L and U with possibly new numbers. initialize() is concerned
665 // only with the nonzero pattern.
666 } // Stop timing
667
668 isInitialized_ = true;
669 ++numInitialize_;
670 initializeTime_ += (timer.wallTime() - startTime);
671}
672
673template <class MatrixType>
674void RILUK<MatrixType>::
675 checkOrderingConsistency(const row_matrix_type& A) {
676 // First check that the local row map ordering is the same as the local portion of the column map.
677 // The extraction of the strictly lower/upper parts of A, as well as the factorization,
678 // implicitly assume that this is the case.
679 Teuchos::ArrayView<const global_ordinal_type> rowGIDs = A.getRowMap()->getLocalElementList();
680 Teuchos::ArrayView<const global_ordinal_type> colGIDs = A.getColMap()->getLocalElementList();
681 bool gidsAreConsistentlyOrdered = true;
682 global_ordinal_type indexOfInconsistentGID = 0;
683 for (global_ordinal_type i = 0; i < rowGIDs.size(); ++i) {
684 if (rowGIDs[i] != colGIDs[i]) {
685 gidsAreConsistentlyOrdered = false;
686 indexOfInconsistentGID = i;
687 break;
688 }
689 }
690 TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered == false, std::runtime_error,
691 "The ordering of the local GIDs in the row and column maps is not the same"
692 << std::endl
693 << "at index " << indexOfInconsistentGID
694 << ". Consistency is required, as all calculations are done with"
695 << std::endl
696 << "local indexing.");
697}
698
699template <class MatrixType>
700void RILUK<MatrixType>::
701 initAllValues(const row_matrix_type& A) {
702 using Teuchos::ArrayRCP;
703 using Teuchos::Comm;
704 using Teuchos::ptr;
705 using Teuchos::RCP;
706 using Teuchos::REDUCE_SUM;
707 using Teuchos::reduceAll;
708 typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
709
710 size_t NumIn = 0, NumL = 0, NumU = 0;
711 bool DiagFound = false;
712 size_t NumNonzeroDiags = 0;
713 size_t MaxNumEntries = A.getGlobalMaxNumRowEntries();
714
715 // Allocate temporary space for extracting the strictly
716 // lower and upper parts of the matrix A.
717 nonconst_local_inds_host_view_type InI("InI", MaxNumEntries);
718 Teuchos::Array<local_ordinal_type> LI(MaxNumEntries);
719 Teuchos::Array<local_ordinal_type> UI(MaxNumEntries);
720 nonconst_values_host_view_type InV("InV", MaxNumEntries);
721 Teuchos::Array<scalar_type> LV(MaxNumEntries);
722 Teuchos::Array<scalar_type> UV(MaxNumEntries);
723
724 // Check if values should be inserted or replaced
725 const bool ReplaceValues = L_->isStaticGraph() || L_->isLocallyIndexed();
726
727 L_->resumeFill();
728 U_->resumeFill();
729 if (ReplaceValues) {
730 L_->setAllToScalar(STS::zero()); // Zero out L and U matrices
731 U_->setAllToScalar(STS::zero());
732 }
733
734 D_->putScalar(STS::zero()); // Set diagonal values to zero
735 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
736
737 RCP<const map_type> rowMap = L_->getRowMap();
738
739 // First we copy the user's matrix into L and U, regardless of fill level.
740 // It is important to note that L and U are populated using local indices.
741 // This means that if the row map GIDs are not monotonically increasing
742 // (i.e., permuted or gappy), then the strictly lower (upper) part of the
743 // matrix is not the one that you would get if you based L (U) on GIDs.
744 // This is ok, as the *order* of the GIDs in the rowmap is a better
745 // expression of the user's intent than the GIDs themselves.
746
747 Teuchos::ArrayView<const global_ordinal_type> nodeGIDs = rowMap->getLocalElementList();
748 for (size_t myRow = 0; myRow < A.getLocalNumRows(); ++myRow) {
749 local_ordinal_type local_row = myRow;
750
751 // TODO JJH 4April2014 An optimization is to use getLocalRowView. Not all matrices support this,
752 // we'd need to check via the Tpetra::RowMatrix method supportsRowViews().
753 A.getLocalRowCopy(local_row, InI, InV, NumIn); // Get Values and Indices
754
755 // Split into L and U (we don't assume that indices are ordered).
756
757 NumL = 0;
758 NumU = 0;
759 DiagFound = false;
760
761 for (size_t j = 0; j < NumIn; ++j) {
762 const local_ordinal_type k = InI[j];
763
764 if (k == local_row) {
765 DiagFound = true;
766 // Store perturbed diagonal in Tpetra::Vector D_
767 DV(local_row) += Rthresh_ * InV[j] + IFPACK2_SGN(InV[j]) * Athresh_;
768 } else if (k < 0) { // Out of range
769 TEUCHOS_TEST_FOR_EXCEPTION(
770 true, std::runtime_error,
771 "Ifpack2::RILUK::initAllValues: current "
772 "GID k = "
773 << k << " < 0. I'm not sure why this is an error; it is "
774 "probably an artifact of the undocumented assumptions of the "
775 "original implementation (likely copied and pasted from Ifpack). "
776 "Nevertheless, the code I found here insisted on this being an error "
777 "state, so I will throw an exception here.");
778 } else if (k < local_row) {
779 LI[NumL] = k;
780 LV[NumL] = InV[j];
781 NumL++;
782 } else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
783 UI[NumU] = k;
784 UV[NumU] = InV[j];
785 NumU++;
786 }
787 }
788
789 // Check in things for this row of L and U
790
791 if (DiagFound) {
792 ++NumNonzeroDiags;
793 } else {
794 DV(local_row) = Athresh_;
795 }
796
797 if (NumL) {
798 if (ReplaceValues) {
799 L_->replaceLocalValues(local_row, LI(0, NumL), LV(0, NumL));
800 } else {
801 // FIXME JJH 24April2014 Is this correct? I believe this case is when there aren't already values
802 // FIXME in this row in the column locations corresponding to UI.
803 L_->insertLocalValues(local_row, LI(0, NumL), LV(0, NumL));
804 }
805 }
806
807 if (NumU) {
808 if (ReplaceValues) {
809 U_->replaceLocalValues(local_row, UI(0, NumU), UV(0, NumU));
810 } else {
811 // FIXME JJH 24April2014 Is this correct? I believe this case is when there aren't already values
812 // FIXME in this row in the column locations corresponding to UI.
813 U_->insertLocalValues(local_row, UI(0, NumU), UV(0, NumU));
814 }
815 }
816 }
817
818 // At this point L and U have the values of A in the structure of L
819 // and U, and diagonal vector D
820
821 isInitialized_ = true;
822}
823
824template <class MatrixType>
825void RILUK<MatrixType>::compute_serial() {
826 // Fill L and U with numbers. This supports nonzero pattern reuse by calling
827 // initialize() once and then compute() multiple times.
828 initAllValues(*A_local_);
829
830 // MinMachNum should be officially defined, for now pick something a little
831 // bigger than IEEE underflow value
832
833 const scalar_type MinDiagonalValue = STS::rmin();
834 const scalar_type MaxDiagonalValue = STS::one() / MinDiagonalValue;
835
836 size_t NumIn, NumL, NumU;
837
838 // Get Maximum Row length
839 const size_t MaxNumEntries =
840 L_->getLocalMaxNumRowEntries() + U_->getLocalMaxNumRowEntries() + 1;
841
842 Teuchos::Array<local_ordinal_type> InI(MaxNumEntries); // Allocate temp space
843 Teuchos::Array<scalar_type> InV(MaxNumEntries);
844 size_t num_cols = U_->getColMap()->getLocalNumElements();
845 Teuchos::Array<int> colflag(num_cols, -1);
846
847 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
848
849 // Now start the factorization.
850
851 using IST = typename row_matrix_type::impl_scalar_type;
852 for (size_t i = 0; i < L_->getLocalNumRows(); ++i) {
853 local_ordinal_type local_row = i;
854 // Need some integer workspace and pointers
855 size_t NumUU;
856 local_inds_host_view_type UUI;
857 values_host_view_type UUV;
858
859 // Fill InV, InI with current row of L, D and U combined
860
861 NumIn = MaxNumEntries;
862 nonconst_local_inds_host_view_type InI_v(InI.data(), MaxNumEntries);
863 nonconst_values_host_view_type InV_v(reinterpret_cast<IST*>(InV.data()), MaxNumEntries);
864
865 L_->getLocalRowCopy(local_row, InI_v, InV_v, NumL);
866
867 InV[NumL] = DV(i); // Put in diagonal
868 InI[NumL] = local_row;
869
870 nonconst_local_inds_host_view_type InI_sub(InI.data() + NumL + 1, MaxNumEntries - NumL - 1);
871 nonconst_values_host_view_type InV_sub(reinterpret_cast<IST*>(InV.data()) + NumL + 1, MaxNumEntries - NumL - 1);
872
873 U_->getLocalRowCopy(local_row, InI_sub, InV_sub, NumU);
874 NumIn = NumL + NumU + 1;
875
876 // Set column flags
877 for (size_t j = 0; j < NumIn; ++j) {
878 colflag[InI[j]] = j;
879 }
880
881 scalar_type diagmod = STS::zero(); // Off-diagonal accumulator
882
883 for (size_t jj = 0; jj < NumL; ++jj) {
884 local_ordinal_type j = InI[jj];
885 IST multiplier = InV[jj]; // current_mults++;
886
887 InV[jj] *= static_cast<scalar_type>(DV(j));
888
889 U_->getLocalRowView(j, UUI, UUV); // View of row above
890 NumUU = UUI.size();
891
892 if (RelaxValue_ == STM::zero()) {
893 for (size_t k = 0; k < NumUU; ++k) {
894 const int kk = colflag[UUI[k]];
895 // FIXME (mfh 23 Dec 2013) Wait a second, we just set
896 // colflag above using size_t (which is generally unsigned),
897 // but now we're querying it using int (which is signed).
898 if (kk > -1) {
899 InV[kk] -= static_cast<scalar_type>(multiplier * UUV[k]);
900 }
901 }
902
903 } else {
904 for (size_t k = 0; k < NumUU; ++k) {
905 // FIXME (mfh 23 Dec 2013) Wait a second, we just set
906 // colflag above using size_t (which is generally unsigned),
907 // but now we're querying it using int (which is signed).
908 const int kk = colflag[UUI[k]];
909 if (kk > -1) {
910 InV[kk] -= static_cast<scalar_type>(multiplier * UUV[k]);
911 } else {
912 diagmod -= static_cast<scalar_type>(multiplier * UUV[k]);
913 }
914 }
915 }
916 }
917
918 if (NumL) {
919 // Replace current row of L
920 L_->replaceLocalValues(local_row, InI(0, NumL), InV(0, NumL));
921 }
922
923 DV(i) = InV[NumL]; // Extract Diagonal value
924
925 if (RelaxValue_ != STM::zero()) {
926 DV(i) += RelaxValue_ * diagmod; // Add off diagonal modifications
927 }
928
929 if (STS::magnitude(DV(i)) > STS::magnitude(MaxDiagonalValue)) {
930 if (STS::real(DV(i)) < STM::zero()) {
931 DV(i) = -MinDiagonalValue;
932 } else {
933 DV(i) = MinDiagonalValue;
934 }
935 } else {
936 DV(i) = static_cast<impl_scalar_type>(STS::one()) / DV(i); // Invert diagonal value
937 }
938
939 for (size_t j = 0; j < NumU; ++j) {
940 InV[NumL + 1 + j] *= static_cast<scalar_type>(DV(i)); // Scale U by inverse of diagonal
941 }
942
943 if (NumU) {
944 // Replace current row of L and U
945 U_->replaceLocalValues(local_row, InI(NumL + 1, NumU), InV(NumL + 1, NumU));
946 }
947
948 // Reset column flags
949 for (size_t j = 0; j < NumIn; ++j) {
950 colflag[InI[j]] = -1;
951 }
952 }
953
954 // The domain of L and the range of U are exactly their own row maps
955 // (there is no communication). The domain of U and the range of L
956 // must be the same as those of the original matrix, However if the
957 // original matrix is a VbrMatrix, these two latter maps are
958 // translation from a block map to a point map.
959 // FIXME (mfh 23 Dec 2013) Do we know that the column Map of L_ is
960 // always one-to-one?
961 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
962 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
963
964 // If L_solver_ or U_solver store modified factors internally, we need to reset those
965 L_solver_->setMatrix(L_);
966 L_solver_->compute(); // NOTE: Only do compute if the pointer changed. Otherwise, do nothing
967 U_solver_->setMatrix(U_);
968 U_solver_->compute(); // NOTE: Only do compute if the pointer changed. Otherwise, do nothing
969}
970
971template <class MatrixType>
972void RILUK<MatrixType>::compute_kkspiluk() {
973 L_->resumeFill();
974 U_->resumeFill();
975
976 L_->setAllToScalar(STS::zero()); // Zero out L and U matrices
977 U_->setAllToScalar(STS::zero());
978
979 using row_map_type = typename crs_matrix_type::local_matrix_device_type::row_map_type;
980 auto lclL = L_->getLocalMatrixDevice();
981 row_map_type L_rowmap = lclL.graph.row_map;
982 auto L_entries = lclL.graph.entries;
983 auto L_values = lclL.values;
984
985 auto lclU = U_->getLocalMatrixDevice();
986 row_map_type U_rowmap = lclU.graph.row_map;
987 auto U_entries = lclU.graph.entries;
988 auto U_values = lclU.values;
989
990 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
991 KokkosSparse::spiluk_numeric(KernelHandle_.getRawPtr(), LevelOfFill_,
992 lclMtx.graph.row_map, lclMtx.graph.entries, lclMtx.values,
993 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values);
994
995 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
996 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
997
998 L_solver_->compute();
999 U_solver_->compute();
1000}
1001
1002template <class MatrixType>
1003void RILUK<MatrixType>::compute_kkspiluk_stream() {
1004 std::vector<lno_row_view_t> L_rowmap_v(num_streams_);
1005 std::vector<lno_nonzero_view_t> L_entries_v(num_streams_);
1006 std::vector<scalar_nonzero_view_t> L_values_v(num_streams_);
1007 std::vector<lno_row_view_t> U_rowmap_v(num_streams_);
1008 std::vector<lno_nonzero_view_t> U_entries_v(num_streams_);
1009 std::vector<scalar_nonzero_view_t> U_values_v(num_streams_);
1010 std::vector<kk_handle_type*> KernelHandle_rawptr_v_(num_streams_);
1011 for (int i = 0; i < num_streams_; i++) {
1012 L_v_[i]->resumeFill();
1013 U_v_[i]->resumeFill();
1014
1015 L_v_[i]->setAllToScalar(STS::zero()); // Zero out L and U matrices
1016 U_v_[i]->setAllToScalar(STS::zero());
1017
1018 auto lclL = L_v_[i]->getLocalMatrixDevice();
1019 L_rowmap_v[i] = lclL.graph.row_map;
1020 L_entries_v[i] = lclL.graph.entries;
1021 L_values_v[i] = lclL.values;
1022
1023 auto lclU = U_v_[i]->getLocalMatrixDevice();
1024 U_rowmap_v[i] = lclU.graph.row_map;
1025 U_entries_v[i] = lclU.graph.entries;
1026 U_values_v[i] = lclU.values;
1027 KernelHandle_rawptr_v_[i] = KernelHandle_v_[i].getRawPtr();
1028 }
1029
1030 if (hasStreamReordered_) {
1031 // NOTE: For now, only do the value copying if RCM reordering is enabled in streams.
1032 // Otherwise, value copying was taken care prior to entering this function.
1033 // TODO: revisit this later.
1034 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1035 // A_local_diagblks was already setup during initialize, just copy the corresponding
1036 // values from A_local_crs_ in parallel now.
1037 using TeamPolicy = Kokkos::TeamPolicy<execution_space>;
1038 const auto A_nrows = lclMtx.numRows();
1039 auto rows_per_block = ((A_nrows % num_streams_) == 0)
1040 ? (A_nrows / num_streams_)
1041 : (A_nrows / num_streams_ + 1);
1042 for (int i = 0; i < num_streams_; i++) {
1043 const auto start_row_offset = i * rows_per_block;
1044 auto rowptrs = A_local_diagblks_rowmap_v_[i];
1045 auto colindices = A_local_diagblks_entries_v_[i];
1046 auto values = A_local_diagblks_values_v_[i];
1047 const bool reordered = hasStreamReordered_;
1048 typename lno_nonzero_view_t::non_const_type reverse_perm = hasStreamReordered_ ? reverse_perm_v_[i] : typename lno_nonzero_view_t::non_const_type{};
1049 TeamPolicy pol(exec_space_instances_[i], A_local_diagblks_rowmap_v_[i].extent(0) - 1, Kokkos::AUTO);
1050 Kokkos::parallel_for(
1051 pol, KOKKOS_LAMBDA(const typename TeamPolicy::member_type& team) {
1052 const auto irow = team.league_rank();
1053 const auto irow_A = start_row_offset + (reordered ? reverse_perm(irow) : irow);
1054 const auto A_local_crs_row = lclMtx.rowConst(irow_A);
1055 const auto begin_row = rowptrs(irow);
1056 const auto num_entries = rowptrs(irow + 1) - begin_row;
1057 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, num_entries), [&](const int j) {
1058 const auto colidx = colindices(begin_row + j);
1059 const auto colidx_A = start_row_offset + (reordered ? reverse_perm(colidx) : colidx);
1060 // Find colidx in A_local_crs_row
1061 const int offset = KokkosSparse::findRelOffset(
1062 &A_local_crs_row.colidx(0), A_local_crs_row.length, colidx_A, 0, false);
1063 values(begin_row + j) = A_local_crs_row.value(offset);
1064 });
1065 });
1066 }
1067 }
1068
1069 KokkosSparse::Experimental::spiluk_numeric_streams(exec_space_instances_, KernelHandle_rawptr_v_, LevelOfFill_,
1070 A_local_diagblks_rowmap_v_, A_local_diagblks_entries_v_, A_local_diagblks_values_v_,
1071 L_rowmap_v, L_entries_v, L_values_v,
1072 U_rowmap_v, U_entries_v, U_values_v);
1073
1074 for (int i = 0; i < num_streams_; i++) {
1075 L_v_[i]->fillComplete();
1076 U_v_[i]->fillComplete();
1077 }
1078
1079 L_solver_->compute();
1080 U_solver_->compute();
1081}
1082
1083template <class MatrixType>
1085 using Teuchos::Array;
1086 using Teuchos::ArrayView;
1087 using Teuchos::RCP;
1088 using Teuchos::rcp;
1089 using Teuchos::rcp_const_cast;
1090 using Teuchos::rcp_dynamic_cast;
1091 const char prefix[] = "Ifpack2::RILUK::compute: ";
1092
1093 // initialize() checks this too, but it's easier for users if the
1094 // error shows them the name of the method that they actually
1095 // called, rather than the name of some internally called method.
1096 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix << "The matrix is null. Please "
1097 "call setMatrix() with a nonnull input before calling this method.");
1098 TEUCHOS_TEST_FOR_EXCEPTION(!A_->isFillComplete(), std::runtime_error, prefix << "The matrix is not "
1099 "fill complete. You may not invoke initialize() or compute() with this "
1100 "matrix until the matrix is fill complete. If your matrix is a "
1101 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
1102 "range Maps, if appropriate) before calling this method.");
1103
1104 if (!isInitialized()) {
1105 initialize(); // Don't count this in the compute() time
1106 }
1107
1108 Teuchos::Time timer("RILUK::compute");
1109
1110 // Start timing
1111 Teuchos::TimeMonitor timeMon(timer);
1112 double startTime = timer.wallTime();
1113
1114 isComputed_ = false;
1115
1116 if (!this->isKokkosKernelsSpiluk_) {
1117 compute_serial();
1118 } else {
1119 // Make sure values in A is picked up even in case of pattern reuse
1120 if (!A_local_crs_nc_.is_null()) {
1121 // copy entries into A_local_crs
1122 A_local_crs_nc_->resumeFill();
1123 local_ordinal_type numRows = A_local_->getLocalNumRows();
1124 nonconst_local_inds_host_view_type indices("indices", A_local_->getLocalMaxNumRowEntries());
1125 nonconst_values_host_view_type values("values", A_local_->getLocalMaxNumRowEntries());
1126 for (local_ordinal_type i = 0; i < numRows; i++) {
1127 size_t numEntries = 0;
1128 A_local_->getLocalRowCopy(i, indices, values, numEntries);
1129 A_local_crs_nc_->replaceLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
1130 }
1131 A_local_crs_nc_->fillComplete(A_local_->getDomainMap(), A_local_->getRangeMap());
1132 }
1133
1134 if (!isKokkosKernelsStream_) {
1135 compute_kkspiluk();
1136 } else {
1137 // If streams are on, we potentially have to refresh A_local_diagblks_values_v_
1138 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1139 if (!hasStreamsWithRCB_) {
1140 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_, hasStreamReordered_);
1141 } else {
1142 auto A_coordinates_lcl = A_coordinates_->getLocalViewDevice(Tpetra::Access::ReadOnly);
1143 Kokkos::deep_copy(coors_rcb_, A_coordinates_lcl);
1144 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, coors_rcb_,
1145 A_local_diagblks_v_, perm_rcb_);
1146 }
1147 for (int i = 0; i < num_streams_; i++) {
1148 A_local_diagblks_values_v_[i] = A_local_diagblks_v_[i].values;
1149 }
1150
1151 compute_kkspiluk_stream();
1152 }
1153 }
1154
1155 isComputed_ = true;
1156 ++numCompute_;
1157 computeTime_ += (timer.wallTime() - startTime);
1158}
1159
1160namespace Impl {
1161template <typename MV, typename Map>
1162void resetMultiVecIfNeeded(std::unique_ptr<MV>& mv_ptr, const Map& map, const size_t numVectors, bool initialize) {
1163 if (!mv_ptr || mv_ptr->getNumVectors() != numVectors) {
1164 mv_ptr.reset(new MV(map, numVectors, initialize));
1165 }
1166}
1167} // namespace Impl
1168
1169template <class MatrixType>
1171 apply(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1172 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1173 Teuchos::ETransp mode,
1174 scalar_type alpha,
1175 scalar_type beta) const {
1176 using Teuchos::RCP;
1177 using Teuchos::rcpFromRef;
1178
1179 TEUCHOS_TEST_FOR_EXCEPTION(
1180 A_.is_null(), std::runtime_error,
1181 "Ifpack2::RILUK::apply: The matrix is "
1182 "null. Please call setMatrix() with a nonnull input, then initialize() "
1183 "and compute(), before calling this method.");
1184 TEUCHOS_TEST_FOR_EXCEPTION(
1185 !isComputed(), std::runtime_error,
1186 "Ifpack2::RILUK::apply: If you have not yet called compute(), "
1187 "you must call compute() before calling this method.");
1188 TEUCHOS_TEST_FOR_EXCEPTION(
1189 X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
1190 "Ifpack2::RILUK::apply: X and Y do not have the same number of columns. "
1191 "X.getNumVectors() = "
1192 << X.getNumVectors()
1193 << " != Y.getNumVectors() = " << Y.getNumVectors() << ".");
1194 TEUCHOS_TEST_FOR_EXCEPTION(
1195 STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
1196 "Ifpack2::RILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1197 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1198 "fixed. There is a FIXME in this file about this very issue.");
1199#ifdef HAVE_IFPACK2_DEBUG
1200 {
1201 if (!isKokkosKernelsStream_) {
1202 const magnitude_type D_nrm1 = D_->norm1();
1203 TEUCHOS_TEST_FOR_EXCEPTION(STM::isnaninf(D_nrm1), std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf.");
1204 }
1205 Teuchos::Array<magnitude_type> norms(X.getNumVectors());
1206 X.norm1(norms());
1207 bool good = true;
1208 for (size_t j = 0; j < X.getNumVectors(); ++j) {
1209 if (STM::isnaninf(norms[j])) {
1210 good = false;
1211 break;
1212 }
1213 }
1214 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the input X is NaN or Inf.");
1215 }
1216#endif // HAVE_IFPACK2_DEBUG
1217
1218 const scalar_type one = STS::one();
1219 const scalar_type zero = STS::zero();
1220
1221 Teuchos::Time timer("RILUK::apply");
1222 double startTime = timer.wallTime();
1223 { // Start timing
1224 Teuchos::TimeMonitor timeMon(timer);
1225 if (alpha == one && beta == zero) {
1226 if (isKokkosKernelsSpiluk_ && isKokkosKernelsStream_ && (hasStreamReordered_ || hasStreamsWithRCB_)) {
1227 Impl::resetMultiVecIfNeeded(reordered_x_, X.getMap(), X.getNumVectors(), false);
1228 Impl::resetMultiVecIfNeeded(reordered_y_, Y.getMap(), Y.getNumVectors(), false);
1229 Kokkos::fence();
1230
1231 for (size_t j = 0; j < X.getNumVectors(); j++) {
1232 auto X_j = X.getVector(j);
1233 auto ReorderedX_j = reordered_x_->getVectorNonConst(j);
1234 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1235 auto ReorderedX_lcl = ReorderedX_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1236 if (hasStreamReordered_) {
1237 local_ordinal_type stream_begin = 0;
1238 local_ordinal_type stream_end;
1239 for (int i = 0; i < num_streams_; i++) {
1240 auto perm_i = perm_v_[i];
1241 stream_end = stream_begin + perm_i.extent(0);
1242 auto X_lcl_sub = Kokkos::subview(X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1243 auto ReorderedX_lcl_sub = Kokkos::subview(ReorderedX_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1244 Kokkos::parallel_for(
1245 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0, static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(const int& ii) {
1246 ReorderedX_lcl_sub(perm_i(ii)) = X_lcl_sub(ii);
1247 });
1248 stream_begin = stream_end;
1249 }
1250 } else { // hasStreamsWithRCB_
1251 auto perm = perm_rcb_;
1252 Kokkos::parallel_for(
1253 Kokkos::RangePolicy<execution_space>(0, static_cast<int>(X_lcl.extent(0))), KOKKOS_LAMBDA(const int& ii) {
1254 ReorderedX_lcl(perm(ii), 0) = X_lcl(ii, 0);
1255 });
1256 }
1257 }
1258 Kokkos::fence();
1259
1260 if (mode == Teuchos::NO_TRANS) { // Solve L (U Y) = X for Y.
1261 // Solve L Y = X for Y.
1262 L_solver_->apply(*reordered_x_, Y, mode);
1263 // Solve U Y = Y for Y.
1264 U_solver_->apply(Y, *reordered_y_, mode);
1265 } else { // Solve U^P (L^P Y) = X for Y (where P is * or T).
1266 // Solve U^P Y = X for Y.
1267 U_solver_->apply(*reordered_x_, Y, mode);
1268 // Solve L^P Y = Y for Y.
1269 L_solver_->apply(Y, *reordered_y_, mode);
1270 }
1271
1272 for (size_t j = 0; j < Y.getNumVectors(); j++) {
1273 auto Y_j = Y.getVectorNonConst(j);
1274 auto ReorderedY_j = reordered_y_->getVector(j);
1275 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1276 auto ReorderedY_lcl = ReorderedY_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1277 if (hasStreamReordered_) {
1278 local_ordinal_type stream_begin = 0;
1279 local_ordinal_type stream_end;
1280 for (int i = 0; i < num_streams_; i++) {
1281 auto perm_i = perm_v_[i];
1282 stream_end = stream_begin + perm_i.extent(0);
1283 auto Y_lcl_sub = Kokkos::subview(Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1284 auto ReorderedY_lcl_sub = Kokkos::subview(ReorderedY_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1285 Kokkos::parallel_for(
1286 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0, static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(const int& ii) {
1287 Y_lcl_sub(ii) = ReorderedY_lcl_sub(perm_i(ii));
1288 });
1289 stream_begin = stream_end;
1290 }
1291 } else { // hasStreamsWithRCB_
1292 auto perm = perm_rcb_;
1293 Kokkos::parallel_for(
1294 Kokkos::RangePolicy<execution_space>(0, static_cast<int>(Y_lcl.extent(0))), KOKKOS_LAMBDA(const int& ii) {
1295 Y_lcl(ii, 0) = ReorderedY_lcl(perm(ii), 0);
1296 });
1297 }
1298 }
1299 Kokkos::fence();
1300 } else {
1301 if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
1302#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1303 // NOTE (Nov-15-2022):
1304 // This is a workaround for Cuda >= 11.3 (using cusparseSpSV)
1305 // since cusparseSpSV_solve() does not support in-place computation
1306 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(), false);
1307
1308 // Start by solving L Y_tmp = X for Y_tmp.
1309 L_solver_->apply(X, *Y_tmp_, mode);
1310
1311 if (!this->isKokkosKernelsSpiluk_) {
1312 // Solve D Y = Y. The operation lets us do this in place in Y, so we can
1313 // write "solve D Y = Y for Y."
1314 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1315 }
1316
1317 U_solver_->apply(*Y_tmp_, Y, mode); // Solve U Y = Y_tmp.
1318#else
1319 // Start by solving L Y = X for Y.
1320 L_solver_->apply(X, Y, mode);
1321
1322 if (!this->isKokkosKernelsSpiluk_) {
1323 // Solve D Y = Y. The operation lets us do this in place in Y, so we can
1324 // write "solve D Y = Y for Y."
1325 Y.elementWiseMultiply(one, *D_, Y, zero);
1326 }
1327
1328 U_solver_->apply(Y, Y, mode); // Solve U Y = Y.
1329#endif
1330 } else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
1331#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1332 // NOTE (Nov-15-2022):
1333 // This is a workaround for Cuda >= 11.3 (using cusparseSpSV)
1334 // since cusparseSpSV_solve() does not support in-place computation
1335 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(), false);
1336
1337 // Start by solving U^P Y_tmp = X for Y_tmp.
1338 U_solver_->apply(X, *Y_tmp_, mode);
1339
1340 if (!this->isKokkosKernelsSpiluk_) {
1341 // Solve D^P Y = Y.
1342 //
1343 // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we
1344 // need to do an elementwise multiply with the conjugate of
1345 // D_, not just with D_ itself.
1346 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1347 }
1348
1349 L_solver_->apply(*Y_tmp_, Y, mode); // Solve L^P Y = Y_tmp.
1350#else
1351 // Start by solving U^P Y = X for Y.
1352 U_solver_->apply(X, Y, mode);
1353
1354 if (!this->isKokkosKernelsSpiluk_) {
1355 // Solve D^P Y = Y.
1356 //
1357 // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we
1358 // need to do an elementwise multiply with the conjugate of
1359 // D_, not just with D_ itself.
1360 Y.elementWiseMultiply(one, *D_, Y, zero);
1361 }
1362
1363 L_solver_->apply(Y, Y, mode); // Solve L^P Y = Y.
1364#endif
1365 }
1366 }
1367 } else { // alpha != 1 or beta != 0
1368 if (alpha == zero) {
1369 // The special case for beta == 0 ensures that if Y contains Inf
1370 // or NaN values, we replace them with 0 (following BLAS
1371 // convention), rather than multiplying them by 0 to get NaN.
1372 if (beta == zero) {
1373 Y.putScalar(zero);
1374 } else {
1375 Y.scale(beta);
1376 }
1377 } else { // alpha != zero
1378 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(), false);
1379 apply(X, *Y_tmp_, mode);
1380 Y.update(alpha, *Y_tmp_, beta);
1381 }
1382 }
1383 } // end timing
1384
1385#ifdef HAVE_IFPACK2_DEBUG
1386 {
1387 Teuchos::Array<magnitude_type> norms(Y.getNumVectors());
1388 Y.norm1(norms());
1389 bool good = true;
1390 for (size_t j = 0; j < Y.getNumVectors(); ++j) {
1391 if (STM::isnaninf(norms[j])) {
1392 good = false;
1393 break;
1394 }
1395 }
1396 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the output Y is NaN or Inf.");
1397 }
1398#endif // HAVE_IFPACK2_DEBUG
1399
1400 ++numApply_;
1401 applyTime_ += (timer.wallTime() - startTime);
1402}
1403
1404// VINH comment out since multiply() is not needed anywhere
1405// template<class MatrixType>
1406// void RILUK<MatrixType>::
1407// multiply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1408// Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1409// const Teuchos::ETransp mode) const
1410//{
1411// const scalar_type zero = STS::zero ();
1412// const scalar_type one = STS::one ();
1413//
1414// if (mode != Teuchos::NO_TRANS) {
1415// U_->apply (X, Y, mode); //
1416// Y.update (one, X, one); // Y = Y + X (account for implicit unit diagonal)
1417//
1418// // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we need
1419// // to do an elementwise multiply with the conjugate of D_, not
1420// // just with D_ itself.
1421// Y.elementWiseMultiply (one, *D_, Y, zero); // y = D*y (D_ has inverse of diagonal)
1422//
1423// MV Y_tmp (Y, Teuchos::Copy); // Need a temp copy of Y
1424// L_->apply (Y_tmp, Y, mode);
1425// Y.update (one, Y_tmp, one); // (account for implicit unit diagonal)
1426// }
1427// else {
1428// L_->apply (X, Y, mode);
1429// Y.update (one, X, one); // Y = Y + X (account for implicit unit diagonal)
1430// Y.elementWiseMultiply (one, *D_, Y, zero); // y = D*y (D_ has inverse of diagonal)
1431// MV Y_tmp (Y, Teuchos::Copy); // Need a temp copy of Y1
1432// U_->apply (Y_tmp, Y, mode);
1433// Y.update (one, Y_tmp, one); // (account for implicit unit diagonal)
1434// }
1435// }
1436
1437template <class MatrixType>
1439 std::ostringstream os;
1440
1441 // Output is a valid YAML dictionary in flow style. If you don't
1442 // like everything on a single line, you should call describe()
1443 // instead.
1444 os << "\"Ifpack2::RILUK\": {";
1445 os << "Initialized: " << (isInitialized() ? "true" : "false") << ", "
1446 << "Computed: " << (isComputed() ? "true" : "false") << ", ";
1447
1448 os << "Level-of-fill: " << getLevelOfFill() << ", ";
1449
1450 if (isKokkosKernelsSpiluk_) os << "KK-SPILUK, ";
1451 if (isKokkosKernelsStream_) os << "KK-Stream, ";
1452
1453 if (A_.is_null()) {
1454 os << "Matrix: null";
1455 } else {
1456 os << "Global matrix dimensions: ["
1457 << A_->getGlobalNumRows() << ", " << A_->getGlobalNumCols() << "]"
1458 << ", Global nnz: " << A_->getGlobalNumEntries();
1459 }
1460
1461 if (!L_solver_.is_null()) os << ", " << L_solver_->description();
1462 if (!U_solver_.is_null()) os << ", " << U_solver_->description();
1463
1464 os << "}";
1465 return os.str();
1466}
1467
1468} // namespace Ifpack2
1469
1470#define IFPACK2_RILUK_INSTANT(S, LO, GO, N) \
1471 template class Ifpack2::RILUK<Tpetra::RowMatrix<S, LO, GO, N> >;
1472
1473#endif
Declaration of MDF interface.
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition Ifpack2_IlukGraph.hpp:67
Access only local rows and columns of a sparse matrix.
Definition Ifpack2_LocalFilter_decl.hpp:128
"Preconditioner" that solves local sparse triangular systems.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:53
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition Ifpack2_RILUK_decl.hpp:220
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition Ifpack2_RILUK_def.hpp:467
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition Ifpack2_RILUK_decl.hpp:257
virtual ~RILUK()
Destructor (declared virtual for memory safety).
Definition Ifpack2_RILUK_def.hpp:98
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition Ifpack2_RILUK_def.hpp:1171
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition Ifpack2_RILUK_decl.hpp:235
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition Ifpack2_RILUK_def.hpp:239
const crs_matrix_type & getU() const
Return the U factor of the ILU factorization.
Definition Ifpack2_RILUK_def.hpp:190
std::string description() const
A one-line description of this object.
Definition Ifpack2_RILUK_def.hpp:1438
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition Ifpack2_RILUK_def.hpp:121
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition Ifpack2_RILUK_decl.hpp:223
Teuchos::RCP< const coord_type > getCoord() const
Get the coordinates associated with the input matrix's rows.
Definition Ifpack2_RILUK_def.hpp:431
const Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > & getD() const
Return the diagonal entries of the ILU factorization.
Definition Ifpack2_RILUK_def.hpp:177
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition Ifpack2_RILUK_decl.hpp:232
void setCoord(const Teuchos::RCP< const coord_type > &A_coordinates)
Set the matrix rows' coordinates.
Definition Ifpack2_RILUK_def.hpp:153
Teuchos::RCP< const row_matrix_type > getMatrix() const
Get the input matrix.
Definition Ifpack2_RILUK_def.hpp:419
Teuchos::RCP< const crs_matrix_type > getCrsMatrix() const
Return the input matrix A as a Tpetra::CrsMatrix, if possible; else throws.
Definition Ifpack2_RILUK_def.hpp:425
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition Ifpack2_RILUK_def.hpp:219
void setParameters(const Teuchos::ParameterList &params)
Definition Ifpack2_RILUK_def.hpp:298
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition Ifpack2_RILUK_decl.hpp:226
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition Ifpack2_RILUK_decl.hpp:229
const crs_matrix_type & getL() const
Return the L factor of the ILU factorization.
Definition Ifpack2_RILUK_def.hpp:161
node_type::execution_space execution_space
The Kokkos execution space of the input MatrixType.
Definition Ifpack2_RILUK_decl.hpp:241
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition Ifpack2_RILUK_def.hpp:202
void compute()
Compute the (numeric) incomplete factorization.
Definition Ifpack2_RILUK_def.hpp:1084
static Teuchos::RCP< const row_matrix_type > makeLocalFilter(const Teuchos::RCP< const row_matrix_type > &A)
Return A, wrapped in a LocalFilter, if necessary.
Definition Ifpack2_RILUK_def.hpp:437
Ifpack2 implementation details.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40