Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Experimental_RBILUK_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
11#define IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
12
13#include "Tpetra_BlockMultiVector.hpp"
14#include "Tpetra_BlockView.hpp"
15#include "Tpetra_BlockCrsMatrix_Helpers_decl.hpp"
16#include "Ifpack2_OverlappingRowMatrix.hpp"
17#include "Ifpack2_Details_getCrsMatrix.hpp"
18#include "Ifpack2_LocalFilter.hpp"
19#include "Ifpack2_Utilities.hpp"
20#include "Ifpack2_RILUK.hpp"
21#include "KokkosSparse_sptrsv.hpp"
22
23//#define IFPACK2_RBILUK_INITIAL
24//#define IFPACK2_RBILUK_INITIAL_NOKK
25
26#ifndef IFPACK2_RBILUK_INITIAL_NOKK
27#include "KokkosBatched_Gemm_Decl.hpp"
28#include "KokkosBatched_Gemm_Serial_Impl.hpp"
29#include "KokkosBatched_Util.hpp"
30#endif
31
32namespace Ifpack2 {
33
34namespace Experimental {
35
36namespace {
37template <class MatrixType>
38struct LocalRowHandler {
39 using LocalOrdinal = typename MatrixType::local_ordinal_type;
40 using row_matrix_type = Tpetra::RowMatrix<
41 typename MatrixType::scalar_type,
42 LocalOrdinal,
43 typename MatrixType::global_ordinal_type,
44 typename MatrixType::node_type>;
45 using inds_type = typename row_matrix_type::local_inds_host_view_type;
46 using vals_type = typename row_matrix_type::values_host_view_type;
47
48 LocalRowHandler(Teuchos::RCP<const row_matrix_type> A)
49 : A_(std::move(A)) {
50 if (!A_->supportsRowViews()) {
51 const auto maxNumRowEntr = A_->getLocalMaxNumRowEntries();
52 const auto blockSize = A_->getBlockSize();
53 ind_nc_ = inds_type_nc("Ifpack2::RBILUK::LocalRowHandler::indices", maxNumRowEntr);
54 val_nc_ = vals_type_nc("Ifpack2::RBILUK::LocalRowHandler::values", maxNumRowEntr * blockSize * blockSize);
55 }
56 }
57
58 void getLocalRow(LocalOrdinal local_row, inds_type& InI, vals_type& InV, LocalOrdinal& NumIn) {
59 if (A_->supportsRowViews()) {
60 A_->getLocalRowView(local_row, InI, InV);
61 NumIn = (LocalOrdinal)InI.size();
62 } else {
63 size_t cnt;
64 A_->getLocalRowCopy(local_row, ind_nc_, val_nc_, cnt);
65 InI = ind_nc_;
66 InV = val_nc_;
67 NumIn = (LocalOrdinal)cnt;
68 }
69 }
70
71 private:
72 using inds_type_nc = typename row_matrix_type::nonconst_local_inds_host_view_type;
73 using vals_type_nc = typename row_matrix_type::nonconst_values_host_view_type;
74
75 Teuchos::RCP<const row_matrix_type> A_;
76 inds_type_nc ind_nc_;
77 vals_type_nc val_nc_;
78};
79
80template <typename BsrMatrixType, typename CrsMatrixType>
81CrsMatrixType convertBsrToCrs(const BsrMatrixType& bsrMatrix) {
82 using Ordinal = typename BsrMatrixType::ordinal_type;
83 using RowMapType = typename BsrMatrixType::row_map_type::non_const_type;
84 using EntriesType = typename BsrMatrixType::index_type::non_const_type;
85 using ValuesType = typename BsrMatrixType::values_type::non_const_type;
86 using ExeSpace = typename BsrMatrixType::execution_space;
87
88 const Ordinal blockDim = bsrMatrix.blockDim();
89 const Ordinal blockSize = blockDim * blockDim;
90 const Ordinal numBlockRows = bsrMatrix.numRows();
91 const Ordinal numBlockCols = bsrMatrix.numCols();
92
93 const auto blockRowMap = bsrMatrix.graph.row_map;
94 const auto blockEntries = bsrMatrix.graph.entries;
95 const auto blockValues = bsrMatrix.values;
96
97 const Ordinal numRows = numBlockRows * blockDim;
98 const Ordinal numCols = numBlockCols * blockDim;
99
100 // Allocate CrsMatrix row map
101 RowMapType crsRowMap("crsRowMap", numRows + 1);
102 EntriesType crsEntries("crsEntries", blockEntries.extent(0) * blockSize);
103 ValuesType crsValues("crsValues", blockValues.extent(0) * blockSize);
104
105 Kokkos::RangePolicy<ExeSpace> policy(0, numBlockRows);
106
107 // Fill CrsMatrix row map, entries, and values
108 Kokkos::parallel_for(
109 "ConvertBsrToCrs", policy, KOKKOS_LAMBDA(const Ordinal blockRow) {
110 const Ordinal blockRowStart = blockRowMap(blockRow);
111 const Ordinal blockRowEnd = blockRowMap(blockRow + 1);
112 const Ordinal blockRowCount = blockRowEnd - blockRowStart;
113
114 // Iterate over block entries in this row. This could use a teamRange parallel_for
115 for (Ordinal blockNnz = blockRowStart; blockNnz < blockRowEnd; ++blockNnz) {
116 const Ordinal blockCol = blockEntries(blockNnz);
117 const Ordinal blockNum = blockNnz - blockRowStart;
118
119 // Iterate over block dim to get the unblocked rows
120 for (Ordinal blockRowOffset = 0; blockRowOffset < blockDim; ++blockRowOffset) {
121 const Ordinal crsRow = blockRow * blockDim + blockRowOffset;
122 // Each unblocked row has blockRowCount * blockDim items
123 const Ordinal crsRowStart = blockRowStart * blockSize + blockRowCount * blockDim * blockRowOffset;
124 crsRowMap(crsRow) = crsRowStart;
125
126 // Iterate over block dim to get the unblocked cols
127 for (Ordinal blockColOffset = 0; blockColOffset < blockDim; ++blockColOffset) {
128 const Ordinal crsCol = blockCol * blockDim + blockColOffset;
129 const Ordinal crsNnz = crsRowStart + blockNum * blockDim + blockColOffset;
130 crsEntries(crsNnz) = crsCol;
131 crsValues(crsNnz) = blockValues(blockNnz * blockSize + blockRowOffset * blockDim + blockColOffset);
132 }
133 }
134 }
135 });
136
137 // Finalize CrsMatrix row map
138 Kokkos::RangePolicy<ExeSpace> policy2(0, 1);
139 Kokkos::parallel_for(
140 "FinalizeCrs", policy2, KOKKOS_LAMBDA(const Ordinal) {
141 crsRowMap(numRows) = blockRowMap(numBlockRows) * blockSize;
142 });
143
144 // Construct CrsMatrix
145 return CrsMatrixType("crsMatrix", numRows, numCols, crsEntries.extent(0), crsValues, crsRowMap, crsEntries);
146}
147
148} // namespace
149
150template <class MatrixType>
151RBILUK<MatrixType>::RBILUK(const Teuchos::RCP<const row_matrix_type>& Matrix_in)
152 : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in)) {}
153
154template <class MatrixType>
155RBILUK<MatrixType>::RBILUK(const Teuchos::RCP<const block_crs_matrix_type>& Matrix_in)
156 : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in)) {}
157
158template <class MatrixType>
160
161template <class MatrixType>
162void RBILUK<MatrixType>::setMatrix(const Teuchos::RCP<const block_crs_matrix_type>& A) {
163 // FIXME (mfh 04 Nov 2015) What about A_? When does that get (re)set?
164
165 // It's legal for A to be null; in that case, you may not call
166 // initialize() until calling setMatrix() with a nonnull input.
167 // Regardless, setting the matrix invalidates any previous
168 // factorization.
169 if (A.getRawPtr() != this->A_.getRawPtr()) {
170 this->isAllocated_ = false;
171 this->isInitialized_ = false;
172 this->isComputed_ = false;
173 this->Graph_ = Teuchos::null;
174 L_block_ = Teuchos::null;
175 U_block_ = Teuchos::null;
176 D_block_ = Teuchos::null;
177 }
178}
179
180template <class MatrixType>
181const typename RBILUK<MatrixType>::block_crs_matrix_type&
183 TEUCHOS_TEST_FOR_EXCEPTION(
184 L_block_.is_null(), std::runtime_error,
185 "Ifpack2::RILUK::getL: The L factor "
186 "is null. Please call initialize() (and preferably also compute()) "
187 "before calling this method. If the input matrix has not yet been set, "
188 "you must first call setMatrix() with a nonnull input matrix before you "
189 "may call initialize() or compute().");
190 return *L_block_;
191}
192
193template <class MatrixType>
194const typename RBILUK<MatrixType>::block_crs_matrix_type&
196 TEUCHOS_TEST_FOR_EXCEPTION(
197 D_block_.is_null(), std::runtime_error,
198 "Ifpack2::RILUK::getD: The D factor "
199 "(of diagonal entries) is null. Please call initialize() (and "
200 "preferably also compute()) before calling this method. If the input "
201 "matrix has not yet been set, you must first call setMatrix() with a "
202 "nonnull input matrix before you may call initialize() or compute().");
203 return *D_block_;
204}
205
206template <class MatrixType>
207const typename RBILUK<MatrixType>::block_crs_matrix_type&
209 TEUCHOS_TEST_FOR_EXCEPTION(
210 U_block_.is_null(), std::runtime_error,
211 "Ifpack2::RILUK::getU: The U factor "
212 "is null. Please call initialize() (and preferably also compute()) "
213 "before calling this method. If the input matrix has not yet been set, "
214 "you must first call setMatrix() with a nonnull input matrix before you "
215 "may call initialize() or compute().");
216 return *U_block_;
217}
218
219template <class MatrixType>
221 using Teuchos::null;
222 using Teuchos::rcp;
223
224 if (!this->isAllocated_) {
225 if (!this->isKokkosKernelsStream_) {
226 // Deallocate any existing storage. This avoids storing 2x
227 // memory, since RCP op= does not deallocate until after the
228 // assignment.
229 this->L_ = null;
230 this->U_ = null;
231 this->D_ = null;
232 L_block_ = null;
233 U_block_ = null;
234 D_block_ = null;
235
236 // Allocate Matrix using ILUK graphs
237 L_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getL_Graph(), blockSize_));
238 U_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getU_Graph(), blockSize_));
239 D_block_ = rcp(new block_crs_matrix_type(*(Ifpack2::Details::computeDiagonalGraph(*(this->Graph_->getOverlapGraph()))),
240 blockSize_));
241 L_block_->setAllToScalar(STM::zero()); // Zero out L and U matrices
242 U_block_->setAllToScalar(STM::zero());
243 D_block_->setAllToScalar(STM::zero());
244
245 // Allocate temp space for apply
246 if (this->isKokkosKernelsSpiluk_) {
247 const auto numRows = L_block_->getLocalNumRows();
248 tmp_ = view1d("RBILUK::tmp_", numRows * blockSize_);
249 }
250 } else {
251 this->D_ = null;
252 D_block_ = null;
253
254 L_block_v_ = std::vector<Teuchos::RCP<block_crs_matrix_type> >(this->num_streams_);
255 U_block_v_ = std::vector<Teuchos::RCP<block_crs_matrix_type> >(this->num_streams_);
256 for (int i = 0; i < this->num_streams_; i++) {
257 L_block_v_[i] = rcp(new block_crs_matrix_type(*this->Graph_v_[i]->getL_Graph(), blockSize_));
258 U_block_v_[i] = rcp(new block_crs_matrix_type(*this->Graph_v_[i]->getU_Graph(), blockSize_));
259 L_block_v_[i]->setAllToScalar(STS::zero()); // Zero out L and U matrices
260 U_block_v_[i]->setAllToScalar(STS::zero());
261 }
262
263 // Allocate temp space for apply
264 auto lclMtx = A_local_bcrs_->getLocalMatrixDevice();
265 const auto numRows = lclMtx.numRows();
266 tmp_ = view1d("RBILUK::tmp_", numRows * blockSize_);
267 }
268 }
269 this->isAllocated_ = true;
270}
271
272namespace {
273
274template <class MatrixType>
275Teuchos::RCP<const typename RBILUK<MatrixType>::crs_graph_type>
276getBlockCrsGraph(const Teuchos::RCP<const typename RBILUK<MatrixType>::row_matrix_type>& A) {
277 using local_ordinal_type = typename MatrixType::local_ordinal_type;
278 using Teuchos::RCP;
279 using Teuchos::rcp;
280 using Teuchos::rcp_const_cast;
281 using Teuchos::rcp_dynamic_cast;
282 using Teuchos::rcpFromRef;
283 using row_matrix_type = typename RBILUK<MatrixType>::row_matrix_type;
284 using crs_graph_type = typename RBILUK<MatrixType>::crs_graph_type;
285 using block_crs_matrix_type = typename RBILUK<MatrixType>::block_crs_matrix_type;
286
287 auto A_local = RBILUK<MatrixType>::makeLocalFilter(A);
288
289 {
290 RCP<const LocalFilter<row_matrix_type> > filteredA =
291 rcp_dynamic_cast<const LocalFilter<row_matrix_type> >(A_local);
292 RCP<const OverlappingRowMatrix<row_matrix_type> > overlappedA = Teuchos::null;
293 RCP<const block_crs_matrix_type> A_block = Teuchos::null;
294 if (!filteredA.is_null()) {
295 overlappedA = rcp_dynamic_cast<const OverlappingRowMatrix<row_matrix_type> >(filteredA->getUnderlyingMatrix());
296 }
297
298 if (!overlappedA.is_null()) {
299 A_block = rcp_dynamic_cast<const block_crs_matrix_type>(overlappedA->getUnderlyingMatrix());
300 } else if (!filteredA.is_null()) {
301 // If there is no overlap, filteredA could be the block CRS matrix
302 A_block = rcp_dynamic_cast<const block_crs_matrix_type>(filteredA->getUnderlyingMatrix());
303 } else {
304 A_block = rcp_dynamic_cast<const block_crs_matrix_type>(A_local);
305 }
306
307 if (!A_block.is_null()) {
308 return rcpFromRef(A_block->getCrsGraph());
309 }
310 }
311
312 // Could not extract block crs, make graph manually
313 {
314 local_ordinal_type numRows = A_local->getLocalNumRows();
315 Teuchos::Array<size_t> entriesPerRow(numRows);
316 for (local_ordinal_type i = 0; i < numRows; i++) {
317 entriesPerRow[i] = A_local->getNumEntriesInLocalRow(i);
318 }
319 RCP<crs_graph_type> A_local_crs_nc =
320 rcp(new crs_graph_type(A_local->getRowMap(),
321 A_local->getColMap(),
322 entriesPerRow()));
323
324 {
325 using LocalRowHandler_t = LocalRowHandler<MatrixType>;
326 LocalRowHandler_t localRowHandler(A_local);
327 typename LocalRowHandler_t::inds_type indices;
328 typename LocalRowHandler_t::vals_type values;
329 for (local_ordinal_type i = 0; i < numRows; i++) {
330 local_ordinal_type numEntries = 0;
331 localRowHandler.getLocalRow(i, indices, values, numEntries);
332 A_local_crs_nc->insertLocalIndices(i, numEntries, indices.data());
333 }
334 }
335
336 A_local_crs_nc->fillComplete(A_local->getDomainMap(), A_local->getRangeMap());
337 return rcp_const_cast<const crs_graph_type>(A_local_crs_nc);
338 }
339}
340
341} // namespace
342
343template <class MatrixType>
345 using Teuchos::Array;
346 using Teuchos::RCP;
347 using Teuchos::rcp;
348 using Teuchos::rcp_dynamic_cast;
349
350 using crs_map_type = Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>;
351
352 const char prefix[] = "Ifpack2::Experimental::RBILUK::initialize: ";
353
354 TEUCHOS_TEST_FOR_EXCEPTION(this->A_.is_null(), std::runtime_error, prefix << "The matrix (A_, the "
355 "RowMatrix) is null. Please call setMatrix() with a nonnull input "
356 "before calling this method.");
357 TEUCHOS_TEST_FOR_EXCEPTION(!this->A_->isFillComplete(), std::runtime_error, prefix << "The matrix "
358 "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
359 "initialize() or compute() with this matrix until the matrix is fill "
360 "complete. Note: BlockCrsMatrix is fill complete if and only if its "
361 "underlying graph is fill complete.");
362
363 blockSize_ = this->A_->getBlockSize();
364 this->A_local_ = this->makeLocalFilter(this->A_);
365
366 Teuchos::Time timer("RBILUK::initialize");
367 double startTime = timer.wallTime();
368 { // Start timing
369 Teuchos::TimeMonitor timeMon(timer);
370
371 // Calling initialize() means that the user asserts that the graph
372 // of the sparse matrix may have changed. We must not just reuse
373 // the previous graph in that case.
374 //
375 // Regarding setting isAllocated_ to false: Eventually, we may want
376 // some kind of clever memory reuse strategy, but it's always
377 // correct just to blow everything away and start over.
378 this->isInitialized_ = false;
379 this->isAllocated_ = false;
380 this->isComputed_ = false;
381 this->Graph_ = Teuchos::null;
382
383 if (this->isKokkosKernelsSpiluk_) {
384 A_local_bcrs_ = Details::getBcrsMatrix(this->A_local_);
385 RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(this->A_local_);
386 if (A_local_bcrs_.is_null()) {
387 if (A_local_crs.is_null()) {
388 local_ordinal_type numRows = this->A_local_->getLocalNumRows();
389 Array<size_t> entriesPerRow(numRows);
390 for (local_ordinal_type i = 0; i < numRows; i++) {
391 entriesPerRow[i] = this->A_local_->getNumEntriesInLocalRow(i);
392 }
393 A_local_crs_nc_ =
394 rcp(new crs_matrix_type(this->A_local_->getRowMap(),
395 this->A_local_->getColMap(),
396 entriesPerRow()));
397 // copy entries into A_local_crs
398 nonconst_local_inds_host_view_type indices("indices", this->A_local_->getLocalMaxNumRowEntries());
399 nonconst_values_host_view_type values("values", this->A_local_->getLocalMaxNumRowEntries());
400 for (local_ordinal_type i = 0; i < numRows; i++) {
401 size_t numEntries = 0;
402 this->A_local_->getLocalRowCopy(i, indices, values, numEntries);
403 A_local_crs_nc_->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
404 }
405 A_local_crs_nc_->fillComplete(this->A_local_->getDomainMap(), this->A_local_->getRangeMap());
406 A_local_crs = Teuchos::rcp_const_cast<const crs_matrix_type>(A_local_crs_nc_);
407 }
408 // Create bcrs from crs
409 // We can skip fillLogicalBlocks if we know the input is already filled
410 if (blockSize_ > 1) {
411 auto crs_matrix_block_filled = Tpetra::fillLogicalBlocks(*A_local_crs, blockSize_);
412 A_local_bcrs_ = Tpetra::convertToBlockCrsMatrix(*crs_matrix_block_filled, blockSize_);
413 } else {
414 A_local_bcrs_ = Tpetra::convertToBlockCrsMatrix(*A_local_crs, blockSize_);
415 }
416 }
417 }
418
419 if (this->isKokkosKernelsStream_) {
420 std::vector<int> weights(this->num_streams_);
421 std::fill(weights.begin(), weights.end(), 1);
422 this->exec_space_instances_ = Kokkos::Experimental::partition_space(execution_space(), weights);
423
424 auto lclMtx = A_local_bcrs_->getLocalMatrixDevice();
425 this->Graph_v_ = std::vector<Teuchos::RCP<Ifpack2::IlukGraph<crs_graph_type, kk_handle_type> > >(this->num_streams_);
426 A_block_local_diagblks_v_ = std::vector<local_matrix_device_type>(this->num_streams_);
427 std::vector<local_crs_matrix_device_type> A_crs_local_diagblks_v(this->num_streams_);
428
429 if (!this->hasStreamReordered_) {
430 auto lclCrsMtx = convertBsrToCrs<local_matrix_device_type, local_crs_matrix_device_type>(lclMtx);
431#if KOKKOS_VERSION >= 50100
432 KokkosSparse::Experimental::kk_extract_diagonal_blocks_crsmatrix_sequential(lclCrsMtx, A_crs_local_diagblks_v);
433#else
434 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclCrsMtx, A_crs_local_diagblks_v);
435#endif
436 } else {
437 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, prefix << "Stream reordering is not currently supported for RBILUK");
438 }
439
440 for (int i = 0; i < this->num_streams_; i++) {
441 A_block_local_diagblks_v_[i] = local_matrix_device_type(A_crs_local_diagblks_v[i], blockSize_);
442 }
443
444 A_block_local_diagblks_rowmap_v_ = std::vector<lno_row_view_t>(this->num_streams_);
445 A_block_local_diagblks_entries_v_ = std::vector<lno_nonzero_view_t>(this->num_streams_);
446 A_block_local_diagblks_values_v_ = std::vector<scalar_nonzero_view_t>(this->num_streams_);
447
448 for (int i = 0; i < this->num_streams_; i++) {
449 A_block_local_diagblks_rowmap_v_[i] = A_block_local_diagblks_v_[i].graph.row_map;
450 A_block_local_diagblks_entries_v_[i] = A_block_local_diagblks_v_[i].graph.entries;
451 A_block_local_diagblks_values_v_[i] = A_block_local_diagblks_v_[i].values;
452
453 Teuchos::RCP<const crs_map_type> A_local_diagblks_RowMap = rcp(new crs_map_type(A_block_local_diagblks_v_[i].numRows(),
454 A_block_local_diagblks_v_[i].numRows(),
455 A_local_bcrs_->getRowMap()->getComm()));
456 Teuchos::RCP<const crs_map_type> A_local_diagblks_ColMap = rcp(new crs_map_type(A_block_local_diagblks_v_[i].numCols(),
457 A_block_local_diagblks_v_[i].numCols(),
458 A_local_bcrs_->getColMap()->getComm()));
459
460 Teuchos::RCP<const crs_graph_type> graph = rcp(new crs_graph_type(A_local_diagblks_RowMap, A_local_diagblks_ColMap, A_block_local_diagblks_v_[i].graph));
461 this->Graph_v_[i] = rcp(new Ifpack2::IlukGraph<crs_graph_type, kk_handle_type>(graph,
462 this->LevelOfFill_, 0));
463 }
464 } else {
465 RCP<const crs_graph_type> matrixCrsGraph = getBlockCrsGraph<MatrixType>(this->A_);
466 this->Graph_ = rcp(new Ifpack2::IlukGraph<crs_graph_type, kk_handle_type>(matrixCrsGraph,
467 this->LevelOfFill_, 0));
468 }
469
470 if (this->isKokkosKernelsSpiluk_) {
471 if (!this->isKokkosKernelsStream_) {
472 KernelHandle_block_ = Teuchos::rcp(new kk_handle_type());
473 const auto numRows = this->A_local_->getLocalNumRows();
474 KernelHandle_block_->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
475 numRows,
476 2 * this->A_local_->getLocalNumEntries() * (this->LevelOfFill_ + 1),
477 2 * this->A_local_->getLocalNumEntries() * (this->LevelOfFill_ + 1),
478 blockSize_);
479 this->Graph_->initialize(KernelHandle_block_); // this calls spiluk_symbolic
480
481 L_Sptrsv_KernelHandle_ = Teuchos::rcp(new kk_handle_type());
482 U_Sptrsv_KernelHandle_ = Teuchos::rcp(new kk_handle_type());
483
484 KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
485
486 L_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows, true /*lower*/, blockSize_);
487 U_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows, false /*upper*/, blockSize_);
488 } else {
489 KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
490 KernelHandle_block_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(this->num_streams_);
491 L_Sptrsv_KernelHandle_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(this->num_streams_);
492 U_Sptrsv_KernelHandle_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(this->num_streams_);
493 for (int i = 0; i < this->num_streams_; i++) {
494 const auto numRows = A_block_local_diagblks_v_[i].numRows();
495 KernelHandle_block_v_[i] = Teuchos::rcp(new kk_handle_type());
496 KernelHandle_block_v_[i]->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
497 numRows,
498 2 * A_block_local_diagblks_v_[i].nnz() * (this->LevelOfFill_ + 1),
499 2 * A_block_local_diagblks_v_[i].nnz() * (this->LevelOfFill_ + 1),
500 blockSize_);
501 this->Graph_v_[i]->initialize(KernelHandle_block_v_[i]); // this calls spiluk_symbolic
502
503 L_Sptrsv_KernelHandle_v_[i] = Teuchos::rcp(new kk_handle_type());
504 U_Sptrsv_KernelHandle_v_[i] = Teuchos::rcp(new kk_handle_type());
505
506 L_Sptrsv_KernelHandle_v_[i]->create_sptrsv_handle(alg, numRows, true /*lower*/, blockSize_);
507 U_Sptrsv_KernelHandle_v_[i]->create_sptrsv_handle(alg, numRows, false /*upper*/, blockSize_);
508 }
509 }
510 } else {
511 this->Graph_->initialize();
512 }
513
514 allocate_L_and_U_blocks();
515
516#ifdef IFPACK2_RBILUK_INITIAL
517 initAllValues();
518#endif
519 } // Stop timing
520
521 this->isInitialized_ = true;
522 this->numInitialize_ += 1;
523 this->initializeTime_ += (timer.wallTime() - startTime);
524}
525
526template <class MatrixType>
529 using Teuchos::RCP;
530 typedef Tpetra::Map<LO, GO, node_type> map_type;
531
532 LO NumIn = 0, NumL = 0, NumU = 0;
533 bool DiagFound = false;
534 size_t NumNonzeroDiags = 0;
535 size_t MaxNumEntries = this->A_->getLocalMaxNumRowEntries();
536 LO blockMatSize = blockSize_ * blockSize_;
537
538 // First check that the local row map ordering is the same as the local portion of the column map.
539 // The extraction of the strictly lower/upper parts of A, as well as the factorization,
540 // implicitly assume that this is the case.
541 Teuchos::ArrayView<const GO> rowGIDs = this->A_->getRowMap()->getLocalElementList();
542 Teuchos::ArrayView<const GO> colGIDs = this->A_->getColMap()->getLocalElementList();
543 bool gidsAreConsistentlyOrdered = true;
544 GO indexOfInconsistentGID = 0;
545 for (GO i = 0; i < rowGIDs.size(); ++i) {
546 if (rowGIDs[i] != colGIDs[i]) {
547 gidsAreConsistentlyOrdered = false;
548 indexOfInconsistentGID = i;
549 break;
550 }
551 }
552 TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered == false, std::runtime_error,
553 "The ordering of the local GIDs in the row and column maps is not the same"
554 << std::endl
555 << "at index " << indexOfInconsistentGID
556 << ". Consistency is required, as all calculations are done with"
557 << std::endl
558 << "local indexing.");
559
560 // Allocate temporary space for extracting the strictly
561 // lower and upper parts of the matrix A.
562 Teuchos::Array<LO> LI(MaxNumEntries);
563 Teuchos::Array<LO> UI(MaxNumEntries);
564 Teuchos::Array<scalar_type> LV(MaxNumEntries * blockMatSize);
565 Teuchos::Array<scalar_type> UV(MaxNumEntries * blockMatSize);
566
567 Teuchos::Array<scalar_type> diagValues(blockMatSize);
568
569 L_block_->setAllToScalar(STM::zero()); // Zero out L and U matrices
570 U_block_->setAllToScalar(STM::zero());
571 D_block_->setAllToScalar(STM::zero()); // Set diagonal values to zero
572
573 // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
574 // host, so sync to host first. The const_cast is unfortunate but
575 // is our only option to make this correct.
576
577 /*
578 const_cast<block_crs_matrix_type&> (A).sync_host ();
579 L_block_->sync_host ();
580 U_block_->sync_host ();
581 D_block_->sync_host ();
582 // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
583 L_block_->modify_host ();
584 U_block_->modify_host ();
585 D_block_->modify_host ();
586 */
587
588 RCP<const map_type> rowMap = L_block_->getRowMap();
589
590 // First we copy the user's matrix into L and U, regardless of fill level.
591 // It is important to note that L and U are populated using local indices.
592 // This means that if the row map GIDs are not monotonically increasing
593 // (i.e., permuted or gappy), then the strictly lower (upper) part of the
594 // matrix is not the one that you would get if you based L (U) on GIDs.
595 // This is ok, as the *order* of the GIDs in the rowmap is a better
596 // expression of the user's intent than the GIDs themselves.
597
598 // TODO BMK: Revisit this fence when BlockCrsMatrix is refactored.
599 Kokkos::fence();
600 using LocalRowHandler_t = LocalRowHandler<MatrixType>;
601 LocalRowHandler_t localRowHandler(this->A_);
602 typename LocalRowHandler_t::inds_type InI;
603 typename LocalRowHandler_t::vals_type InV;
604 for (size_t myRow = 0; myRow < this->A_->getLocalNumRows(); ++myRow) {
605 LO local_row = myRow;
606
607 localRowHandler.getLocalRow(local_row, InI, InV, NumIn);
608
609 // Split into L and U (we don't assume that indices are ordered).
610 NumL = 0;
611 NumU = 0;
612 DiagFound = false;
613
614 for (LO j = 0; j < NumIn; ++j) {
615 const LO k = InI[j];
616 const LO blockOffset = blockMatSize * j;
617
618 if (k == local_row) {
619 DiagFound = true;
620 // Store perturbed diagonal in Tpetra::Vector D_
621 for (LO jj = 0; jj < blockMatSize; ++jj)
622 diagValues[jj] = this->Rthresh_ * InV[blockOffset + jj] + IFPACK2_SGN(InV[blockOffset + jj]) * this->Athresh_;
623 D_block_->replaceLocalValues(local_row, &InI[j], diagValues.getRawPtr(), 1);
624 } else if (k < 0) { // Out of range
625 TEUCHOS_TEST_FOR_EXCEPTION(
626 true, std::runtime_error,
627 "Ifpack2::RILUK::initAllValues: current "
628 "GID k = "
629 << k << " < 0. I'm not sure why this is an error; it is "
630 "probably an artifact of the undocumented assumptions of the "
631 "original implementation (likely copied and pasted from Ifpack). "
632 "Nevertheless, the code I found here insisted on this being an error "
633 "state, so I will throw an exception here.");
634 } else if (k < local_row) {
635 LI[NumL] = k;
636 const LO LBlockOffset = NumL * blockMatSize;
637 for (LO jj = 0; jj < blockMatSize; ++jj)
638 LV[LBlockOffset + jj] = InV[blockOffset + jj];
639 NumL++;
640 } else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
641 UI[NumU] = k;
642 const LO UBlockOffset = NumU * blockMatSize;
643 for (LO jj = 0; jj < blockMatSize; ++jj)
644 UV[UBlockOffset + jj] = InV[blockOffset + jj];
645 NumU++;
646 }
647 }
648
649 // Check in things for this row of L and U
650
651 if (DiagFound) {
652 ++NumNonzeroDiags;
653 } else {
654 for (LO jj = 0; jj < blockSize_; ++jj)
655 diagValues[jj * (blockSize_ + 1)] = this->Athresh_;
656 D_block_->replaceLocalValues(local_row, &local_row, diagValues.getRawPtr(), 1);
657 }
658
659 if (NumL) {
660 L_block_->replaceLocalValues(local_row, &LI[0], &LV[0], NumL);
661 }
662
663 if (NumU) {
664 U_block_->replaceLocalValues(local_row, &UI[0], &UV[0], NumU);
665 }
666 }
667
668 // NOTE (mfh 27 May 2016) Sync back to device, in case compute()
669 // ever gets a device implementation.
670 /*
671 {
672 typedef typename block_crs_matrix_type::device_type device_type;
673 const_cast<block_crs_matrix_type&> (A).template sync<device_type> ();
674 L_block_->template sync<device_type> ();
675 U_block_->template sync<device_type> ();
676 D_block_->template sync<device_type> ();
677 }
678 */
679 this->isInitialized_ = true;
680}
681
682namespace { // (anonymous)
683
684// For a given Kokkos::View type, possibly unmanaged, get the
685// corresponding managed Kokkos::View type. This is handy for
686// translating from little_block_type or little_host_vec_type (both
687// possibly unmanaged) to their managed versions.
688template <class LittleBlockType>
689struct GetManagedView {
690 static_assert(Kokkos::is_view<LittleBlockType>::value,
691 "LittleBlockType must be a Kokkos::View.");
692 typedef Kokkos::View<typename LittleBlockType::non_const_data_type,
693 typename LittleBlockType::array_layout,
694 typename LittleBlockType::device_type>
695 managed_non_const_type;
696 static_assert(static_cast<int>(managed_non_const_type::rank) ==
697 static_cast<int>(LittleBlockType::rank),
698 "managed_non_const_type::rank != LittleBlock::rank. "
699 "Please report this bug to the Ifpack2 developers.");
700};
701
702} // namespace
703
704template <class MatrixType>
706 using Teuchos::Array;
707 using Teuchos::RCP;
708 using Teuchos::rcp;
709
710 typedef impl_scalar_type IST;
711 const char prefix[] = "Ifpack2::Experimental::RBILUK::compute: ";
712
713 // initialize() checks this too, but it's easier for users if the
714 // error shows them the name of the method that they actually
715 // called, rather than the name of some internally called method.
716 TEUCHOS_TEST_FOR_EXCEPTION(this->A_.is_null(), std::runtime_error, prefix << "The matrix (A_, "
717 "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
718 "input before calling this method.");
719 TEUCHOS_TEST_FOR_EXCEPTION(!this->A_->isFillComplete(), std::runtime_error, prefix << "The matrix "
720 "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
721 "initialize() or compute() with this matrix until the matrix is fill "
722 "complete. Note: BlockCrsMatrix is fill complete if and only if its "
723 "underlying graph is fill complete.");
724
725 if (!this->isInitialized()) {
726 initialize(); // Don't count this in the compute() time
727 }
728
729 // // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
730 // // host, so sync to host first. The const_cast is unfortunate but
731 // // is our only option to make this correct.
732 // if (! A_block_.is_null ()) {
733 // Teuchos::RCP<block_crs_matrix_type> A_nc =
734 // Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
735 // // A_nc->sync_host ();
736 // }
737 /*
738 L_block_->sync_host ();
739 U_block_->sync_host ();
740 D_block_->sync_host ();
741 // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
742 L_block_->modify_host ();
743 U_block_->modify_host ();
744 D_block_->modify_host ();
745 */
746
747 Teuchos::Time timer("RBILUK::compute");
748 double startTime = timer.wallTime();
749 { // Start timing
750 Teuchos::TimeMonitor timeMon(timer);
751 this->isComputed_ = false;
752
753 // MinMachNum should be officially defined, for now pick something a little
754 // bigger than IEEE underflow value
755
756 // const scalar_type MinDiagonalValue = STS::rmin ();
757 // const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
758 if (!this->isKokkosKernelsSpiluk_) {
759 initAllValues();
760 size_t NumIn;
761 LO NumL, NumU, NumURead;
762
763 TEUCHOS_TEST_FOR_EXCEPTION(
764 this->isKokkosKernelsStream_, std::runtime_error,
765 "Ifpack2::RBILUK::compute: "
766 "streams are not yet supported without KokkosKernels.");
767
768 // Get Maximum Row length
769 const size_t MaxNumEntries =
770 L_block_->getLocalMaxNumRowEntries() + U_block_->getLocalMaxNumRowEntries() + 1;
771
772 const LO blockMatSize = blockSize_ * blockSize_;
773
774 // FIXME (mfh 08 Nov 2015, 24 May 2016) We need to move away from
775 // expressing these strides explicitly, in order to finish #177
776 // (complete Kokkos-ization of BlockCrsMatrix) thoroughly.
777 const LO rowStride = blockSize_;
778
779 Teuchos::Array<int> ipiv_teuchos(blockSize_);
780 Kokkos::View<int*, Kokkos::HostSpace,
781 Kokkos::MemoryUnmanaged>
782 ipiv(ipiv_teuchos.getRawPtr(), blockSize_);
783 Teuchos::Array<IST> work_teuchos(blockSize_);
784 Kokkos::View<IST*, Kokkos::HostSpace,
785 Kokkos::MemoryUnmanaged>
786 work(work_teuchos.getRawPtr(), blockSize_);
787
788 size_t num_cols = U_block_->getColMap()->getLocalNumElements();
789 Teuchos::Array<int> colflag(num_cols);
790
791 typename GetManagedView<little_block_host_type>::managed_non_const_type diagModBlock("diagModBlock", blockSize_, blockSize_);
792 typename GetManagedView<little_block_host_type>::managed_non_const_type matTmp("matTmp", blockSize_, blockSize_);
793 typename GetManagedView<little_block_host_type>::managed_non_const_type multiplier("multiplier", blockSize_, blockSize_);
794
795 // Teuchos::ArrayRCP<scalar_type> DV = D_->get1dViewNonConst(); // Get view of diagonal
796
797 // Now start the factorization.
798
799 // Need some integer workspace and pointers
800 LO NumUU;
801 for (size_t j = 0; j < num_cols; ++j) {
802 colflag[j] = -1;
803 }
804 Teuchos::Array<LO> InI(MaxNumEntries, 0);
805 Teuchos::Array<scalar_type> InV(MaxNumEntries * blockMatSize, STM::zero());
806
807 const LO numLocalRows = L_block_->getLocalNumRows();
808 for (LO local_row = 0; local_row < numLocalRows; ++local_row) {
809 // Fill InV, InI with current row of L, D and U combined
810
811 NumIn = MaxNumEntries;
812 local_inds_host_view_type colValsL;
813 values_host_view_type valsL;
814
815 L_block_->getLocalRowView(local_row, colValsL, valsL);
816 NumL = (LO)colValsL.size();
817 for (LO j = 0; j < NumL; ++j) {
818 const LO matOffset = blockMatSize * j;
819 little_block_host_type lmat((typename little_block_host_type::value_type*)&valsL[matOffset], blockSize_, rowStride);
820 little_block_host_type lmatV((typename little_block_host_type::value_type*)&InV[matOffset], blockSize_, rowStride);
821 // lmatV.assign(lmat);
822 Tpetra::COPY(lmat, lmatV);
823 InI[j] = colValsL[j];
824 }
825
826 little_block_host_type dmat = D_block_->getLocalBlockHostNonConst(local_row, local_row);
827 little_block_host_type dmatV((typename little_block_host_type::value_type*)&InV[NumL * blockMatSize], blockSize_, rowStride);
828 // dmatV.assign(dmat);
829 Tpetra::COPY(dmat, dmatV);
830 InI[NumL] = local_row;
831
832 local_inds_host_view_type colValsU;
833 values_host_view_type valsU;
834 U_block_->getLocalRowView(local_row, colValsU, valsU);
835 NumURead = (LO)colValsU.size();
836 NumU = 0;
837 for (LO j = 0; j < NumURead; ++j) {
838 if (!(colValsU[j] < numLocalRows)) continue;
839 InI[NumL + 1 + j] = colValsU[j];
840 const LO matOffset = blockMatSize * (NumL + 1 + j);
841 little_block_host_type umat((typename little_block_host_type::value_type*)&valsU[blockMatSize * j], blockSize_, rowStride);
842 little_block_host_type umatV((typename little_block_host_type::value_type*)&InV[matOffset], blockSize_, rowStride);
843 // umatV.assign(umat);
844 Tpetra::COPY(umat, umatV);
845 NumU += 1;
846 }
847 NumIn = NumL + NumU + 1;
848
849 // Set column flags
850 for (size_t j = 0; j < NumIn; ++j) {
851 colflag[InI[j]] = j;
852 }
853
854#ifndef IFPACK2_RBILUK_INITIAL
855 for (LO i = 0; i < blockSize_; ++i)
856 for (LO j = 0; j < blockSize_; ++j) {
857 {
858 diagModBlock(i, j) = 0;
859 }
860 }
861#else
862 scalar_type diagmod = STM::zero(); // Off-diagonal accumulator
863 Kokkos::deep_copy(diagModBlock, diagmod);
864#endif
865
866 for (LO jj = 0; jj < NumL; ++jj) {
867 LO j = InI[jj];
868 little_block_host_type currentVal((typename little_block_host_type::value_type*)&InV[jj * blockMatSize], blockSize_, rowStride); // current_mults++;
869 // multiplier.assign(currentVal);
870 Tpetra::COPY(currentVal, multiplier);
871
872 const little_block_host_type dmatInverse = D_block_->getLocalBlockHostNonConst(j, j);
873 // alpha = 1, beta = 0
874#ifndef IFPACK2_RBILUK_INITIAL_NOKK
875 KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
876 KokkosBatched::Trans::NoTranspose,
877 KokkosBatched::Algo::Gemm::Blocked>::invoke(STS::one(), currentVal, dmatInverse, STS::zero(), matTmp);
878#else
879 Tpetra::GEMM("N", "N", STS::one(), currentVal, dmatInverse,
880 STS::zero(), matTmp);
881#endif
882 // blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (currentVal.data ()), reinterpret_cast<impl_scalar_type*> (dmatInverse.data ()), reinterpret_cast<impl_scalar_type*> (matTmp.data ()), blockSize_);
883 // currentVal.assign(matTmp);
884 Tpetra::COPY(matTmp, currentVal);
885 local_inds_host_view_type UUI;
886 values_host_view_type UUV;
887
888 U_block_->getLocalRowView(j, UUI, UUV);
889 NumUU = (LO)UUI.size();
890
891 if (this->RelaxValue_ == STM::zero()) {
892 for (LO k = 0; k < NumUU; ++k) {
893 if (!(UUI[k] < numLocalRows)) continue;
894 const int kk = colflag[UUI[k]];
895 if (kk > -1) {
896 little_block_host_type kkval((typename little_block_host_type::value_type*)&InV[kk * blockMatSize], blockSize_, rowStride);
897 little_block_host_type uumat((typename little_block_host_type::value_type*)&UUV[k * blockMatSize], blockSize_, rowStride);
898#ifndef IFPACK2_RBILUK_INITIAL_NOKK
899 KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
900 KokkosBatched::Trans::NoTranspose,
901 KokkosBatched::Algo::Gemm::Blocked>::invoke(magnitude_type(-STM::one()), multiplier, uumat, STM::one(), kkval);
902#else
903 Tpetra::GEMM("N", "N", magnitude_type(-STM::one()), multiplier, uumat,
904 STM::one(), kkval);
905#endif
906 // blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (multiplier.data ()), reinterpret_cast<impl_scalar_type*> (uumat.data ()), reinterpret_cast<impl_scalar_type*> (kkval.data ()), blockSize_, -STM::one(), STM::one());
907 }
908 }
909 } else {
910 for (LO k = 0; k < NumUU; ++k) {
911 if (!(UUI[k] < numLocalRows)) continue;
912 const int kk = colflag[UUI[k]];
913 little_block_host_type uumat((typename little_block_host_type::value_type*)&UUV[k * blockMatSize], blockSize_, rowStride);
914 if (kk > -1) {
915 little_block_host_type kkval((typename little_block_host_type::value_type*)&InV[kk * blockMatSize], blockSize_, rowStride);
916#ifndef IFPACK2_RBILUK_INITIAL_NOKK
917 KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
918 KokkosBatched::Trans::NoTranspose,
919 KokkosBatched::Algo::Gemm::Blocked>::invoke(magnitude_type(-STM::one()), multiplier, uumat, STM::one(), kkval);
920#else
921 Tpetra::GEMM("N", "N", magnitude_type(-STM::one()), multiplier, uumat,
922 STM::one(), kkval);
923#endif
924 // blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(kkval.data ()), blockSize_, -STM::one(), STM::one());
925 } else {
926#ifndef IFPACK2_RBILUK_INITIAL_NOKK
927 KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
928 KokkosBatched::Trans::NoTranspose,
929 KokkosBatched::Algo::Gemm::Blocked>::invoke(magnitude_type(-STM::one()), multiplier, uumat, STM::one(), diagModBlock);
930#else
931 Tpetra::GEMM("N", "N", magnitude_type(-STM::one()), multiplier, uumat,
932 STM::one(), diagModBlock);
933#endif
934 // blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(diagModBlock.data ()), blockSize_, -STM::one(), STM::one());
935 }
936 }
937 }
938 }
939 if (NumL) {
940 // Replace current row of L
941 L_block_->replaceLocalValues(local_row, InI.getRawPtr(), InV.getRawPtr(), NumL);
942 }
943
944 // dmat.assign(dmatV);
945 Tpetra::COPY(dmatV, dmat);
946
947 if (this->RelaxValue_ != STM::zero()) {
948 // dmat.update(this->RelaxValue_, diagModBlock);
949 Tpetra::AXPY(this->RelaxValue_, diagModBlock, dmat);
950 }
951
952 // if (STS::magnitude (DV[i]) > STS::magnitude (MaxDiagonalValue)) {
953 // if (STS::real (DV[i]) < STM::zero ()) {
954 // DV[i] = -MinDiagonalValue;
955 // }
956 // else {
957 // DV[i] = MinDiagonalValue;
958 // }
959 // }
960 // else
961 {
962 int lapackInfo = 0;
963 for (int k = 0; k < blockSize_; ++k) {
964 ipiv[k] = 0;
965 }
966
967 Tpetra::GETF2(dmat, ipiv, lapackInfo);
968 // lapack.GETRF(blockSize_, blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), &lapackInfo);
969 TEUCHOS_TEST_FOR_EXCEPTION(
970 lapackInfo != 0, std::runtime_error,
971 "Ifpack2::Experimental::RBILUK::compute: "
972 "lapackInfo = "
973 << lapackInfo << " which indicates an error in the factorization GETRF.");
974
975 Tpetra::GETRI(dmat, ipiv, work, lapackInfo);
976 // lapack.GETRI(blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), work.getRawPtr(), lwork, &lapackInfo);
977 TEUCHOS_TEST_FOR_EXCEPTION(
978 lapackInfo != 0, std::runtime_error,
979 "Ifpack2::Experimental::RBILUK::compute: "
980 "lapackInfo = "
981 << lapackInfo << " which indicates an error in the matrix inverse GETRI.");
982 }
983
984 for (LO j = 0; j < NumU; ++j) {
985 little_block_host_type currentVal((typename little_block_host_type::value_type*)&InV[(NumL + 1 + j) * blockMatSize], blockSize_, rowStride); // current_mults++;
986 // scale U by the diagonal inverse
987#ifndef IFPACK2_RBILUK_INITIAL_NOKK
988 KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
989 KokkosBatched::Trans::NoTranspose,
990 KokkosBatched::Algo::Gemm::Blocked>::invoke(STS::one(), dmat, currentVal, STS::zero(), matTmp);
991#else
992 Tpetra::GEMM("N", "N", STS::one(), dmat, currentVal,
993 STS::zero(), matTmp);
994#endif
995 // blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(dmat.data ()), reinterpret_cast<impl_scalar_type*>(currentVal.data ()), reinterpret_cast<impl_scalar_type*>(matTmp.data ()), blockSize_);
996 // currentVal.assign(matTmp);
997 Tpetra::COPY(matTmp, currentVal);
998 }
999
1000 if (NumU) {
1001 // Replace current row of L and U
1002 U_block_->replaceLocalValues(local_row, &InI[NumL + 1], &InV[blockMatSize * (NumL + 1)], NumU);
1003 }
1004
1005#ifndef IFPACK2_RBILUK_INITIAL
1006 // Reset column flags
1007 for (size_t j = 0; j < NumIn; ++j) {
1008 colflag[InI[j]] = -1;
1009 }
1010#else
1011 // Reset column flags
1012 for (size_t j = 0; j < num_cols; ++j) {
1013 colflag[j] = -1;
1014 }
1015#endif
1016 }
1017 } // !this->isKokkosKernelsSpiluk_
1018 else {
1019 // If we are not using A_local_ directly, we must copy values in case of pattern reuse. Due
1020 // to having to deal with filling logical blocks and converting back and forth between CRS and
1021 // BSR, this is not very performant and a lot of computation has to be repeated. If you are going
1022 // to do pattern reuse, I recommend having A_local be a BSR.
1023 auto A_local_bcrs_temp = Details::getBcrsMatrix(this->A_local_);
1024 auto A_local_crs_temp = Details::getCrsMatrix(this->A_local_);
1025 if (A_local_bcrs_temp.is_null()) {
1026 if (A_local_crs_temp.is_null()) {
1027 // copy entries into A_local_crs_nc
1028 local_ordinal_type numRows = this->A_local_->getLocalNumRows();
1029 A_local_crs_nc_->resumeFill();
1030 nonconst_local_inds_host_view_type indices("indices", this->A_local_->getLocalMaxNumRowEntries());
1031 nonconst_values_host_view_type values("values", this->A_local_->getLocalMaxNumRowEntries());
1032 for (local_ordinal_type i = 0; i < numRows; i++) {
1033 size_t numEntries = 0;
1034 this->A_local_->getLocalRowCopy(i, indices, values, numEntries);
1035 A_local_crs_nc_->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
1036 }
1037 A_local_crs_nc_->fillComplete(this->A_local_->getDomainMap(), this->A_local_->getRangeMap());
1038 }
1039
1040 if (blockSize_ > 1) {
1041 auto crs_matrix_block_filled = Tpetra::fillLogicalBlocks(*A_local_crs_nc_, blockSize_);
1042 Kokkos::deep_copy(A_local_bcrs_->getLocalMatrixDevice().values,
1043 crs_matrix_block_filled->getLocalMatrixDevice().values);
1044 } else {
1045 Kokkos::deep_copy(A_local_bcrs_->getLocalMatrixDevice().values,
1046 A_local_crs_nc_->getLocalMatrixDevice().values);
1047 }
1048 }
1049
1050 if (!this->isKokkosKernelsStream_) {
1051 auto lclMtx = A_local_bcrs_->getLocalMatrixDevice();
1052 auto A_local_rowmap = lclMtx.graph.row_map;
1053 auto A_local_entries = lclMtx.graph.entries;
1054 auto A_local_values = lclMtx.values;
1055
1056 // L_block_->resumeFill ();
1057 // U_block_->resumeFill ();
1058
1059 if (L_block_->isLocallyIndexed()) {
1060 L_block_->setAllToScalar(STS::zero()); // Zero out L and U matrices
1061 U_block_->setAllToScalar(STS::zero());
1062 }
1063
1064 using row_map_type = typename local_matrix_device_type::row_map_type;
1065
1066 auto lclL = L_block_->getLocalMatrixDevice();
1067 row_map_type L_rowmap = lclL.graph.row_map;
1068 auto L_entries = lclL.graph.entries;
1069 auto L_values = lclL.values;
1070
1071 auto lclU = U_block_->getLocalMatrixDevice();
1072 row_map_type U_rowmap = lclU.graph.row_map;
1073 auto U_entries = lclU.graph.entries;
1074 auto U_values = lclU.values;
1075
1076 KokkosSparse::spiluk_numeric(KernelHandle_block_.getRawPtr(), this->LevelOfFill_,
1077 A_local_rowmap, A_local_entries, A_local_values,
1078 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values);
1079
1080 // Now call symbolic for sptrsvs
1081 KokkosSparse::sptrsv_symbolic(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values);
1082 KokkosSparse::sptrsv_symbolic(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values);
1083 } else {
1084 // Due to A_block_local_diagblks_values_v_, we must always refresh when streams are on
1085 assert(!this->hasStreamReordered_);
1086 auto lclMtx = A_local_bcrs_->getLocalMatrixDevice();
1087 auto lclCrsMtx = convertBsrToCrs<local_matrix_device_type, local_crs_matrix_device_type>(lclMtx);
1088 std::vector<local_crs_matrix_device_type> A_crs_local_diagblks_v(this->num_streams_);
1089#if KOKKOS_VERSION >= 50100
1090 KokkosSparse::Experimental::kk_extract_diagonal_blocks_crsmatrix_sequential(lclCrsMtx, A_crs_local_diagblks_v);
1091#else
1092 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclCrsMtx, A_crs_local_diagblks_v);
1093#endif
1094
1095 for (int i = 0; i < this->num_streams_; i++) {
1096 A_block_local_diagblks_v_[i] = local_matrix_device_type(A_crs_local_diagblks_v[i], blockSize_);
1097 // rowmap and entries should be the same, but not values
1098 A_block_local_diagblks_values_v_[i] = A_block_local_diagblks_v_[i].values;
1099 }
1100
1101 std::vector<lno_row_view_t> L_rowmap_v(this->num_streams_);
1102 std::vector<lno_nonzero_view_t> L_entries_v(this->num_streams_);
1103 std::vector<scalar_nonzero_view_t> L_values_v(this->num_streams_);
1104 std::vector<lno_row_view_t> U_rowmap_v(this->num_streams_);
1105 std::vector<lno_nonzero_view_t> U_entries_v(this->num_streams_);
1106 std::vector<scalar_nonzero_view_t> U_values_v(this->num_streams_);
1107 std::vector<kk_handle_type*> KernelHandle_rawptr_v_(this->num_streams_);
1108
1109 for (int i = 0; i < this->num_streams_; i++) {
1110 L_block_v_[i]->setAllToScalar(STS::zero()); // Zero out L and U matrices
1111 U_block_v_[i]->setAllToScalar(STS::zero());
1112
1113 auto lclL = L_block_v_[i]->getLocalMatrixDevice();
1114 L_rowmap_v[i] = lclL.graph.row_map;
1115 L_entries_v[i] = lclL.graph.entries;
1116 L_values_v[i] = lclL.values;
1117
1118 auto lclU = U_block_v_[i]->getLocalMatrixDevice();
1119 U_rowmap_v[i] = lclU.graph.row_map;
1120 U_entries_v[i] = lclU.graph.entries;
1121 U_values_v[i] = lclU.values;
1122 KernelHandle_rawptr_v_[i] = KernelHandle_block_v_[i].getRawPtr();
1123 }
1124
1125 // L_block_->resumeFill ();
1126 // U_block_->resumeFill ();
1127
1128 KokkosSparse::Experimental::spiluk_numeric_streams(
1129 this->exec_space_instances_, KernelHandle_rawptr_v_, this->LevelOfFill_,
1130 A_block_local_diagblks_rowmap_v_, A_block_local_diagblks_entries_v_, A_block_local_diagblks_values_v_,
1131 L_rowmap_v, L_entries_v, L_values_v,
1132 U_rowmap_v, U_entries_v, U_values_v);
1133
1134 // Now call symbolic for sptrsvs
1135 for (int i = 0; i < this->num_streams_; i++) {
1136 KokkosSparse::sptrsv_symbolic(L_Sptrsv_KernelHandle_v_[i].getRawPtr(), L_rowmap_v[i], L_entries_v[i], L_values_v[i]);
1137 KokkosSparse::sptrsv_symbolic(U_Sptrsv_KernelHandle_v_[i].getRawPtr(), U_rowmap_v[i], U_entries_v[i], U_values_v[i]);
1138 }
1139 }
1140 }
1141 } // Stop timing
1142
1143 // Sync everything back to device, for efficient solves.
1144 /*
1145 {
1146 typedef typename block_crs_matrix_type::device_type device_type;
1147 if (! A_block_.is_null ()) {
1148 Teuchos::RCP<block_crs_matrix_type> A_nc =
1149 Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
1150 A_nc->template sync<device_type> ();
1151 }
1152 L_block_->template sync<device_type> ();
1153 U_block_->template sync<device_type> ();
1154 D_block_->template sync<device_type> ();
1155 }
1156 */
1157
1158 this->isComputed_ = true;
1159 this->numCompute_ += 1;
1160 this->computeTime_ += (timer.wallTime() - startTime);
1161}
1162
1163template <class MatrixType>
1164template <typename X, typename Y>
1166 stream_apply_impl(const X& X_views, Y& Y_views, const bool use_temp_x, const bool use_temp_y, const std::vector<Teuchos::RCP<block_crs_matrix_type> >& LU_block_v, const std::vector<Teuchos::RCP<kk_handle_type> >& LU_sptrsv_handle_v, const LO numVecs) const {
1167 std::vector<lno_row_view_t> ptr_v(this->num_streams_);
1168 std::vector<lno_nonzero_view_t> ind_v(this->num_streams_);
1169 std::vector<scalar_nonzero_view_t> val_v(this->num_streams_);
1170 std::vector<kk_handle_type*> KernelHandle_rawptr_v(this->num_streams_);
1171
1172 for (int j = 0; j < numVecs; ++j) {
1173 auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), j);
1174 auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), j);
1175 std::vector<decltype(X_view)> x_v(this->num_streams_);
1176 std::vector<decltype(Y_view)> y_v(this->num_streams_);
1177 local_ordinal_type stream_begin = 0;
1178 local_ordinal_type stream_end;
1179 for (int i = 0; i < this->num_streams_; i++) {
1180 auto LU_bcrs_i = LU_block_v[i];
1181 auto LUlocal_i = LU_bcrs_i->getLocalMatrixDevice();
1182 ptr_v[i] = LUlocal_i.graph.row_map;
1183 ind_v[i] = LUlocal_i.graph.entries;
1184 val_v[i] = LUlocal_i.values;
1185 stream_end = stream_begin + LUlocal_i.numRows() * blockSize_;
1186 if (use_temp_x) {
1187 x_v[i] = Kokkos::subview(tmp_, Kokkos::make_pair(stream_begin, stream_end));
1188 } else {
1189 x_v[i] = Kokkos::subview(X_view, Kokkos::make_pair(stream_begin, stream_end));
1190 }
1191 if (use_temp_y) {
1192 y_v[i] = Kokkos::subview(tmp_, Kokkos::make_pair(stream_begin, stream_end));
1193 } else {
1194 y_v[i] = Kokkos::subview(Y_view, Kokkos::make_pair(stream_begin, stream_end));
1195 }
1196 KernelHandle_rawptr_v[i] = LU_sptrsv_handle_v[i].getRawPtr();
1197 stream_begin = stream_end;
1198 }
1199
1200 Kokkos::fence();
1201 KokkosSparse::Experimental::sptrsv_solve_streams(this->exec_space_instances_, KernelHandle_rawptr_v, ptr_v, ind_v, val_v, x_v, y_v);
1202 Kokkos::fence();
1203 }
1204}
1205
1206template <class MatrixType>
1208 apply(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1209 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1210 Teuchos::ETransp mode,
1211 scalar_type alpha,
1212 scalar_type beta) const {
1213 using Teuchos::RCP;
1214 typedef Tpetra::BlockMultiVector<scalar_type,
1216 BMV;
1217
1218 TEUCHOS_TEST_FOR_EXCEPTION(
1219 this->A_.is_null(), std::runtime_error,
1220 "Ifpack2::Experimental::RBILUK::apply: The matrix is "
1221 "null. Please call setMatrix() with a nonnull input, then initialize() "
1222 "and compute(), before calling this method.");
1223 TEUCHOS_TEST_FOR_EXCEPTION(
1224 !this->isComputed(), std::runtime_error,
1225 "Ifpack2::Experimental::RBILUK::apply: If you have not yet called compute(), "
1226 "you must call compute() before calling this method.");
1227 TEUCHOS_TEST_FOR_EXCEPTION(
1228 X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
1229 "Ifpack2::Experimental::RBILUK::apply: X and Y do not have the same number of columns. "
1230 "X.getNumVectors() = "
1231 << X.getNumVectors()
1232 << " != Y.getNumVectors() = " << Y.getNumVectors() << ".");
1233 TEUCHOS_TEST_FOR_EXCEPTION(
1234 STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
1235 "Ifpack2::Experimental::RBILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1236 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1237 "fixed. There is a FIXME in this file about this very issue.");
1238
1239 const LO blockMatSize = blockSize_ * blockSize_;
1240
1241 const LO rowStride = blockSize_;
1242
1243 Teuchos::Array<scalar_type> lclarray(blockSize_);
1244 little_host_vec_type lclvec((typename little_host_vec_type::value_type*)&lclarray[0], blockSize_);
1245 const scalar_type one = STM::one();
1246 const scalar_type zero = STM::zero();
1247
1248 Teuchos::Time timer("RBILUK::apply");
1249 double startTime = timer.wallTime();
1250 { // Start timing
1251 Teuchos::TimeMonitor timeMon(timer);
1252 if (!this->isKokkosKernelsSpiluk_) {
1253 BMV yBlock(Y, *(this->Graph_->getA_Graph()->getDomainMap()), blockSize_);
1254 const BMV xBlock(X, *(this->A_->getColMap()), blockSize_);
1255
1256 if (alpha == one && beta == zero) {
1257 if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
1258 // Start by solving L C = X for C. C must have the same Map
1259 // as D. We have to use a temp multivector, since our
1260 // implementation of triangular solves does not allow its
1261 // input and output to alias one another.
1262 //
1263 // FIXME (mfh 24 Jan 2014) Cache this temp multivector.
1264 const LO numVectors = xBlock.getNumVectors();
1265 BMV cBlock(*(this->Graph_->getA_Graph()->getDomainMap()), blockSize_, numVectors);
1266 BMV rBlock(*(this->Graph_->getA_Graph()->getDomainMap()), blockSize_, numVectors);
1267 for (LO imv = 0; imv < numVectors; ++imv) {
1268 for (size_t i = 0; i < D_block_->getLocalNumRows(); ++i) {
1269 LO local_row = i;
1270 const_host_little_vec_type xval =
1271 xBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
1272 little_host_vec_type cval =
1273 cBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
1274 // cval.assign(xval);
1275 Tpetra::COPY(xval, cval);
1276
1277 local_inds_host_view_type colValsL;
1278 values_host_view_type valsL;
1279 L_block_->getLocalRowView(local_row, colValsL, valsL);
1280 LO NumL = (LO)colValsL.size();
1281
1282 for (LO j = 0; j < NumL; ++j) {
1283 LO col = colValsL[j];
1284 const_host_little_vec_type prevVal =
1285 cBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
1286
1287 const LO matOffset = blockMatSize * j;
1288 little_block_host_type lij((typename little_block_host_type::value_type*)&valsL[matOffset], blockSize_, rowStride);
1289
1290 // cval.matvecUpdate(-one, lij, prevVal);
1291 Tpetra::GEMV(-one, lij, prevVal, cval);
1292 }
1293 }
1294 }
1295
1296 // Solve D R = C. Note that D has been replaced by D^{-1} at this point.
1297 D_block_->applyBlock(cBlock, rBlock);
1298
1299 // Solve U Y = R.
1300 for (LO imv = 0; imv < numVectors; ++imv) {
1301 const LO numRows = D_block_->getLocalNumRows();
1302 for (LO i = 0; i < numRows; ++i) {
1303 LO local_row = (numRows - 1) - i;
1304 const_host_little_vec_type rval =
1305 rBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
1306 little_host_vec_type yval =
1307 yBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
1308 // yval.assign(rval);
1309 Tpetra::COPY(rval, yval);
1310
1311 local_inds_host_view_type colValsU;
1312 values_host_view_type valsU;
1313 U_block_->getLocalRowView(local_row, colValsU, valsU);
1314 LO NumU = (LO)colValsU.size();
1315
1316 for (LO j = 0; j < NumU; ++j) {
1317 LO col = colValsU[NumU - 1 - j];
1318 const_host_little_vec_type prevVal =
1319 yBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
1320
1321 const LO matOffset = blockMatSize * (NumU - 1 - j);
1322 little_block_host_type uij((typename little_block_host_type::value_type*)&valsU[matOffset], blockSize_, rowStride);
1323
1324 // yval.matvecUpdate(-one, uij, prevVal);
1325 Tpetra::GEMV(-one, uij, prevVal, yval);
1326 }
1327 }
1328 }
1329 } else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
1330 TEUCHOS_TEST_FOR_EXCEPTION(
1331 true, std::runtime_error,
1332 "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
1333 }
1334 } else { // alpha != 1 or beta != 0
1335 if (alpha == zero) {
1336 if (beta == zero) {
1337 Y.putScalar(zero);
1338 } else {
1339 Y.scale(beta);
1340 }
1341 } else { // alpha != zero
1342 MV Y_tmp(Y.getMap(), Y.getNumVectors());
1343 apply(X, Y_tmp, mode);
1344 Y.update(alpha, Y_tmp, beta);
1345 }
1346 }
1347 } else {
1348 // Kokkos kernels impl.
1349 auto X_views = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
1350 auto Y_views = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
1351 const LO numVecs = X.getNumVectors();
1352
1353 TEUCHOS_TEST_FOR_EXCEPTION(mode != Teuchos::NO_TRANS, std::runtime_error,
1354 "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
1355
1356 if (!this->isKokkosKernelsStream_) {
1357 auto lclL = L_block_->getLocalMatrixDevice();
1358 auto L_rowmap = lclL.graph.row_map;
1359 auto L_entries = lclL.graph.entries;
1360 auto L_values = lclL.values;
1361
1362 auto lclU = U_block_->getLocalMatrixDevice();
1363 auto U_rowmap = lclU.graph.row_map;
1364 auto U_entries = lclU.graph.entries;
1365 auto U_values = lclU.values;
1366
1367 for (LO vec = 0; vec < numVecs; ++vec) {
1368 auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), vec);
1369 KokkosSparse::sptrsv_solve(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values, X_view, tmp_);
1370 }
1371
1372 for (LO vec = 0; vec < numVecs; ++vec) {
1373 auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec);
1374 KokkosSparse::sptrsv_solve(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values, tmp_, Y_view);
1375 }
1376 } else {
1377 stream_apply_impl(X_views, Y_views, false, true, L_block_v_, L_Sptrsv_KernelHandle_v_, numVecs);
1378 stream_apply_impl(X_views, Y_views, true, false, U_block_v_, U_Sptrsv_KernelHandle_v_, numVecs);
1379 }
1380
1381 KokkosBlas::axpby(alpha, Y_views, beta, Y_views);
1382 // Y.getWrappedDualView().sync();
1383 }
1384 } // Stop timing
1385
1386 this->numApply_ += 1;
1387 this->applyTime_ += (timer.wallTime() - startTime);
1388}
1389
1390template <class MatrixType>
1392 std::ostringstream os;
1393
1394 // Output is a valid YAML dictionary in flow style. If you don't
1395 // like everything on a single line, you should call describe()
1396 // instead.
1397 os << "\"Ifpack2::Experimental::RBILUK\": {";
1398 os << "Initialized: " << (this->isInitialized() ? "true" : "false") << ", "
1399 << "Computed: " << (this->isComputed() ? "true" : "false") << ", ";
1400
1401 os << "Level-of-fill: " << this->getLevelOfFill() << ", ";
1402
1403 if (this->A_.is_null()) {
1404 os << "Matrix: null";
1405 } else {
1406 os << "Global matrix dimensions: ["
1407 << this->A_->getGlobalNumRows() << ", " << this->A_->getGlobalNumCols() << "]"
1408 << ", Global nnz: " << this->A_->getGlobalNumEntries();
1409 }
1410
1411 os << "}";
1412 return os.str();
1413}
1414
1415} // namespace Experimental
1416
1417} // namespace Ifpack2
1418
1419// FIXME (mfh 26 Aug 2015) We only need to do instantiation for
1420// MatrixType = Tpetra::RowMatrix. Conversions to BlockCrsMatrix are
1421// handled internally via dynamic cast.
1422
1423#define IFPACK2_EXPERIMENTAL_RBILUK_INSTANT(S, LO, GO, N) \
1424 template class Ifpack2::Experimental::RBILUK<Tpetra::BlockCrsMatrix<S, LO, GO, N> >; \
1425 template class Ifpack2::Experimental::RBILUK<Tpetra::RowMatrix<S, LO, GO, N> >;
1426
1427#endif
File for utility functions.
ILU(k) factorization of a given Tpetra::BlockCrsMatrix.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:96
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:115
const block_crs_matrix_type & getUBlock() const
Return the U factor of the ILU factorization.
Definition Ifpack2_Experimental_RBILUK_def.hpp:208
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:120
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:107
void setMatrix(const Teuchos::RCP< const block_crs_matrix_type > &A)
Change the matrix to be preconditioned.
Definition Ifpack2_Experimental_RBILUK_def.hpp:162
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition Ifpack2_Experimental_RBILUK_def.hpp:1208
void compute()
Compute the (numeric) incomplete factorization.
Definition Ifpack2_Experimental_RBILUK_def.hpp:705
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:127
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:101
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:111
virtual ~RBILUK()
Destructor (declared virtual for memory safety).
Definition Ifpack2_Experimental_RBILUK_def.hpp:159
const block_crs_matrix_type & getDBlock() const
Return the diagonal entries of the ILU factorization.
Definition Ifpack2_Experimental_RBILUK_def.hpp:195
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition Ifpack2_Experimental_RBILUK_def.hpp:344
std::string description() const
A one-line description of this object.
Definition Ifpack2_Experimental_RBILUK_def.hpp:1391
const block_crs_matrix_type & getLBlock() const
Return the L factor of the ILU factorization.
Definition Ifpack2_Experimental_RBILUK_def.hpp:182
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:134
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition Ifpack2_IlukGraph.hpp:67
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition Ifpack2_RILUK_decl.hpp:218
static Teuchos::RCP< const row_matrix_type > makeLocalFilter(const Teuchos::RCP< const row_matrix_type > &A)
Return A, wrapped in a LocalFilter, if necessary.
Definition Ifpack2_RILUK_def.hpp:438
Ifpack2 features that are experimental. Use at your own risk.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40