Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_SparseContainer_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_SPARSECONTAINER_DEF_HPP
11#define IFPACK2_SPARSECONTAINER_DEF_HPP
12
14#ifdef HAVE_MPI
15#include <mpi.h>
16#include "Teuchos_DefaultMpiComm.hpp"
17#else
18#include "Teuchos_DefaultSerialComm.hpp"
19#endif
20#include "Teuchos_TestForException.hpp"
21
22namespace Ifpack2 {
23
24//==============================================================================
25template <class MatrixType, class InverseType>
27 SparseContainer(const Teuchos::RCP<const row_matrix_type>& matrix,
28 const Teuchos::Array<Teuchos::Array<LO>>& partitions,
29 const Teuchos::RCP<const import_type>&,
30 bool pointIndexed)
31 : ContainerImpl<MatrixType, InverseScalar>(matrix, partitions, pointIndexed)
32 ,
33#ifdef HAVE_MPI
34 localComm_(Teuchos::rcp(new Teuchos::MpiComm<int>(MPI_COMM_SELF)))
35#else
36 localComm_(Teuchos::rcp(new Teuchos::SerialComm<int>()))
37#endif // HAVE_MPI
38{
39}
40
41//==============================================================================
42template <class MatrixType, class InverseType>
45
46//==============================================================================
47template <class MatrixType, class InverseType>
48void SparseContainer<MatrixType, InverseType>::setParameters(const Teuchos::ParameterList& List) {
49 List_ = List;
50}
51
52//==============================================================================
53template <class MatrixType, class InverseType>
55 // We assume that if you called this method, you intend to recompute
56 // everything. Thus, we release references to all the internal
57 // objects. We do this first to save memory. (In an RCP
58 // assignment, the right-hand side and left-hand side coexist before
59 // the left-hand side's reference count gets updated.)
60 Teuchos::RCP<Teuchos::Time> timer =
61 Teuchos::TimeMonitor::getNewCounter("Ifpack2::SparseContainer::initialize");
62 Teuchos::TimeMonitor timeMon(*timer);
63
64 // Will create the diagonal blocks and their inverses
65 // in extract()
66 diagBlocks_.assign(this->numBlocks_, Teuchos::null);
67 Inverses_.assign(this->numBlocks_, Teuchos::null);
68
69 // Extract the submatrices.
70 this->extractGraph();
71
72 // Initialize the inverse operator.
73 for (int i = 0; i < this->numBlocks_; i++) {
74 Inverses_[i]->setParameters(List_);
75 Inverses_[i]->initialize();
76 }
77
78 this->IsInitialized_ = true;
79 this->IsComputed_ = false;
80}
81
82//==============================================================================
83template <class MatrixType, class InverseType>
85 Teuchos::RCP<Teuchos::Time> timer =
86 Teuchos::TimeMonitor::getNewCounter("Ifpack2::SparseContainer::compute");
87 Teuchos::TimeMonitor timeMon(*timer);
88
89 this->IsComputed_ = false;
90 if (!this->isInitialized()) {
91 this->initialize();
92 }
93
94 // Extract the submatrices values
95 this->extractValues();
96
97 // Compute the inverse operator.
98 for (int i = 0; i < this->numBlocks_; i++) {
99 Inverses_[i]->compute();
100 }
101
102 this->IsComputed_ = true;
103}
104
105//==============================================================================
106template <class MatrixType, class InverseType>
108 for (auto inv : Inverses_)
109 delete inv.get();
110 Inverses_.clear();
111 diagBlocks_.clear();
113}
114
115//==============================================================================
116template <class MatrixType, class InverseType>
118 solveBlockMV(const inverse_mv_type& X,
119 inverse_mv_type& Y,
120 int blockIndex,
121 Teuchos::ETransp mode,
122 InverseScalar alpha,
123 InverseScalar beta) const {
124 TEUCHOS_TEST_FOR_EXCEPTION(
125 Inverses_[blockIndex]->getDomainMap()->getLocalNumElements() != X.getLocalLength(),
126 std::logic_error,
127 "Ifpack2::SparseContainer::apply: Inverse_ "
128 "operator and X have incompatible dimensions ("
129 << Inverses_[blockIndex]->getDomainMap()->getLocalNumElements() << " resp. "
130 << X.getLocalLength() << "). Please report this bug to "
131 "the Ifpack2 developers.");
132 TEUCHOS_TEST_FOR_EXCEPTION(
133 Inverses_[blockIndex]->getRangeMap()->getLocalNumElements() != Y.getLocalLength(),
134 std::logic_error,
135 "Ifpack2::SparseContainer::apply: Inverse_ "
136 "operator and Y have incompatible dimensions ("
137 << Inverses_[blockIndex]->getRangeMap()->getLocalNumElements() << " resp. "
138 << Y.getLocalLength() << "). Please report this bug to "
139 "the Ifpack2 developers.");
140 Inverses_[blockIndex]->apply(X, Y, mode, alpha, beta);
141}
142
143template <class MatrixType, class InverseType>
145 apply(ConstHostView X,
146 HostView Y,
147 int blockIndex,
148 Teuchos::ETransp mode,
149 SC alpha,
150 SC beta) const {
151 using Teuchos::ArrayView;
152 using Teuchos::as;
153
154 // The InverseType template parameter might have different template
155 // parameters (Scalar, LO, GO, and/or Node) than MatrixType. For
156 // example, MatrixType (a global object) might use a bigger GO
157 // (global ordinal) type than InverseType (which deals with the
158 // diagonal block, a local object). This means that we might have
159 // to convert X and Y to the Tpetra::MultiVector specialization that
160 // InverseType wants. This class' X_ and Y_ internal fields are of
161 // the right type for InverseType, so we can use those as targets.
162 Teuchos::RCP<Teuchos::Time> timer =
163 Teuchos::TimeMonitor::getNewCounter("Ifpack2::SparseContainer::apply");
164 Teuchos::TimeMonitor timeMon(*timer);
165
166 // Tpetra::MultiVector specialization corresponding to InverseType.
168 size_t numVecs = X.extent(1);
169
170 TEUCHOS_TEST_FOR_EXCEPTION(
171 !this->IsComputed_, std::runtime_error,
172 "Ifpack2::SparseContainer::apply: "
173 "You must have called the compute() method before you may call apply(). "
174 "You may call the apply() method as many times as you want after calling "
175 "compute() once, but you must have called compute() at least once.");
176 TEUCHOS_TEST_FOR_EXCEPTION(
177 X.extent(1) != Y.extent(1), std::runtime_error,
178 "Ifpack2::SparseContainer::apply: X and Y have different numbers of "
179 "vectors. X has "
180 << X.extent(1)
181 << ", but Y has " << Y.extent(1) << ".");
182
183 if (numVecs == 0) {
184 return; // done! nothing to do
185 }
186
187 const LO numRows = this->blockSizes_[blockIndex];
188
189 // The operator Inverse_ works on a permuted subset of the local
190 // parts of X and Y. The subset and permutation are defined by the
191 // index array returned by getBlockRows(). If the permutation is
192 // trivial and the subset is exactly equal to the local indices,
193 // then we could use the local parts of X and Y exactly, without
194 // needing to permute. Otherwise, we have to use temporary storage
195 // to permute X and Y. For now, we always use temporary storage.
196 //
197 // Create temporary permuted versions of the input and output.
198 // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
199 // store the permuted versions of X resp. Y. Note that X_local has
200 // the domain Map of the operator, which may be a permuted subset of
201 // the local Map corresponding to X.getMap(). Similarly, Y_local
202 // has the range Map of the operator, which may be a permuted subset
203 // of the local Map corresponding to Y.getMap(). numRows here
204 // gives the number of rows in the row Map of the local Inverse_
205 // operator.
206 //
207 // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
208 // here that the row Map and the range Map of that operator are
209 // the same.
210 //
211 // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
212 // really belongs in Tpetra.
213
214 if (invX.size() == 0) {
215 for (LO i = 0; i < this->numBlocks_; i++)
216 invX.emplace_back(Inverses_[i]->getDomainMap(), numVecs);
217 for (LO i = 0; i < this->numBlocks_; i++)
218 invY.emplace_back(Inverses_[i]->getDomainMap(), numVecs);
219 }
220 inverse_mv_type& X_local = invX[blockIndex];
221 TEUCHOS_TEST_FOR_EXCEPTION(
222 X_local.getLocalLength() != size_t(numRows * this->scalarsPerRow_), std::logic_error,
223 "Ifpack2::SparseContainer::apply: "
224 "X_local has length "
225 << X_local.getLocalLength() << ", which does "
226 "not match numRows = "
227 << numRows * this->scalarsPerRow_ << ". Please report this bug to "
228 "the Ifpack2 developers.");
229 const ArrayView<const LO> blockRows = this->getBlockRows(blockIndex);
230 if (this->scalarsPerRow_ == 1)
231 mvgs.gatherMVtoView(X_local, X, blockRows);
232 else
233 mvgs.gatherMVtoViewBlock(X_local, X, blockRows, this->scalarsPerRow_);
234
235 // We must gather the output multivector Y even on input to
236 // Inverse_->apply(), since the Inverse_ operator might use it as an
237 // initial guess for a linear solve. We have no way of knowing
238 // whether it does or does not.
239
240 inverse_mv_type& Y_local = invY[blockIndex];
241 TEUCHOS_TEST_FOR_EXCEPTION(
242 Y_local.getLocalLength() != size_t(numRows * this->scalarsPerRow_), std::logic_error,
243 "Ifpack2::SparseContainer::apply: "
244 "Y_local has length "
245 << Y_local.getLocalLength() << ", which does "
246 "not match numRows = "
247 << numRows * this->scalarsPerRow_ << ". Please report this bug to "
248 "the Ifpack2 developers.");
249
250 if (this->scalarsPerRow_ == 1)
251 mvgs.gatherMVtoView(Y_local, Y, blockRows);
252 else
253 mvgs.gatherMVtoViewBlock(Y_local, Y, blockRows, this->scalarsPerRow_);
254
255 // Apply the local operator:
256 // Y_local := beta*Y_local + alpha*M^{-1}*X_local
257 this->solveBlockMV(X_local, Y_local, blockIndex, mode,
258 InverseScalar(alpha), InverseScalar(beta));
259
260 // Scatter the permuted subset output vector Y_local back into the
261 // original output multivector Y.
262 if (this->scalarsPerRow_ == 1)
263 mvgs.scatterMVtoView(Y, Y_local, blockRows);
264 else
265 mvgs.scatterMVtoViewBlock(Y, Y_local, blockRows, this->scalarsPerRow_);
266}
267
268//==============================================================================
269template <class MatrixType, class InverseType>
271 weightedApply(ConstHostView X,
272 HostView Y,
273 ConstHostView D,
274 int blockIndex,
275 Teuchos::ETransp mode,
276 SC alpha,
277 SC beta) const {
278 using std::cerr;
279 using std::endl;
280 using Teuchos::ArrayView;
281 using Teuchos::Range1D;
282 typedef Teuchos::ScalarTraits<InverseScalar> STS;
283
284 // The InverseType template parameter might have different template
285 // parameters (Scalar, LO, GO, and/or Node) than MatrixType. For
286 // example, MatrixType (a global object) might use a bigger GO
287 // (global ordinal) type than InverseType (which deals with the
288 // diagonal block, a local object). This means that we might have
289 // to convert X and Y to the Tpetra::MultiVector specialization that
290 // InverseType wants. This class' X_ and Y_ internal fields are of
291 // the right type for InverseType, so we can use those as targets.
292
293 // Tpetra::Vector specialization corresponding to InverseType.
294 typedef Tpetra::Vector<InverseScalar, InverseLocalOrdinal, InverseGlobalOrdinal, InverseNode> inverse_vector_type;
295
297 const size_t numVecs = X.extent(1);
298
299 TEUCHOS_TEST_FOR_EXCEPTION(
300 !this->IsComputed_, std::runtime_error,
301 "Ifpack2::SparseContainer::"
302 "weightedApply: You must have called the compute() method before you may "
303 "call apply(). You may call the apply() method as many times as you want "
304 "after calling compute() once, but you must have called compute() at least "
305 "once.");
306 TEUCHOS_TEST_FOR_EXCEPTION(
307 X.extent(1) != Y.extent(1), std::runtime_error,
308 "Ifpack2::SparseContainer::weightedApply: X and Y have different numbers "
309 "of vectors. X has "
310 << X.extent(1) << ", but Y has "
311 << Y.extent(1) << ".");
312
313 // bmk 7-2019: BlockRelaxation already checked this, but if that changes...
314 TEUCHOS_TEST_FOR_EXCEPTION(
315 this->scalarsPerRow_ > 1, std::logic_error,
316 "Ifpack2::SparseContainer::weightedApply: "
317 "Use of block rows isn't allowed in overlapping Jacobi (the only method that uses weightedApply");
318
319 if (numVecs == 0) {
320 return; // done! nothing to do
321 }
322
323 // The operator Inverse_ works on a permuted subset of the local
324 // parts of X and Y. The subset and permutation are defined by the
325 // index array returned by getBlockRows(). If the permutation is
326 // trivial and the subset is exactly equal to the local indices,
327 // then we could use the local parts of X and Y exactly, without
328 // needing to permute. Otherwise, we have to use temporary storage
329 // to permute X and Y. For now, we always use temporary storage.
330 //
331 // Create temporary permuted versions of the input and output.
332 // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
333 // store the permuted versions of X resp. Y. Note that X_local has
334 // the domain Map of the operator, which may be a permuted subset of
335 // the local Map corresponding to X.getMap(). Similarly, Y_local
336 // has the range Map of the operator, which may be a permuted subset
337 // of the local Map corresponding to Y.getMap(). numRows here
338 // gives the number of rows in the row Map of the local Inverse_
339 // operator.
340 //
341 // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
342 // here that the row Map and the range Map of that operator are
343 // the same.
344 //
345 // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
346 // really belongs in Tpetra.
347 const LO numRows = this->blockSizes_[blockIndex];
348
349 if (invX.size() == 0) {
350 for (LO i = 0; i < this->numBlocks_; i++)
351 invX.emplace_back(Inverses_[i]->getDomainMap(), numVecs);
352 for (LO i = 0; i < this->numBlocks_; i++)
353 invY.emplace_back(Inverses_[i]->getDomainMap(), numVecs);
354 }
355 inverse_mv_type& X_local = invX[blockIndex];
356 const ArrayView<const LO> blockRows = this->getBlockRows(blockIndex);
357 mvgs.gatherMVtoView(X_local, X, blockRows);
358
359 // We must gather the output multivector Y even on input to
360 // Inverse_->apply(), since the Inverse_ operator might use it as an
361 // initial guess for a linear solve. We have no way of knowing
362 // whether it does or does not.
363
364 inverse_mv_type Y_local = invY[blockIndex];
365 TEUCHOS_TEST_FOR_EXCEPTION(
366 Y_local.getLocalLength() != size_t(numRows), std::logic_error,
367 "Ifpack2::SparseContainer::weightedApply: "
368 "Y_local has length "
369 << X_local.getLocalLength() << ", which does "
370 "not match numRows = "
371 << numRows << ". Please report this bug to "
372 "the Ifpack2 developers.");
373 mvgs.gatherMVtoView(Y_local, Y, blockRows);
374
375 // Apply the diagonal scaling D to the input X. It's our choice
376 // whether the result has the original input Map of X, or the
377 // permuted subset Map of X_local. If the latter, we also need to
378 // gather D into the permuted subset Map. We choose the latter, to
379 // save memory and computation. Thus, we do the following:
380 //
381 // 1. Gather D into a temporary vector D_local.
382 // 2. Create a temporary X_scaled to hold diag(D_local) * X_local.
383 // 3. Compute X_scaled := diag(D_loca) * X_local.
384
385 inverse_vector_type D_local(Inverses_[blockIndex]->getDomainMap());
386 TEUCHOS_TEST_FOR_EXCEPTION(
387 D_local.getLocalLength() != size_t(this->blockSizes_[blockIndex]), std::logic_error,
388 "Ifpack2::SparseContainer::weightedApply: "
389 "D_local has length "
390 << X_local.getLocalLength() << ", which does "
391 "not match numRows = "
392 << this->blockSizes_[blockIndex] << ". Please report this bug to "
393 "the Ifpack2 developers.");
394 mvgs.gatherMVtoView(D_local, D, blockRows);
395 inverse_mv_type X_scaled(Inverses_[blockIndex]->getDomainMap(), numVecs);
396 X_scaled.elementWiseMultiply(STS::one(), D_local, X_local, STS::zero());
397
398 // Y_temp will hold the result of M^{-1}*X_scaled. If beta == 0, we
399 // can write the result of Inverse_->apply() directly to Y_local, so
400 // Y_temp may alias Y_local. Otherwise, if beta != 0, we need
401 // temporary storage for M^{-1}*X_scaled, so Y_temp must be
402 // different than Y_local.
403 inverse_mv_type* Y_temp;
404 if (InverseScalar(beta) == STS::zero()) {
405 Y_temp = &Y_local;
406 } else {
407 Y_temp = new inverse_mv_type(Inverses_[blockIndex]->getRangeMap(), numVecs);
408 }
409 // Apply the local operator: Y_temp := M^{-1} * X_scaled
410 Inverses_[blockIndex]->apply(X_scaled, *Y_temp, mode);
411 // Y_local := beta * Y_local + alpha * diag(D_local) * Y_tmp.
412 //
413 // Note that we still use the permuted subset scaling D_local here,
414 // because Y_temp has the same permuted subset Map. That's good, in
415 // fact, because it's a subset: less data to read and multiply.
416 Y_local.elementWiseMultiply(alpha, D_local, *Y_temp, beta);
417 if (Y_temp != &Y_local)
418 delete Y_temp;
419 // Copy the permuted subset output vector Y_local into the original
420 // output multivector Y.
421 mvgs.scatterMVtoView(Y, Y_local, blockRows);
422}
423
424//==============================================================================
425template <class MatrixType, class InverseType>
426std::ostream& SparseContainer<MatrixType, InverseType>::print(std::ostream& os) const {
427 Teuchos::FancyOStream fos(Teuchos::rcp(&os, false));
428 fos.setOutputToRootOnly(0);
429 describe(fos);
430 return (os);
431}
432
433//==============================================================================
434template <class MatrixType, class InverseType>
436 std::ostringstream oss;
437 oss << "\"Ifpack2::SparseContainer\": {";
438 if (this->isInitialized()) {
439 if (this->isComputed()) {
440 oss << "status = initialized, computed";
441 } else {
442 oss << "status = initialized, not computed";
443 }
444 } else {
445 oss << "status = not initialized, not computed";
446 }
447 for (int i = 0; i < this->numBlocks_; i++) {
448 oss << ", Block Inverse " << i << ": {";
449 oss << Inverses_[i]->description();
450 oss << "}";
451 }
452 oss << "}";
453 return oss.str();
454}
455
456//==============================================================================
457template <class MatrixType, class InverseType>
458void SparseContainer<MatrixType, InverseType>::describe(Teuchos::FancyOStream& os, const Teuchos::EVerbosityLevel verbLevel) const {
459 using std::endl;
460 if (verbLevel == Teuchos::VERB_NONE) return;
461 os << "================================================================================" << endl;
462 os << "Ifpack2::SparseContainer" << endl;
463 for (int i = 0; i < this->numBlocks_; i++) {
464 os << "Block " << i << " rows: = " << this->blockSizes_[i] << endl;
465 }
466 os << "isInitialized() = " << this->IsInitialized_ << endl;
467 os << "isComputed() = " << this->IsComputed_ << endl;
468 os << "================================================================================" << endl;
469 os << endl;
470}
471
472//==============================================================================
473template <class MatrixType, class InverseType>
475 extract() {
476 using Teuchos::Array;
477 using Teuchos::ArrayView;
478 using Teuchos::RCP;
479 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
480 // To extract diagonal blocks, need to translate local rows to local columns.
481 // Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
482 // blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
483 // offset - blockOffsets_[b]: gives the column within block b.
484 //
485 // This provides the block and col within a block in O(1).
486 Teuchos::Array<InverseGlobalOrdinal> indicesToInsert;
487 Teuchos::Array<InverseScalar> valuesToInsert;
488 if (this->scalarsPerRow_ > 1) {
489 Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
490 for (int i = 0; i < this->numBlocks_; i++) {
491 // Get the interval where block i is defined in blockRows_
492 LO blockStart = this->blockOffsets_[i];
493 LO blockSize = this->blockSizes_[i];
494 LO blockPointSize = this->blockSizes_[i] * this->scalarsPerRow_;
495 LO blockEnd = blockStart + blockSize;
496 ArrayView<const LO> blockRows = this->getBlockRows(i);
497 // Set the lookup table entries for the columns appearing in block i.
498 // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
499 // this is OK. The values updated here are only needed to process block i's entries.
500 for (LO j = 0; j < blockSize; j++) {
501 LO localCol = this->translateRowToCol(blockRows[j]);
502 colToBlockOffset[localCol] = blockStart + j;
503 }
504 // First, count the number of entries in each row of block i
505 //(to allocate it with StaticProfile)
506 Array<size_t> rowEntryCounts(blockPointSize, 0);
507 // blockRow counts the BlockCrs LIDs that are going into this block
508 // Rows are inserted into the CrsMatrix in sequential order
509 using inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
510 using vals_type = typename block_crs_matrix_type::values_host_view_type;
511 for (LO blockRow = 0; blockRow < blockSize; blockRow++) {
512 // get a raw view of the whole block row
513 inds_type indices;
514 vals_type values;
515 LO inputRow = this->blockRows_[blockStart + blockRow];
516 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
517 LO numEntries = (LO)indices.size();
518 for (LO br = 0; br < this->bcrsBlockSize_; br++) {
519 for (LO k = 0; k < numEntries; k++) {
520 LO colOffset = colToBlockOffset[indices[k]];
521 if (blockStart <= colOffset && colOffset < blockEnd) {
522 rowEntryCounts[blockRow * this->bcrsBlockSize_ + br] += this->bcrsBlockSize_;
523 }
524 }
525 }
526 }
527 // now that row sizes are known, can allocate the diagonal matrix
528 RCP<InverseMap> tempMap(new InverseMap(blockPointSize, 0, this->localComm_));
529 diagBlocks_[i] = rcp(new InverseCrs(tempMap, rowEntryCounts));
530 Inverses_[i] = rcp(new InverseType(diagBlocks_[i]));
531 // insert the actual entries, one row at a time
532 for (LO blockRow = 0; blockRow < blockSize; blockRow++) {
533 // get a raw view of the whole block row
534 inds_type indices;
535 vals_type values;
536 LO inputRow = this->blockRows_[blockStart + blockRow];
537 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
538 LO numEntries = (LO)indices.size();
539 for (LO br = 0; br < this->bcrsBlockSize_; br++) {
540 indicesToInsert.clear();
541 valuesToInsert.clear();
542 for (LO k = 0; k < numEntries; k++) {
543 LO colOffset = colToBlockOffset[indices[k]];
544 if (blockStart <= colOffset && colOffset < blockEnd) {
545 LO blockCol = colOffset - blockStart;
546 // bc iterates over the columns in (block) entry k
547 for (LO bc = 0; bc < this->bcrsBlockSize_; bc++) {
548 indicesToInsert.push_back(blockCol * this->bcrsBlockSize_ + bc);
549 valuesToInsert.push_back(values[k * this->bcrsBlockSize_ * this->bcrsBlockSize_ + bc * this->bcrsBlockSize_ + br]);
550 }
551 }
552 }
553 InverseGlobalOrdinal rowToInsert = blockRow * this->bcrsBlockSize_ + br;
554 if (indicesToInsert.size())
555 diagBlocks_[i]->insertGlobalValues(rowToInsert, indicesToInsert(), valuesToInsert());
556 }
557 }
558 diagBlocks_[i]->fillComplete();
559 }
560 } else {
561 // get the mapping from point-indexed matrix columns to offsets in blockRows_
562 //(this includes regular CrsMatrix columns, in which case scalarsPerRow_ == 1)
563 Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
564 for (int i = 0; i < this->numBlocks_; i++) {
565 // Get the interval where block i is defined in blockRows_
566 LO blockStart = this->blockOffsets_[i];
567 LO blockSize = this->blockSizes_[i];
568 LO blockEnd = blockStart + blockSize;
569 ArrayView<const LO> blockRows = this->getBlockRows(i);
570 // Set the lookup table entries for the columns appearing in block i.
571 // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
572 // this is OK. The values updated here are only needed to process block i's entries.
573 for (LO j = 0; j < blockSize; j++) {
574 // translateRowToCol will return the corresponding split column
575 LO localCol = this->translateRowToCol(blockRows[j]);
576 colToBlockOffset[localCol] = blockStart + j;
577 }
578 Teuchos::Array<size_t> rowEntryCounts(blockSize, 0);
579 for (LO j = 0; j < blockSize; j++) {
580 rowEntryCounts[j] = this->getInputRowView(this->blockRows_[blockStart + j]).size();
581 }
582 RCP<InverseMap> tempMap(new InverseMap(blockSize, 0, this->localComm_));
583 diagBlocks_[i] = rcp(new InverseCrs(tempMap, rowEntryCounts));
584 Inverses_[i] = rcp(new InverseType(diagBlocks_[i]));
585 for (LO blockRow = 0; blockRow < blockSize; blockRow++) {
586 valuesToInsert.clear();
587 indicesToInsert.clear();
588 // get a view of the split row
589 LO inputSplitRow = this->blockRows_[blockStart + blockRow];
590 auto rowView = this->getInputRowView(inputSplitRow);
591 for (size_t k = 0; k < rowView.size(); k++) {
592 LO colOffset = colToBlockOffset[rowView.ind(k)];
593 if (blockStart <= colOffset && colOffset < blockEnd) {
594 LO blockCol = colOffset - blockStart;
595 indicesToInsert.push_back(blockCol);
596 valuesToInsert.push_back(rowView.val(k));
597 }
598 }
599 if (indicesToInsert.size())
600 diagBlocks_[i]->insertGlobalValues(blockRow, indicesToInsert(), valuesToInsert());
601 }
602 diagBlocks_[i]->fillComplete();
603 }
604 }
605}
606
607//==============================================================================
608template <class MatrixType, class InverseType>
609void SparseContainer<MatrixType, InverseType>::
610 extractGraph() {
611 using Teuchos::Array;
612 using Teuchos::ArrayView;
613 using Teuchos::RCP;
614 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
615 // To extract diagonal blocks, need to translate local rows to local columns.
616 // Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
617 // blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
618 // offset - blockOffsets_[b]: gives the column within block b.
619 //
620 // This provides the block and col within a block in O(1).
621 Teuchos::RCP<Teuchos::Time> timer =
622 Teuchos::TimeMonitor::getNewCounter("Ifpack2::SparseContainer::extractGraph");
623 Teuchos::TimeMonitor timeMon(*timer);
624
625 Teuchos::Array<InverseGlobalOrdinal> indicesToInsert;
626 if (this->scalarsPerRow_ > 1) {
627 Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
628 for (int i = 0; i < this->numBlocks_; i++) {
629 // Get the interval where block i is defined in blockRows_
630 LO blockStart = this->blockOffsets_[i];
631 LO blockSize = this->blockSizes_[i];
632 LO blockPointSize = this->blockSizes_[i] * this->scalarsPerRow_;
633 LO blockEnd = blockStart + blockSize;
634 ArrayView<const LO> blockRows = this->getBlockRows(i);
635 // Set the lookup table entries for the columns appearing in block i.
636 // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
637 // this is OK. The values updated here are only needed to process block i's entries.
638 for (LO j = 0; j < blockSize; j++) {
639 LO localCol = this->translateRowToCol(blockRows[j]);
640 colToBlockOffset[localCol] = blockStart + j;
641 }
642 // First, count the number of entries in each row of block i
643 //(to allocate it with StaticProfile)
644 Array<size_t> rowEntryCounts(blockPointSize, 0);
645 // blockRow counts the BlockCrs LIDs that are going into this block
646 // Rows are inserted into the CrsMatrix in sequential order
647 using inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
648 for (LO blockRow = 0; blockRow < blockSize; blockRow++) {
649 // get a raw view of the whole block row
650 inds_type indices;
651 LO inputRow = this->blockRows_[blockStart + blockRow];
652 this->inputBlockMatrix_->getGraph()->getLocalRowView(inputRow, indices);
653 LO numEntries = (LO)indices.size();
654 for (LO br = 0; br < this->bcrsBlockSize_; br++) {
655 for (LO k = 0; k < numEntries; k++) {
656 LO colOffset = colToBlockOffset[indices[k]];
657 if (blockStart <= colOffset && colOffset < blockEnd) {
658 rowEntryCounts[blockRow * this->bcrsBlockSize_ + br] += this->bcrsBlockSize_;
659 }
660 }
661 }
662 }
663 // now that row sizes are known, can allocate the diagonal matrix
664 RCP<InverseMap> tempMap(new InverseMap(blockPointSize, 0, this->localComm_));
665 auto diagGraph = rcp(new InverseGraph(tempMap, rowEntryCounts));
666 // insert the actual entries, one row at a time
667 for (LO blockRow = 0; blockRow < blockSize; blockRow++) {
668 // get a raw view of the whole block row
669 inds_type indices;
670 LO inputRow = this->blockRows_[blockStart + blockRow];
671 this->inputBlockMatrix_->getGraph()->getLocalRowView(inputRow, indices);
672 LO numEntries = (LO)indices.size();
673 for (LO br = 0; br < this->bcrsBlockSize_; br++) {
674 indicesToInsert.clear();
675 for (LO k = 0; k < numEntries; k++) {
676 LO colOffset = colToBlockOffset[indices[k]];
677 if (blockStart <= colOffset && colOffset < blockEnd) {
678 LO blockCol = colOffset - blockStart;
679 // bc iterates over the columns in (block) entry k
680 for (LO bc = 0; bc < this->bcrsBlockSize_; bc++) {
681 indicesToInsert.push_back(blockCol * this->bcrsBlockSize_ + bc);
682 }
683 }
684 }
685 InverseGlobalOrdinal rowToInsert = blockRow * this->bcrsBlockSize_ + br;
686 if (indicesToInsert.size())
687 diagGraph->insertGlobalIndices(rowToInsert, indicesToInsert());
688 }
689 }
690 diagGraph->fillComplete();
691
692 // create matrix block
693 diagBlocks_[i] = rcp(new InverseCrs(diagGraph));
694 Inverses_[i] = rcp(new InverseType(diagBlocks_[i]));
695 }
696 } else {
697 // get the mapping from point-indexed matrix columns to offsets in blockRows_
698 //(this includes regular CrsMatrix columns, in which case scalarsPerRow_ == 1)
699 Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
700 for (int i = 0; i < this->numBlocks_; i++) {
701 // Get the interval where block i is defined in blockRows_
702 LO blockStart = this->blockOffsets_[i];
703 LO blockSize = this->blockSizes_[i];
704 LO blockEnd = blockStart + blockSize;
705 ArrayView<const LO> blockRows = this->getBlockRows(i);
706 // Set the lookup table entries for the columns appearing in block i.
707 // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
708 // this is OK. The values updated here are only needed to process block i's entries.
709 for (LO j = 0; j < blockSize; j++) {
710 // translateRowToCol will return the corresponding split column
711 LO localCol = this->translateRowToCol(blockRows[j]);
712 colToBlockOffset[localCol] = blockStart + j;
713 }
714 Teuchos::Array<size_t> rowEntryCounts(blockSize, 0);
715 for (LO j = 0; j < blockSize; j++) {
716 rowEntryCounts[j] = this->getInputRowView(this->blockRows_[blockStart + j]).size();
717 }
718 RCP<InverseMap> tempMap(new InverseMap(blockSize, 0, this->localComm_));
719 auto diagGraph = rcp(new InverseGraph(tempMap, rowEntryCounts));
720 for (LO blockRow = 0; blockRow < blockSize; blockRow++) {
721 indicesToInsert.clear();
722 // get a view of the split row
723 LO inputSplitRow = this->blockRows_[blockStart + blockRow];
724 auto rowView = this->getInputRowView(inputSplitRow);
725 for (size_t k = 0; k < rowView.size(); k++) {
726 LO colOffset = colToBlockOffset[rowView.ind(k)];
727 if (blockStart <= colOffset && colOffset < blockEnd) {
728 LO blockCol = colOffset - blockStart;
729 indicesToInsert.push_back(blockCol);
730 }
731 }
732 if (indicesToInsert.size())
733 diagGraph->insertGlobalIndices(blockRow, indicesToInsert());
734 }
735 diagGraph->fillComplete();
736
737 // create matrix block
738 diagBlocks_[i] = rcp(new InverseCrs(diagGraph));
739 Inverses_[i] = rcp(new InverseType(diagBlocks_[i]));
740 }
741 }
742}
743
744//==============================================================================
745template <class MatrixType, class InverseType>
746void SparseContainer<MatrixType, InverseType>::
747 extractValues() {
748 using Teuchos::Array;
749 using Teuchos::ArrayView;
750 using Teuchos::RCP;
751 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
752 // To extract diagonal blocks, need to translate local rows to local columns.
753 // Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
754 // blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
755 // offset - blockOffsets_[b]: gives the column within block b.
756 //
757 // This provides the block and col within a block in O(1).
758 Teuchos::RCP<Teuchos::Time> timer =
759 Teuchos::TimeMonitor::getNewCounter("Ifpack2::SparseContainer::extractValues");
760 Teuchos::TimeMonitor timeMon(*timer);
761
762 Teuchos::Array<InverseGlobalOrdinal> indicesToInsert;
763 Teuchos::Array<InverseScalar> valuesToInsert;
764 if (this->scalarsPerRow_ > 1) {
765 Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
766 for (int i = 0; i < this->numBlocks_; i++) {
767 // Get the interval where block i is defined in blockRows_
768 LO blockStart = this->blockOffsets_[i];
769 LO blockSize = this->blockSizes_[i];
770 LO blockEnd = blockStart + blockSize;
771 ArrayView<const LO> blockRows = this->getBlockRows(i);
772 // Set the lookup table entries for the columns appearing in block i.
773 // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
774 // this is OK. The values updated here are only needed to process block i's entries.
775 for (LO j = 0; j < blockSize; j++) {
776 LO localCol = this->translateRowToCol(blockRows[j]);
777 colToBlockOffset[localCol] = blockStart + j;
778 }
779 using inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
780 using vals_type = typename block_crs_matrix_type::values_host_view_type;
781 // insert the actual entries, one row at a time
782 diagBlocks_[i]->resumeFill();
783 for (LO blockRow = 0; blockRow < blockSize; blockRow++) {
784 // get a raw view of the whole block row
785 inds_type indices;
786 vals_type values;
787 LO inputRow = this->blockRows_[blockStart + blockRow];
788 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
789 LO numEntries = (LO)indices.size();
790 for (LO br = 0; br < this->bcrsBlockSize_; br++) {
791 indicesToInsert.clear();
792 valuesToInsert.clear();
793 for (LO k = 0; k < numEntries; k++) {
794 LO colOffset = colToBlockOffset[indices[k]];
795 if (blockStart <= colOffset && colOffset < blockEnd) {
796 LO blockCol = colOffset - blockStart;
797 // bc iterates over the columns in (block) entry k
798 for (LO bc = 0; bc < this->bcrsBlockSize_; bc++) {
799 indicesToInsert.push_back(blockCol * this->bcrsBlockSize_ + bc);
800 valuesToInsert.push_back(values[k * this->bcrsBlockSize_ * this->bcrsBlockSize_ + bc * this->bcrsBlockSize_ + br]);
801 }
802 }
803 }
804 InverseGlobalOrdinal rowToInsert = blockRow * this->bcrsBlockSize_ + br;
805 if (indicesToInsert.size())
806 diagBlocks_[i]->replaceGlobalValues(rowToInsert, indicesToInsert(), valuesToInsert());
807 }
808 }
809 diagBlocks_[i]->fillComplete();
810 }
811 } else {
812 // get the mapping from point-indexed matrix columns to offsets in blockRows_
813 //(this includes regular CrsMatrix columns, in which case scalarsPerRow_ == 1)
814 Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
815 for (int i = 0; i < this->numBlocks_; i++) {
816 // Get the interval where block i is defined in blockRows_
817 LO blockStart = this->blockOffsets_[i];
818 LO blockSize = this->blockSizes_[i];
819 LO blockEnd = blockStart + blockSize;
820 ArrayView<const LO> blockRows = this->getBlockRows(i);
821 // Set the lookup table entries for the columns appearing in block i.
822 // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
823 // this is OK. The values updated here are only needed to process block i's entries.
824 for (LO j = 0; j < blockSize; j++) {
825 // translateRowToCol will return the corresponding split column
826 LO localCol = this->translateRowToCol(blockRows[j]);
827 colToBlockOffset[localCol] = blockStart + j;
828 }
829 diagBlocks_[i]->resumeFill();
830 for (LO blockRow = 0; blockRow < blockSize; blockRow++) {
831 valuesToInsert.clear();
832 indicesToInsert.clear();
833 // get a view of the split row
834 LO inputSplitRow = this->blockRows_[blockStart + blockRow];
835 auto rowView = this->getInputRowView(inputSplitRow);
836 for (size_t k = 0; k < rowView.size(); k++) {
837 LO colOffset = colToBlockOffset[rowView.ind(k)];
838 if (blockStart <= colOffset && colOffset < blockEnd) {
839 LO blockCol = colOffset - blockStart;
840 indicesToInsert.push_back(blockCol);
841 valuesToInsert.push_back(rowView.val(k));
842 }
843 }
844 if (indicesToInsert.size())
845 diagBlocks_[i]->replaceGlobalValues(blockRow, indicesToInsert, valuesToInsert);
846 }
847 diagBlocks_[i]->fillComplete();
848 }
849 }
850}
851
852template <typename MatrixType, typename InverseType>
854 typedef ILUT<Tpetra::RowMatrix<SC, LO, GO, NO>> ILUTInverse;
855#ifdef HAVE_IFPACK2_AMESOS2
857 if (std::is_same<InverseType, ILUTInverse>::value) {
858 return "SparseILUT";
859 } else if (std::is_same<InverseType, AmesosInverse>::value) {
860 return "SparseAmesos";
861 } else {
862 throw std::logic_error("InverseType for SparseContainer must be Ifpack2::ILUT or Details::Amesos2Wrapper");
863 }
864#else
865 // Macros can't have commas in their arguments, so we have to
866 // compute the bool first argument separately.
867 constexpr bool inverseTypeIsILUT = std::is_same<InverseType, ILUTInverse>::value;
868 TEUCHOS_TEST_FOR_EXCEPTION(!inverseTypeIsILUT, std::logic_error,
869 "InverseType for SparseContainer must be Ifpack2::ILUT<ROW>");
870 return "SparseILUT"; // the only supported sparse container specialization if no Amesos2
871#endif
872}
873
874} // namespace Ifpack2
875
876// For ETI
877#include "Ifpack2_ILUT.hpp"
878#ifdef HAVE_IFPACK2_AMESOS2
879#include "Ifpack2_Details_Amesos2Wrapper.hpp"
880#endif
881
882// There's no need to instantiate for CrsMatrix too. All Ifpack2
883// preconditioners can and should do dynamic casts if they need a type
884// more specific than RowMatrix.
885
886#ifdef HAVE_IFPACK2_AMESOS2
887#define IFPACK2_SPARSECONTAINER_INSTANT(S, LO, GO, N) \
888 template class Ifpack2::SparseContainer<Tpetra::RowMatrix<S, LO, GO, N>, \
889 Ifpack2::ILUT<Tpetra::RowMatrix<S, LO, GO, N>>>; \
890 template class Ifpack2::SparseContainer<Tpetra::RowMatrix<S, LO, GO, N>, \
891 Ifpack2::Details::Amesos2Wrapper<Tpetra::RowMatrix<S, LO, GO, N>>>;
892#else
893#define IFPACK2_SPARSECONTAINER_INSTANT(S, LO, GO, N) \
894 template class Ifpack2::SparseContainer<Tpetra::RowMatrix<S, LO, GO, N>, \
895 Ifpack2::ILUT<Tpetra::RowMatrix<S, LO, GO, N>>>;
896#endif
897#endif // IFPACK2_SPARSECONTAINER_DEF_HPP
Ifpack2::SparseContainer class declaration.
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS)....
Definition Ifpack2_Container_decl.hpp:311
typename mv_type::dual_view_type::t_host HostView
Definition Ifpack2_Container_decl.hpp:109
Ifpack2's implementation of Trilinos::Details::LinearSolver interface.
Definition Ifpack2_Details_LinearSolver_decl.hpp:75
ILUT (incomplete LU factorization with threshold) of a Tpetra sparse matrix.
Definition Ifpack2_ILUT_decl.hpp:67
Store and solve a local sparse linear problem.
Definition Ifpack2_SparseContainer_decl.hpp:102
virtual void apply(ConstHostView X, HostView Y, int blockIndex, Teuchos::ETransp mode=Teuchos::NO_TRANS, SC alpha=Teuchos::ScalarTraits< SC >::one(), SC beta=Teuchos::ScalarTraits< SC >::zero()) const
Compute Y := alpha * M^{-1} X + beta*Y.
Definition Ifpack2_SparseContainer_def.hpp:145
SparseContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
Definition Ifpack2_SparseContainer_def.hpp:27
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to the given FancyOStream.
Definition Ifpack2_SparseContainer_def.hpp:458
virtual void weightedApply(ConstHostView X, HostView Y, ConstHostView W, int blockIndex, Teuchos::ETransp mode=Teuchos::NO_TRANS, SC alpha=Teuchos::ScalarTraits< SC >::one(), SC beta=Teuchos::ScalarTraits< SC >::zero()) const
Compute Y := alpha * diag(D) * M^{-1} (diag(D) * X) + beta*Y.
Definition Ifpack2_SparseContainer_def.hpp:271
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition Ifpack2_SparseContainer_def.hpp:853
virtual void initialize()
Do all set-up operations that only require matrix structure.
Definition Ifpack2_SparseContainer_def.hpp:54
void clearBlocks()
Definition Ifpack2_SparseContainer_def.hpp:107
virtual void compute()
Initialize and compute all blocks.
Definition Ifpack2_SparseContainer_def.hpp:84
virtual ~SparseContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition Ifpack2_SparseContainer_def.hpp:44
virtual std::ostream & print(std::ostream &os) const
Print information about this object to the given output stream.
Definition Ifpack2_SparseContainer_def.hpp:426
virtual std::string description() const
A one-line description of this object.
Definition Ifpack2_SparseContainer_def.hpp:435
virtual void setParameters(const Teuchos::ParameterList &List)
Set all necessary parameters.
Definition Ifpack2_SparseContainer_def.hpp:48
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40