Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_TriDiContainer_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_TRIDICONTAINER_DEF_HPP
11#define IFPACK2_TRIDICONTAINER_DEF_HPP
12
13#include "Teuchos_LAPACK.hpp"
15
16#ifdef HAVE_MPI
17#include <mpi.h>
18#include "Teuchos_DefaultMpiComm.hpp"
19#else
20#include "Teuchos_DefaultSerialComm.hpp"
21#endif // HAVE_MPI
22
23namespace Ifpack2 {
24
25template <class MatrixType, class LocalScalarType>
27 TriDiContainer(const Teuchos::RCP<const row_matrix_type>& matrix,
28 const Teuchos::Array<Teuchos::Array<LO> >& partitions,
29 const Teuchos::RCP<const import_type>&,
30 bool pointIndexed)
31 : ContainerImpl<MatrixType, LocalScalarType>(matrix, partitions, pointIndexed)
32 , ipiv_(this->blockRows_.size() * this->scalarsPerRow_)
33 , scalarOffsets_(this->numBlocks_) {
34 TEUCHOS_TEST_FOR_EXCEPTION(
35 !matrix->hasColMap(), std::invalid_argument,
36 "Ifpack2::TriDiContainer: "
37 "The constructor's input matrix must have a column Map.");
38 // FIXME (mfh 25 Aug 2013) What if the matrix's row Map has a
39 // different index base than zero?
40 // compute scalar array offsets (probably different from blockOffsets_)
41 LO scalarTotal = 0;
42 for (LO i = 0; i < this->numBlocks_; i++) {
43 scalarOffsets_[i] = scalarTotal;
44 // actualBlockRows is how many scalars it takes to store the diagonal for block i
45 LO actualBlockRows = this->scalarsPerRow_ * this->blockSizes_[i];
46 if (actualBlockRows == 1) {
47 scalarTotal += 1;
48 } else {
49 // this formula is exact for any block size of 1 or more
50 // includes 1 subdiagonal and 2 superdiagonals.
51 scalarTotal += 4 * (actualBlockRows - 1);
52 }
53 }
54 // Allocate scalar arrays
55 scalars_.resize(scalarTotal);
56 diagBlocks_.reserve(this->numBlocks_);
57 for (int i = 0; i < this->numBlocks_; i++) {
58 diagBlocks_.emplace_back(Teuchos::View, scalars_.data() + scalarOffsets_[i], this->blockSizes_[i] * this->scalarsPerRow_);
59 }
60}
61
62template <class MatrixType, class LocalScalarType>
65
66template <class MatrixType, class LocalScalarType>
68 for (int i = 0; i < this->numBlocks_; i++)
69 diagBlocks_[i].putScalar(Teuchos::ScalarTraits<LSC>::zero());
70 this->IsInitialized_ = true;
71 // We assume that if you called this method, you intend to recompute
72 // everything.
73 this->IsComputed_ = false;
74}
75
76template <class MatrixType, class LocalScalarType>
79 TEUCHOS_TEST_FOR_EXCEPTION(
80 ipiv_.size() != this->blockRows_.size() * this->scalarsPerRow_, std::logic_error,
81 "Ifpack2::TriDiContainer::compute: ipiv_ array has the wrong size. "
82 "Please report this bug to the Ifpack2 developers.");
83 }
84
85 this->IsComputed_ = false;
86 if (!this->isInitialized()) {
87 this->initialize();
88 }
89 extract(); // extract the submatrices
90 factor(); // factor them
91 this->IsComputed_ = true;
92}
93
94template <class MatrixType, class LocalScalarType>
96 using Teuchos::Array;
97 using Teuchos::ArrayView;
98 SC zero = Teuchos::ScalarTraits<SC>::zero();
99 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
100 // To extract diagonal blocks, need to translate local rows to local columns.
101 // Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
102 // blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
103 // offset - blockOffsets_[b]: gives the column within block b.
104 //
105 // This provides the block and col within a block in O(1).
106 if (this->scalarsPerRow_ > 1) {
107 Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
108 for (int i = 0; i < this->numBlocks_; i++) {
109 // Get the interval where block i is defined in blockRows_
110 LO blockStart = this->blockOffsets_[i];
111 LO blockEnd = blockStart + this->blockSizes_[i];
112 ArrayView<const LO> blockRows = this->getBlockRows(i);
113 // Set the lookup table entries for the columns appearing in block i.
114 // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
115 // this is OK. The values updated here are only needed to process block i's entries.
116 for (size_t j = 0; j < size_t(blockRows.size()); j++) {
117 LO localCol = this->translateRowToCol(blockRows[j]);
118 colToBlockOffset[localCol] = blockStart + j;
119 }
120 using h_inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
121 using h_vals_type = typename block_crs_matrix_type::values_host_view_type;
122 for (LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++) {
123 // get a raw view of the whole block row
124 h_inds_type indices;
125 h_vals_type values;
126 LO inputRow = this->blockRows_[blockStart + blockRow];
127 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
128 LO numEntries = (LO)indices.size();
129 for (LO k = 0; k < numEntries; k++) {
130 LO colOffset = colToBlockOffset[indices[k]];
131 if (blockStart <= colOffset && colOffset < blockEnd) {
132 // This entry does appear in the diagonal block.
133 //(br, bc) identifies the scalar's position in the BlockCrs block.
134 // Convert this to (r, c) which is its position in the container block.
135 LO blockCol = colOffset - blockStart;
136 for (LO bc = 0; bc < this->bcrsBlockSize_; bc++) {
137 for (LO br = 0; br < this->bcrsBlockSize_; br++) {
138 LO r = this->bcrsBlockSize_ * blockRow + br;
139 LO c = this->bcrsBlockSize_ * blockCol + bc;
140 auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
141 if (val != zero)
142 diagBlocks_[i](r, c) = val;
143 }
144 }
145 }
146 }
147 }
148 }
149 } else {
150 // get the mapping from point-indexed matrix columns to offsets in blockRows_
151 //(this includes regular CrsMatrix columns, in which case bcrsBlockSize_ == 1)
152 Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
153 for (int i = 0; i < this->numBlocks_; i++) {
154 // Get the interval where block i is defined in blockRows_
155 LO blockStart = this->blockOffsets_[i];
156 LO blockEnd = blockStart + this->blockSizes_[i];
157 ArrayView<const LO> blockRows = this->getBlockRows(i);
158 // Set the lookup table entries for the columns appearing in block i.
159 // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
160 // this is OK. The values updated here are only needed to process block i's entries.
161 for (size_t j = 0; j < size_t(blockRows.size()); j++) {
162 // translateRowToCol will return the corresponding split column
163 LO localCol = this->translateRowToCol(blockRows[j]);
164 colToBlockOffset[localCol] = blockStart + j;
165 }
166 for (size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++) {
167 // get a view of the split row
168 LO inputPointRow = this->blockRows_[blockStart + blockRow];
169 auto rowView = this->getInputRowView(inputPointRow);
170 for (size_t k = 0; k < rowView.size(); k++) {
171 LO colOffset = colToBlockOffset[rowView.ind(k)];
172 if (blockStart <= colOffset && colOffset < blockEnd) {
173 LO blockCol = colOffset - blockStart;
174 auto val = rowView.val(k);
175 if (val != zero)
176 diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
177 }
178 }
179 }
180 }
181 }
182}
183
184template <class MatrixType, class LocalScalarType>
185void TriDiContainer<MatrixType, LocalScalarType>::clearBlocks() {
186 diagBlocks_.clear();
187 scalars_.clear();
188 ContainerImpl<MatrixType, LocalScalarType>::clearBlocks();
189}
190
191template <class MatrixType, class LocalScalarType>
192void TriDiContainer<MatrixType, LocalScalarType>::factor() {
193 for (int i = 0; i < this->numBlocks_; i++) {
194 Teuchos::LAPACK<int, LSC> lapack;
195 int INFO = 0;
196 int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
197 lapack.GTTRF(diagBlocks_[i].numRowsCols(),
198 diagBlocks_[i].DL(),
199 diagBlocks_[i].D(),
200 diagBlocks_[i].DU(),
201 diagBlocks_[i].DU2(),
202 blockIpiv, &INFO);
203 // INFO < 0 is a bug.
204 TEUCHOS_TEST_FOR_EXCEPTION(
205 INFO < 0, std::logic_error,
206 "Ifpack2::TriDiContainer::factor: "
207 "LAPACK's _GTTRF (LU factorization with partial pivoting) was called "
208 "incorrectly. INFO = "
209 << INFO << " < 0. "
210 "Please report this bug to the Ifpack2 developers.");
211 // INFO > 0 means the matrix is singular. This is probably an issue
212 // either with the choice of rows the rows we extracted, or with the
213 // input matrix itself.
214 TEUCHOS_TEST_FOR_EXCEPTION(
215 INFO > 0, std::runtime_error,
216 "Ifpack2::TriDiContainer::factor: "
217 "LAPACK's _GTTRF (LU factorization with partial pivoting) reports that the "
218 "computed U factor is exactly singular. U("
219 << INFO << "," << INFO << ") "
220 "(one-based index i) is exactly zero. This probably means that the input "
221 "matrix has a singular diagonal block.");
222 }
223}
224
225template <class MatrixType, class LocalScalarType>
227 solveBlock(ConstHostSubviewLocal X,
228 HostSubviewLocal Y,
229 int blockIndex,
230 Teuchos::ETransp mode,
231 LSC alpha,
232 LSC beta) const {
233 LSC zero = Teuchos::ScalarTraits<LSC>::zero();
234 size_t numVecs = X.extent(1);
235 size_t numRows = X.extent(0);
236
238 TEUCHOS_TEST_FOR_EXCEPTION(
239 X.extent(0) != Y.extent(0),
240 std::logic_error,
241 "Ifpack2::TriDiContainer::solveBlock: X and Y have "
242 "incompatible dimensions ("
243 << X.extent(0) << " resp. "
244 << Y.extent(0) << "). Please report this bug to "
245 "the Ifpack2 developers.");
246 TEUCHOS_TEST_FOR_EXCEPTION(
247 X.extent(0) != static_cast<size_t>(diagBlocks_[blockIndex].numRowsCols()),
248 std::logic_error,
249 "Ifpack2::TriDiContainer::solveBlock: The input "
250 "multivector X has incompatible dimensions from those of the "
251 "inverse operator ("
252 << X.extent(0) << " vs. "
253 << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols() : diagBlocks_[blockIndex].numRowsCols())
254 << "). Please report this bug to the Ifpack2 developers.");
255 TEUCHOS_TEST_FOR_EXCEPTION(
256 Y.extent(0) != static_cast<size_t>(diagBlocks_[blockIndex].numRowsCols()),
257 std::logic_error,
258 "Ifpack2::TriDiContainer::solveBlock: The output "
259 "multivector Y has incompatible dimensions from those of the "
260 "inverse operator ("
261 << Y.extent(0) << " vs. "
262 << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols() : diagBlocks_[blockIndex].numRowsCols())
263 << "). Please report this bug to the Ifpack2 developers.");
264 }
265
266 if (alpha == zero) { // don't need to solve the linear system
267 if (beta == zero) {
268 // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
269 // any Inf or NaN values in Y (rather than multiplying them by
270 // zero, resulting in NaN values).
271 for (size_t j = 0; j < numVecs; j++)
272 for (size_t i = 0; i < numRows; i++)
273 Y(i, j) = zero;
274 } else { // beta != 0
275 for (size_t j = 0; j < numVecs; j++)
276 for (size_t i = 0; i < numRows; i++)
277 Y(i, j) *= ISC(beta);
278 }
279 } else { // alpha != 0; must solve the linear system
280 Teuchos::LAPACK<int, LSC> lapack;
281 // If beta is nonzero or Y is not constant stride, we have to use
282 // a temporary output multivector. It gets a copy of X, since
283 // GETRS overwrites its (multi)vector input with its output.
284
285 std::vector<LSC> yTemp(numVecs * numRows);
286 for (size_t j = 0; j < numVecs; j++) {
287 for (size_t i = 0; i < numRows; i++)
288 yTemp[j * numRows + i] = X(i, j);
289 }
290
291 int INFO = 0;
292 const char trans =
293 (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
294 int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
295 lapack.GTTRS(trans,
296 diagBlocks_[blockIndex].numRowsCols(),
297 numVecs,
298 diagBlocks_[blockIndex].DL(),
299 diagBlocks_[blockIndex].D(),
300 diagBlocks_[blockIndex].DU(),
301 diagBlocks_[blockIndex].DU2(),
302 blockIpiv,
303 yTemp.data(),
304 numRows,
305 &INFO);
306
307 if (beta != zero) {
308 for (size_t j = 0; j < numVecs; j++) {
309 for (size_t i = 0; i < numRows; i++) {
310 Y(i, j) *= ISC(beta);
311 Y(i, j) += ISC(alpha * yTemp[j * numRows + i]);
312 }
313 }
314 } else {
315 for (size_t j = 0; j < numVecs; j++) {
316 for (size_t i = 0; i < numRows; i++)
317 Y(i, j) = ISC(yTemp[j * numRows + i]);
318 }
319 }
320
321 TEUCHOS_TEST_FOR_EXCEPTION(
322 INFO != 0, std::runtime_error,
323 "Ifpack2::TriDiContainer::solveBlock: "
324 "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
325 "failed with INFO = "
326 << INFO << " != 0.");
327 }
328}
329
330template <class MatrixType, class LocalScalarType>
331std::ostream& TriDiContainer<MatrixType, LocalScalarType>::print(std::ostream& os) const {
332 Teuchos::FancyOStream fos(Teuchos::rcp(&os, false));
333 fos.setOutputToRootOnly(0);
334 describe(fos);
335 return (os);
336}
337
338template <class MatrixType, class LocalScalarType>
340 std::ostringstream oss;
341 oss << Teuchos::Describable::description();
342 if (this->isInitialized()) {
343 if (this->isComputed()) {
344 oss << "{status = initialized, computed";
345 } else {
346 oss << "{status = initialized, not computed";
347 }
348 } else {
349 oss << "{status = not initialized, not computed";
350 }
351
352 oss << "}";
353 return oss.str();
354}
355
356template <class MatrixType, class LocalScalarType>
358 describe(Teuchos::FancyOStream& os,
359 const Teuchos::EVerbosityLevel verbLevel) const {
360 using std::endl;
361 if (verbLevel == Teuchos::VERB_NONE) return;
362 os << "================================================================================" << endl;
363 os << "Ifpack2::TriDiContainer" << endl;
364 os << "Number of blocks = " << this->numBlocks_ << endl;
365 os << "isInitialized() = " << this->IsInitialized_ << endl;
366 os << "isComputed() = " << this->IsComputed_ << endl;
367 os << "================================================================================" << endl;
368 os << endl;
369}
370
371template <class MatrixType, class LocalScalarType>
373 return "TriDi";
374}
375
376#define IFPACK2_TRIDICONTAINER_INSTANT(S, LO, GO, N) \
377 template class Ifpack2::TriDiContainer<Tpetra::RowMatrix<S, LO, GO, N>, S>;
378
379} // namespace Ifpack2
380
381#endif
Declaration of Ifpack2::Details::Behavior, a class that describes Ifpack2's run-time behavior.
int numBlocks_
The number of blocks (partitions) in the container.
Definition Ifpack2_Container_decl.hpp:257
LO scalarsPerRow_
Definition Ifpack2_Container_decl.hpp:282
Teuchos::Array< LO > blockSizes_
Number of rows in each block.
Definition Ifpack2_Container_decl.hpp:261
typename KokkosKernels::ArithTraits< SC >::val_type ISC
Internal representation of Scalar in Kokkos::View.
Definition Ifpack2_Container_decl.hpp:101
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS)....
Definition Ifpack2_Container_decl.hpp:307
static bool debug()
Whether Ifpack2 is in debug mode.
Definition Ifpack2_Details_Behavior.cpp:31
Store and solve a local TriDi linear problem.
Definition Ifpack2_TriDiContainer_decl.hpp:73
virtual std::string description() const
A one-line description of this object.
Definition Ifpack2_TriDiContainer_def.hpp:339
virtual void initialize()
Do all set-up operations that only require matrix structure.
Definition Ifpack2_TriDiContainer_def.hpp:67
virtual void compute()
Extract the local diagonal block and prepare the solver.
Definition Ifpack2_TriDiContainer_def.hpp:77
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to the given FancyOStream.
Definition Ifpack2_TriDiContainer_def.hpp:358
virtual ~TriDiContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition Ifpack2_TriDiContainer_def.hpp:64
virtual std::ostream & print(std::ostream &os) const
Print information about this object to the given output stream.
Definition Ifpack2_TriDiContainer_def.hpp:331
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition Ifpack2_TriDiContainer_def.hpp:372
TriDiContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
Definition Ifpack2_TriDiContainer_def.hpp:27
void solveBlock(ConstHostSubviewLocal X, HostSubviewLocal Y, int blockIndex, Teuchos::ETransp mode, LSC alpha, LSC beta) const
Definition Ifpack2_TriDiContainer_def.hpp:227
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40