Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_DenseContainer_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_DENSECONTAINER_DEF_HPP
11#define IFPACK2_DENSECONTAINER_DEF_HPP
12
13#include "Tpetra_CrsMatrix.hpp"
14#include "Teuchos_LAPACK.hpp"
15#include "Tpetra_BlockMultiVector.hpp"
16
17#ifdef HAVE_MPI
18#include <mpi.h>
19#include "Teuchos_DefaultMpiComm.hpp"
20#else
21#include "Teuchos_DefaultSerialComm.hpp"
22#endif // HAVE_MPI
23
24namespace Ifpack2 {
25
26template <class MatrixType, class LocalScalarType>
28 DenseContainer(const Teuchos::RCP<const row_matrix_type>& matrix,
29 const Teuchos::Array<Teuchos::Array<LO> >& partitions,
30 const Teuchos::RCP<const import_type>&,
31 bool pointIndexed)
32 : ContainerImpl<MatrixType, LocalScalarType>(matrix, partitions, pointIndexed)
33 , scalarOffsets_(this->numBlocks_) {
34 TEUCHOS_TEST_FOR_EXCEPTION(
35 !matrix->hasColMap(), std::invalid_argument,
36 "Ifpack2::DenseContainer: "
37 "The constructor's input matrix must have a column Map.");
38
39 // compute scalarOffsets_
40 GO totalScalars = 0;
41 for (LO i = 0; i < this->numBlocks_; i++) {
42 scalarOffsets_[i] = totalScalars;
43 totalScalars += this->blockSizes_[i] * this->scalarsPerRow_ *
44 this->blockSizes_[i] * this->scalarsPerRow_;
45 }
46 scalars_.resize(totalScalars);
47 for (int i = 0; i < this->numBlocks_; i++) {
48 LO denseRows = this->blockSizes_[i] * this->scalarsPerRow_;
49 // create square dense matrix (stride is same as rows and cols)
50 diagBlocks_.emplace_back(Teuchos::View, scalars_.data() + scalarOffsets_[i], denseRows, denseRows, denseRows);
51 }
52
53 ipiv_.resize(this->blockRows_.size() * this->scalarsPerRow_);
54}
55
56template <class MatrixType, class LocalScalarType>
59
60template <class MatrixType, class LocalScalarType>
63 // Fill the diagonal block and LU permutation array with zeros.
64 for (int i = 0; i < this->numBlocks_; i++)
65 diagBlocks_[i].putScalar(Teuchos::ScalarTraits<LSC>::zero());
66 std::fill(ipiv_.begin(), ipiv_.end(), 0);
67
68 this->IsInitialized_ = true;
69 // We assume that if you called this method, you intend to recompute
70 // everything.
71 this->IsComputed_ = false;
72}
73
74template <class MatrixType, class LocalScalarType>
76 compute() {
77 // FIXME: I am commenting this out because it breaks block CRS support
78 // TEUCHOS_TEST_FOR_EXCEPTION(
79 // static_cast<size_t> (ipiv_.size ()) != numRows_, std::logic_error,
80 // "Ifpack2::DenseContainer::compute: ipiv_ array has the wrong size. "
81 // "Please report this bug to the Ifpack2 developers.");
82
83 this->IsComputed_ = false;
84 if (!this->isInitialized()) {
85 this->initialize();
86 }
87
88 extract(); // Extract the submatrices
89 factor(); // Factor them
90 this->IsComputed_ = true;
91}
92
93template <class MatrixType, class LocalScalarType>
95 using Teuchos::Array;
96 using Teuchos::ArrayView;
97 using STS = Teuchos::ScalarTraits<SC>;
98 SC zero = STS::zero();
99 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
100 // To extract diagonal blocks, need to translate local rows to local columns.
101 // Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
102 // blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
103 // offset - blockOffsets_[b]: gives the column within block b.
104 //
105 // This provides the block and col within a block in O(1).
106 if (this->scalarsPerRow_ > 1) {
107 Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
108 for (int i = 0; i < this->numBlocks_; i++) {
109 // Get the interval where block i is defined in blockRows_
110 LO blockStart = this->blockOffsets_[i];
111 LO blockEnd = blockStart + this->blockSizes_[i];
112 ArrayView<const LO> blockRows = this->getBlockRows(i);
113 // Set the lookup table entries for the columns appearing in block i.
114 // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
115 // this is OK. The values updated here are only needed to process block i's entries.
116 for (size_t j = 0; j < size_t(blockRows.size()); j++) {
117 LO localCol = this->translateRowToCol(blockRows[j]);
118 colToBlockOffset[localCol] = blockStart + j;
119 }
120 using h_inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
121 using h_vals_type = typename block_crs_matrix_type::values_host_view_type;
122 for (LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++) {
123 // get a raw view of the whole block row
124 h_inds_type indices;
125 h_vals_type values;
126 LO inputRow = this->blockRows_[blockStart + blockRow];
127 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
128 LO numEntries = (LO)indices.size();
129 for (LO k = 0; k < numEntries; k++) {
130 LO colOffset = colToBlockOffset[indices[k]];
131 if (blockStart <= colOffset && colOffset < blockEnd) {
132 // This entry does appear in the diagonal block.
133 //(br, bc) identifies the scalar's position in the BlockCrs block.
134 // Convert this to (r, c) which is its position in the container block.
135 LO blockCol = colOffset - blockStart;
136 for (LO bc = 0; bc < this->bcrsBlockSize_; bc++) {
137 for (LO br = 0; br < this->bcrsBlockSize_; br++) {
138 LO r = this->bcrsBlockSize_ * blockRow + br;
139 LO c = this->bcrsBlockSize_ * blockCol + bc;
140 auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
141 if (val != zero)
142 diagBlocks_[i](r, c) = val;
143 }
144 }
145 }
146 }
147 }
148 }
149 } else {
150 // get the mapping from point-indexed matrix columns to offsets in blockRows_
151 //(this includes regular CrsMatrix columns, in which case bcrsBlockSize_ == 1)
152 Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
153 for (int i = 0; i < this->numBlocks_; i++) {
154 // Get the interval where block i is defined in blockRows_
155 LO blockStart = this->blockOffsets_[i];
156 LO blockEnd = blockStart + this->blockSizes_[i];
157 ArrayView<const LO> blockRows = this->getBlockRows(i);
158 // Set the lookup table entries for the columns appearing in block i.
159 // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
160 // this is OK. The values updated here are only needed to process block i's entries.
161 for (size_t j = 0; j < size_t(blockRows.size()); j++) {
162 // translateRowToCol will return the corresponding split column
163 LO localCol = this->translateRowToCol(blockRows[j]);
164 colToBlockOffset[localCol] = blockStart + j;
165 }
166 for (size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++) {
167 // get a view of the split row
168 LO inputPointRow = this->blockRows_[blockStart + blockRow];
169 auto rowView = this->getInputRowView(inputPointRow);
170 for (size_t k = 0; k < rowView.size(); k++) {
171 LO colOffset = colToBlockOffset[rowView.ind(k)];
172 if (blockStart <= colOffset && colOffset < blockEnd) {
173 LO blockCol = colOffset - blockStart;
174 auto val = rowView.val(k);
175 if (val != zero)
176 diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
177 }
178 }
179 }
180 }
181 }
182}
183
184template <class MatrixType, class LocalScalarType>
185void DenseContainer<MatrixType, LocalScalarType>::
186 factor() {
187 Teuchos::LAPACK<int, LSC> lapack;
188 for (int i = 0; i < this->numBlocks_; i++) {
189 int INFO = 0;
190 int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
191 lapack.GETRF(diagBlocks_[i].numRows(),
192 diagBlocks_[i].numCols(),
193 diagBlocks_[i].values(),
194 diagBlocks_[i].stride(),
195 blockIpiv, &INFO);
196 // INFO < 0 is a bug.
197 TEUCHOS_TEST_FOR_EXCEPTION(
198 INFO < 0, std::logic_error,
199 "Ifpack2::DenseContainer::factor: "
200 "LAPACK's _GETRF (LU factorization with partial pivoting) was called "
201 "incorrectly. INFO = "
202 << INFO << " < 0. "
203 "Please report this bug to the Ifpack2 developers.");
204 // INFO > 0 means the matrix is singular. This is probably an issue
205 // either with the choice of rows the rows we extracted, or with the
206 // input matrix itself.
207 TEUCHOS_TEST_FOR_EXCEPTION(
208 INFO > 0, std::runtime_error,
209 "Ifpack2::DenseContainer::factor: "
210 "LAPACK's _GETRF (LU factorization with partial pivoting) reports that the "
211 "computed U factor is exactly singular. U("
212 << INFO << "," << INFO << ") "
213 "(one-based index i) is exactly zero. This probably means that the input "
214 "matrix has a singular diagonal block.");
215 }
216}
217
218template <class MatrixType, class LocalScalarType>
219void DenseContainer<MatrixType, LocalScalarType>::
220 solveBlock(ConstHostSubviewLocal X,
221 HostSubviewLocal Y,
222 int blockIndex,
223 Teuchos::ETransp mode,
224 LSC alpha,
225 LSC beta) const {
226#ifdef HAVE_IFPACK2_DEBUG
227 TEUCHOS_TEST_FOR_EXCEPTION(
228 X.extent(0) != Y.extent(0),
229 std::logic_error,
230 "Ifpack2::DenseContainer::solveBlock: X and Y have "
231 "incompatible dimensions ("
232 << X.extent(0) << " resp. "
233 << Y.extent(0) << "). Please report this bug to "
234 "the Ifpack2 developers.");
235
236 TEUCHOS_TEST_FOR_EXCEPTION(
237 X.extent(1) != Y.extent(1),
238 std::logic_error,
239 "Ifpack2::DenseContainer::solveBlock: X and Y have "
240 "incompatible numbers of vectors ("
241 << X.extent(1) << " resp. "
242 << Y.extent(1) << "). Please report this bug to "
243 "the Ifpack2 developers.");
244#endif
245
246 typedef Teuchos::ScalarTraits<LSC> STS;
247 size_t numRows = X.extent(0);
248 size_t numVecs = X.extent(1);
249 if (alpha == STS::zero()) { // don't need to solve the linear system
250 if (beta == STS::zero()) {
251 // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
252 // any Inf or NaN values in Y (rather than multiplying them by
253 // zero, resulting in NaN values).
254 for (size_t j = 0; j < numVecs; j++) {
255 for (size_t i = 0; i < numRows; i++)
256 Y(i, j) = STS::zero();
257 }
258 } else // beta != 0
259 for (size_t j = 0; j < numVecs; j++) {
260 for (size_t i = 0; i < numRows; i++)
261 Y(i, j) = beta * Y(i, j);
262 }
263 } else { // alpha != 0; must solve the linear system
264 Teuchos::LAPACK<int, LSC> lapack;
265 // If beta is nonzero or Y is not constant stride, we have to use
266 // a temporary output multivector. It gets a (deep) copy of X,
267 // since GETRS overwrites its (multi)vector input with its output.
268 std::vector<LSC> yTemp(numVecs * numRows);
269 for (size_t j = 0; j < numVecs; j++) {
270 for (size_t i = 0; i < numRows; i++)
271 yTemp[j * numRows + i] = X(i, j);
272 }
273
274 int INFO = 0;
275 int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
276 const char trans =
277 (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
278 lapack.GETRS(trans,
279 diagBlocks_[blockIndex].numRows(),
280 numVecs,
281 diagBlocks_[blockIndex].values(),
282 diagBlocks_[blockIndex].stride(),
283 blockIpiv,
284 yTemp.data(),
285 numRows,
286 &INFO);
287
288 if (beta != STS::zero()) {
289 for (size_t j = 0; j < numVecs; j++) {
290 for (size_t i = 0; i < numRows; i++) {
291 Y(i, j) *= ISC(beta);
292 Y(i, j) += ISC(alpha * yTemp[j * numRows + i]);
293 }
294 }
295 } else {
296 for (size_t j = 0; j < numVecs; j++) {
297 for (size_t i = 0; i < numRows; i++)
298 Y(i, j) = ISC(yTemp[j * numRows + i]);
299 }
300 }
301
302 TEUCHOS_TEST_FOR_EXCEPTION(
303 INFO != 0, std::runtime_error,
304 "Ifpack2::DenseContainer::solveBlock: "
305 "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
306 "failed with INFO = "
307 << INFO << " != 0.");
308 }
309}
310
311template <class MatrixType, class LocalScalarType>
312std::ostream&
314 print(std::ostream& os) const {
315 Teuchos::FancyOStream fos(Teuchos::rcpFromRef(os));
316 fos.setOutputToRootOnly(0);
317 this->describe(fos);
318 return os;
319}
320
321template <class MatrixType, class LocalScalarType>
322std::string
324 description() const {
325 std::ostringstream oss;
326 oss << "Ifpack::DenseContainer: ";
327 if (this->isInitialized()) {
328 if (this->isComputed()) {
329 oss << "{status = initialized, computed";
330 } else {
331 oss << "{status = initialized, not computed";
332 }
333 } else {
334 oss << "{status = not initialized, not computed";
335 }
336
337 oss << "}";
338 return oss.str();
339}
340
341template <class MatrixType, class LocalScalarType>
343 describe(Teuchos::FancyOStream& os,
344 const Teuchos::EVerbosityLevel verbLevel) const {
345 using std::endl;
346 if (verbLevel == Teuchos::VERB_NONE) return;
347 os << "================================================================================" << endl;
348 os << "Ifpack2::DenseContainer" << endl;
349 for (int i = 0; i < this->numBlocks_; i++) {
350 os << "Block " << i << " number of rows = " << this->blockSizes_[i] << endl;
351 }
352 os << "isInitialized() = " << this->IsInitialized_ << endl;
353 os << "isComputed() = " << this->IsComputed_ << endl;
354 os << "================================================================================" << endl;
355 os << endl;
356}
357
358template <class MatrixType, class LocalScalarType>
360 diagBlocks_.clear();
361 scalars_.clear();
363}
364
365template <class MatrixType, class LocalScalarType>
367 return "Dense";
368}
369
370} // namespace Ifpack2
371
372// There's no need to instantiate for CrsMatrix too. All Ifpack2
373// preconditioners can and should do dynamic casts if they need a type
374// more specific than RowMatrix.
375
376#define IFPACK2_DENSECONTAINER_INSTANT(S, LO, GO, N) \
377 template class Ifpack2::DenseContainer<Tpetra::RowMatrix<S, LO, GO, N>, S>;
378
379#endif // IFPACK2_DENSECONTAINER_DEF_HPP
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS)....
Definition Ifpack2_Container_decl.hpp:311
int numBlocks_
The number of blocks (partitions) in the container.
Definition Ifpack2_Container_decl.hpp:261
LO scalarsPerRow_
Definition Ifpack2_Container_decl.hpp:286
Teuchos::Array< LO > blockSizes_
Number of rows in each block.
Definition Ifpack2_Container_decl.hpp:265
Teuchos::Array< LO > blockRows_
Local indices of the rows of the input matrix that belong to this block.
Definition Ifpack2_Container_decl.hpp:263
Store and solve a local dense linear problem.
Definition Ifpack2_DenseContainer_decl.hpp:71
virtual std::string description() const
A one-line description of this object.
Definition Ifpack2_DenseContainer_def.hpp:324
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to the given FancyOStream.
Definition Ifpack2_DenseContainer_def.hpp:343
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition Ifpack2_DenseContainer_def.hpp:366
virtual std::ostream & print(std::ostream &os) const
Print information about this object to the given output stream.
Definition Ifpack2_DenseContainer_def.hpp:314
virtual void compute()
Extract the local diagonal block and prepare the solver.
Definition Ifpack2_DenseContainer_def.hpp:76
DenseContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
Definition Ifpack2_DenseContainer_def.hpp:28
virtual ~DenseContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition Ifpack2_DenseContainer_def.hpp:58
virtual void initialize()
Do all set-up operations that only require matrix structure.
Definition Ifpack2_DenseContainer_def.hpp:62
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40