10#ifndef IFPACK2_BANDEDCONTAINER_DEF_HPP
11#define IFPACK2_BANDEDCONTAINER_DEF_HPP
13#include "Teuchos_LAPACK.hpp"
14#include "Tpetra_CrsMatrix.hpp"
21#include "Teuchos_DefaultMpiComm.hpp"
23#include "Teuchos_DefaultSerialComm.hpp"
28template <
class MatrixType,
class LocalScalarType>
31 const Teuchos::Array<Teuchos::Array<LO> >& partitions,
32 const Teuchos::RCP<const import_type>&,
34 :
ContainerImpl<MatrixType, LocalScalarType>(matrix, partitions, pointIndexed)
35 , ipiv_(this->blockRows_.size() * this->scalarsPerRow_)
36 , kl_(this->numBlocks_, -1)
37 , ku_(this->numBlocks_, -1)
38 , scalarOffsets_(this->numBlocks_) {
39 TEUCHOS_TEST_FOR_EXCEPTION(
40 !matrix->hasColMap(), std::invalid_argument,
41 "Ifpack2::BandedContainer: "
42 "The constructor's input matrix must have a column Map.");
45template <
class MatrixType,
class LocalScalarType>
49template <
class MatrixType,
class LocalScalarType>
52 if (List.isParameter(
"relaxation: banded container superdiagonals")) {
53 int ku = List.get<
int>(
"relaxation: banded container superdiagonals");
54 for (LO b = 0; b < this->numBlocks_; b++)
57 if (List.isParameter(
"relaxation: banded container subdiagonals")) {
58 int kl = List.get<
int>(
"relaxation: banded container subdiagonals");
59 for (LO b = 0; b < this->numBlocks_; b++)
64template <
class MatrixType,
class LocalScalarType>
68 using Teuchos::ArrayView;
70 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
71 size_t colToOffsetSize = this->inputMatrix_->getLocalNumCols();
72 if (this->pointIndexed_)
73 colToOffsetSize *= this->bcrsBlockSize_;
74 Array<LO> colToBlockOffset(colToOffsetSize, INVALID);
77 for (
int i = 0; i < this->numBlocks_; i++) {
81 if (this->scalarsPerRow_ > 1) {
83 LO blockStart = this->blockOffsets_[i];
84 LO blockEnd = (i == this->numBlocks_ - 1) ? this->blockRows_.size() : this->blockOffsets_[i + 1];
85 ArrayView<const LO> blockRows = this->getBlockRows(i);
89 for (
size_t j = 0; j < size_t(blockRows.size()); j++) {
90 LO localCol = this->translateRowToCol(blockRows[j]);
91 colToBlockOffset[localCol] = blockStart + j;
94 using h_inds_type =
typename block_crs_matrix_type::local_inds_host_view_type;
95 using h_vals_type =
typename block_crs_matrix_type::values_host_view_type;
96 for (LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++) {
100 LO inputRow = this->blockRows_[blockStart + blockRow];
101 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
102 LO numEntries = (LO)indices.size();
103 for (LO k = 0; k < numEntries; k++) {
104 LO colOffset = colToBlockOffset[indices[k]];
105 if (blockStart <= colOffset && colOffset < blockEnd) {
109 LO blockCol = colOffset - blockStart;
110 for (LO bc = 0; bc < this->bcrsBlockSize_; bc++) {
111 for (LO br = 0; br < this->bcrsBlockSize_; br++) {
112 LO r = this->bcrsBlockSize_ * blockRow + br;
113 LO c = this->bcrsBlockSize_ * blockCol + bc;
116 if (c - r > maxSuper)
125 LO blockStart = this->blockOffsets_[i];
126 LO blockEnd = (i == this->numBlocks_ - 1) ? this->blockRows_.size() : this->blockOffsets_[i + 1];
127 ArrayView<const LO> blockRows = this->getBlockRows(i);
131 for (
size_t j = 0; j < size_t(blockRows.size()); j++) {
133 LO localCol = this->translateRowToCol(blockRows[j]);
134 colToBlockOffset[localCol] = blockStart + j;
136 for (LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++) {
138 LO inputSplitRow = this->blockRows_[blockStart + blockRow];
139 auto rowView = this->getInputRowView(inputSplitRow);
140 for (
size_t k = 0; k < rowView.size(); k++) {
141 LO colOffset = colToBlockOffset[rowView.ind(k)];
142 if (blockStart <= colOffset && colOffset < blockEnd) {
143 LO blockCol = colOffset - blockStart;
144 maxSub = std::max(maxSub, blockRow - blockCol);
145 maxSuper = std::max(maxSuper, blockCol - blockRow);
155template <
class MatrixType,
class LocalScalarType>
165 for (LO b = 0; b < this->numBlocks_; b++) {
166 LO stride = 2 * kl_[b] + ku_[b] + 1;
167 scalarOffsets_[b] = totalScalars;
168 totalScalars += stride * this->blockSizes_[b] * this->scalarsPerRow_;
170 scalars_.resize(totalScalars);
171 for (
int b = 0; b < this->numBlocks_; b++) {
174 LO nrows = this->blockSizes_[b] * this->scalarsPerRow_;
175 diagBlocks_.emplace_back(Teuchos::View, scalars_.data() + scalarOffsets_[b], 2 * kl_[b] + ku_[b] + 1, nrows, nrows, kl_[b], kl_[b] + ku_[b]);
176 diagBlocks_[b].putScalar(Teuchos::ScalarTraits<LSC>::zero());
178 std::fill(ipiv_.begin(), ipiv_.end(), 0);
181 this->IsComputed_ =
false;
182 this->IsInitialized_ =
true;
185template <
class MatrixType,
class LocalScalarType>
188 TEUCHOS_TEST_FOR_EXCEPTION(
189 ipiv_.size() != this->blockRows_.size() * this->scalarsPerRow_, std::logic_error,
190 "Ifpack2::BandedContainer::compute: ipiv_ array has the wrong size. "
191 "Please report this bug to the Ifpack2 developers.");
193 this->IsComputed_ =
false;
194 if (!this->isInitialized()) {
201 this->IsComputed_ =
true;
204template <
class MatrixType,
class LocalScalarType>
206 using Teuchos::Array;
207 using Teuchos::ArrayView;
208 using STS = Teuchos::ScalarTraits<SC>;
209 SC zero = STS::zero();
210 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
217 if (this->scalarsPerRow_ > 1) {
218 Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
219 for (
int i = 0; i < this->numBlocks_; i++) {
221 LO blockStart = this->blockOffsets_[i];
222 LO blockEnd = blockStart + this->blockSizes_[i];
223 ArrayView<const LO> blockRows = this->getBlockRows(i);
227 for (
size_t j = 0; j < size_t(blockRows.size()); j++) {
228 LO localCol = this->translateRowToCol(blockRows[j]);
229 colToBlockOffset[localCol] = blockStart + j;
231 using h_inds_type =
typename block_crs_matrix_type::local_inds_host_view_type;
232 using h_vals_type =
typename block_crs_matrix_type::values_host_view_type;
233 for (LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++) {
237 LO inputRow = this->blockRows_[blockStart + blockRow];
238 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
239 LO numEntries = (LO)indices.size();
240 for (LO k = 0; k < numEntries; k++) {
241 LO colOffset = colToBlockOffset[indices[k]];
242 if (blockStart <= colOffset && colOffset < blockEnd) {
246 LO blockCol = colOffset - blockStart;
247 for (LO bc = 0; bc < this->bcrsBlockSize_; bc++) {
248 for (LO br = 0; br < this->bcrsBlockSize_; br++) {
249 LO r = this->bcrsBlockSize_ * blockRow + br;
250 LO c = this->bcrsBlockSize_ * blockCol + bc;
251 auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
253 diagBlocks_[i](r, c) = val;
263 Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
264 for (
int i = 0; i < this->numBlocks_; i++) {
266 LO blockStart = this->blockOffsets_[i];
267 LO blockEnd = blockStart + this->blockSizes_[i];
268 ArrayView<const LO> blockRows = this->getBlockRows(i);
272 for (
size_t j = 0; j < size_t(blockRows.size()); j++) {
274 LO localCol = this->translateRowToCol(blockRows[j]);
275 colToBlockOffset[localCol] = blockStart + j;
277 for (
size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++) {
279 LO inputPointRow = this->blockRows_[blockStart + blockRow];
280 auto rowView = this->getInputRowView(inputPointRow);
281 for (
size_t k = 0; k < rowView.size(); k++) {
282 LO colOffset = colToBlockOffset[rowView.ind(k)];
283 if (blockStart <= colOffset && colOffset < blockEnd) {
284 LO blockCol = colOffset - blockStart;
285 auto val = rowView.val(k);
287 diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
295template <
class MatrixType,
class LocalScalarType>
296void BandedContainer<MatrixType, LocalScalarType>::
300 ContainerImpl<MatrixType, LocalScalarType>::clearBlocks();
303template <
class MatrixType,
class LocalScalarType>
304void BandedContainer<MatrixType, LocalScalarType>::
306 Teuchos::LAPACK<int, LSC> lapack;
309 for (
int i = 0; i < this->numBlocks_; i++) {
311 TEUCHOS_TEST_FOR_EXCEPTION(diagBlocks_[i].values() == 0, std::invalid_argument,
312 "BandedContainer<T>::factor: Diagonal block is an empty SerialBandDenseMatrix<T>!");
313 TEUCHOS_TEST_FOR_EXCEPTION(diagBlocks_[i].upperBandwidth() < diagBlocks_[i].lowerBandwidth(), std::invalid_argument,
314 "BandedContainer<T>::factor: Diagonal block needs kl additional superdiagonals for factorization! However, the number of superdiagonals is smaller than the number of subdiagonals!");
315 int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
316 lapack.GBTRF(diagBlocks_[i].numRows(),
317 diagBlocks_[i].numCols(),
318 diagBlocks_[i].lowerBandwidth(),
319 diagBlocks_[i].upperBandwidth() - diagBlocks_[i].lowerBandwidth(),
320 diagBlocks_[i].values(),
321 diagBlocks_[i].stride(),
326 TEUCHOS_TEST_FOR_EXCEPTION(
327 INFO < 0, std::logic_error,
328 "Ifpack2::BandedContainer::factor: "
329 "LAPACK's _GBTRF (LU factorization with partial pivoting) was called "
330 "incorrectly. INFO = "
332 "Please report this bug to the Ifpack2 developers.");
336 TEUCHOS_TEST_FOR_EXCEPTION(
337 INFO > 0, std::runtime_error,
338 "Ifpack2::BandedContainer::factor: "
339 "LAPACK's _GBTRF (LU factorization with partial pivoting) reports that the "
340 "computed U factor is exactly singular. U("
341 << INFO <<
"," << INFO <<
") "
342 "(one-based index i) is exactly zero. This probably means that the input "
343 "matrix has a singular diagonal block.");
347template <
class MatrixType,
class LocalScalarType>
348void BandedContainer<MatrixType, LocalScalarType>::
349 solveBlock(ConstHostSubviewLocal X,
352 Teuchos::ETransp mode,
354 const LSC beta)
const {
356 TEUCHOS_TEST_FOR_EXCEPTION(
357 X.extent(0) != Y.extent(0),
359 "Ifpack2::BandedContainer::solveBlock: X and Y have "
360 "incompatible dimensions ("
361 << X.extent(0) <<
" resp. "
362 << Y.extent(0) <<
"). Please report this bug to "
363 "the Ifpack2 developers.");
364 TEUCHOS_TEST_FOR_EXCEPTION(
365 X.extent(0) !=
static_cast<size_t>(mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numCols() : diagBlocks_[blockIndex].numRows()),
367 "Ifpack2::BandedContainer::solveBlock: The input "
368 "multivector X has incompatible dimensions from those of the "
370 << X.extent(0) <<
" vs. "
371 << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numCols() : diagBlocks_[blockIndex].numRows())
372 <<
"). Please report this bug to the Ifpack2 developers.");
373 TEUCHOS_TEST_FOR_EXCEPTION(
374 Y.extent(0) !=
static_cast<size_t>(mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRows() : diagBlocks_[blockIndex].numCols()),
376 "Ifpack2::BandedContainer::solveBlock: The output "
377 "multivector Y has incompatible dimensions from those of the "
379 << Y.extent(0) <<
" vs. "
380 << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRows() : diagBlocks_[blockIndex].numCols())
381 <<
"). Please report this bug to the Ifpack2 developers.");
384 size_t numRows = X.extent(0);
385 size_t numVecs = X.extent(1);
387 LSC zero = Teuchos::ScalarTraits<LSC>::zero();
393 for (
size_t j = 0; j < numVecs; j++)
394 for (
size_t i = 0; i < numRows; i++)
397 for (
size_t j = 0; j < numVecs; j++)
398 for (
size_t i = 0; i < numRows; i++)
399 Y(i, j) = beta * Y(i, j);
402 Teuchos::LAPACK<int, LSC> lapack;
406 std::vector<LSC> yTemp(numVecs * numRows);
407 for (
size_t j = 0; j < numVecs; j++) {
408 for (
size_t i = 0; i < numRows; i++)
409 yTemp[j * numRows + i] = X(i, j);
413 const char trans = (mode == Teuchos::CONJ_TRANS ?
'C' : (mode == Teuchos::TRANS ?
'T' :
'N'));
415 const int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
418 diagBlocks_[blockIndex].numCols(),
419 diagBlocks_[blockIndex].lowerBandwidth(),
421 diagBlocks_[blockIndex].upperBandwidth() - diagBlocks_[blockIndex].lowerBandwidth(),
423 diagBlocks_[blockIndex].values(),
424 diagBlocks_[blockIndex].stride(),
431 for (
size_t j = 0; j < numVecs; j++) {
432 for (
size_t i = 0; i < numRows; i++) {
433 Y(i, j) *= ISC(beta);
434 Y(i, j) += ISC(alpha * yTemp[j * numRows + i]);
438 for (
size_t j = 0; j < numVecs; j++) {
439 for (
size_t i = 0; i < numRows; i++)
440 Y(i, j) = ISC(yTemp[j * numRows + i]);
444 TEUCHOS_TEST_FOR_EXCEPTION(
445 INFO != 0, std::runtime_error,
446 "Ifpack2::BandedContainer::solveBlock: "
447 "LAPACK's _GBTRS (solve using LU factorization with partial pivoting) "
448 "failed with INFO = "
449 << INFO <<
" != 0.");
453template <
class MatrixType,
class LocalScalarType>
456 print(std::ostream& os)
const {
457 Teuchos::FancyOStream fos(Teuchos::rcpFromRef(os));
458 fos.setOutputToRootOnly(0);
463template <
class MatrixType,
class LocalScalarType>
467 std::ostringstream oss;
468 oss << Teuchos::Describable::description();
469 if (this->isInitialized()) {
470 if (this->isComputed()) {
471 oss <<
"{status = initialized, computed";
473 oss <<
"{status = initialized, not computed";
476 oss <<
"{status = not initialized, not computed";
482template <
class MatrixType,
class LocalScalarType>
485 const Teuchos::EVerbosityLevel verbLevel)
const {
486 if (verbLevel == Teuchos::VERB_NONE)
return;
487 os <<
"================================================================================" << std::endl;
488 os <<
"Ifpack2::BandedContainer" << std::endl;
489 for (
int i = 0; i < this->numBlocks_; i++) {
490 os <<
"Block " << i <<
": Number of rows = " << this->blockSizes_[i] << std::endl;
491 os <<
"Block " << i <<
": Number of subdiagonals = " << diagBlocks_[i].lowerBandwidth() << std::endl;
492 os <<
"Block " << i <<
": Number of superdiagonals = " << diagBlocks_[i].upperBandwidth() << std::endl;
494 os <<
"isInitialized() = " << this->IsInitialized_ << std::endl;
495 os <<
"isComputed() = " << this->IsComputed_ << std::endl;
496 os <<
"================================================================================" << std::endl;
500template <
class MatrixType,
class LocalScalarType>
507#define IFPACK2_BANDEDCONTAINER_INSTANT(S, LO, GO, N) \
508 template class Ifpack2::BandedContainer<Tpetra::RowMatrix<S, LO, GO, N>, S>;
Declaration of Ifpack2::Details::Behavior, a class that describes Ifpack2's run-time behavior.
Store and solve a local Banded linear problem.
Definition Ifpack2_BandedContainer_decl.hpp:70
BandedContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &, bool pointIndexed)
Constructor.
Definition Ifpack2_BandedContainer_def.hpp:30
virtual void compute() override
Initialize and compute each block.
Definition Ifpack2_BandedContainer_def.hpp:187
virtual void initialize() override
Do all set-up operations that only require matrix structure.
Definition Ifpack2_BandedContainer_def.hpp:157
virtual std::ostream & print(std::ostream &os) const override
Print information about this object to the given output stream.
Definition Ifpack2_BandedContainer_def.hpp:456
virtual ~BandedContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition Ifpack2_BandedContainer_def.hpp:47
virtual void setParameters(const Teuchos::ParameterList &List) override
Set all necessary parameters (super- and sub-diagonal bandwidth)
Definition Ifpack2_BandedContainer_def.hpp:51
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition Ifpack2_BandedContainer_def.hpp:501
virtual std::string description() const override
A one-line description of this object.
Definition Ifpack2_BandedContainer_def.hpp:466
void computeBandwidth()
Definition Ifpack2_BandedContainer_def.hpp:66
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print the object with some verbosity level to the given FancyOStream.
Definition Ifpack2_BandedContainer_def.hpp:484
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS)....
Definition Ifpack2_Container_decl.hpp:307
static bool debug()
Whether Ifpack2 is in debug mode.
Definition Ifpack2_Details_Behavior.cpp:31
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40