10#ifndef IFPACK2_BANDEDCONTAINER_DEF_HPP
11#define IFPACK2_BANDEDCONTAINER_DEF_HPP
13#include "Teuchos_LAPACK.hpp"
14#include "Tpetra_CrsMatrix.hpp"
20#include "Teuchos_DefaultMpiComm.hpp"
22#include "Teuchos_DefaultSerialComm.hpp"
27template <
class MatrixType,
class LocalScalarType>
30 const Teuchos::Array<Teuchos::Array<LO> >& partitions,
31 const Teuchos::RCP<const import_type>&,
33 :
ContainerImpl<MatrixType, LocalScalarType>(matrix, partitions, pointIndexed)
34 , ipiv_(this->blockRows_.size() * this->scalarsPerRow_)
35 , kl_(this->numBlocks_, -1)
36 , ku_(this->numBlocks_, -1)
37 , scalarOffsets_(this->numBlocks_) {
38 TEUCHOS_TEST_FOR_EXCEPTION(
39 !matrix->hasColMap(), std::invalid_argument,
40 "Ifpack2::BandedContainer: "
41 "The constructor's input matrix must have a column Map.");
44template <
class MatrixType,
class LocalScalarType>
48template <
class MatrixType,
class LocalScalarType>
51 if (List.isParameter(
"relaxation: banded container superdiagonals")) {
52 int ku = List.get<
int>(
"relaxation: banded container superdiagonals");
53 for (LO b = 0; b < this->numBlocks_; b++)
56 if (List.isParameter(
"relaxation: banded container subdiagonals")) {
57 int kl = List.get<
int>(
"relaxation: banded container subdiagonals");
58 for (LO b = 0; b < this->numBlocks_; b++)
63template <
class MatrixType,
class LocalScalarType>
67 using Teuchos::ArrayView;
69 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
70 size_t colToOffsetSize = this->inputMatrix_->getLocalNumCols();
71 if (this->pointIndexed_)
72 colToOffsetSize *= this->bcrsBlockSize_;
73 Array<LO> colToBlockOffset(colToOffsetSize, INVALID);
76 for (
int i = 0; i < this->numBlocks_; i++) {
80 if (this->scalarsPerRow_ > 1) {
82 LO blockStart = this->blockOffsets_[i];
83 LO blockEnd = (i == this->numBlocks_ - 1) ? this->blockRows_.size() : this->blockOffsets_[i + 1];
84 ArrayView<const LO> blockRows = this->getBlockRows(i);
88 for (
size_t j = 0; j < size_t(blockRows.size()); j++) {
89 LO localCol = this->translateRowToCol(blockRows[j]);
90 colToBlockOffset[localCol] = blockStart + j;
93 using h_inds_type =
typename block_crs_matrix_type::local_inds_host_view_type;
94 using h_vals_type =
typename block_crs_matrix_type::values_host_view_type;
95 for (LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++) {
99 LO inputRow = this->blockRows_[blockStart + blockRow];
100 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
101 LO numEntries = (LO)indices.size();
102 for (LO k = 0; k < numEntries; k++) {
103 LO colOffset = colToBlockOffset[indices[k]];
104 if (blockStart <= colOffset && colOffset < blockEnd) {
108 LO blockCol = colOffset - blockStart;
109 for (LO bc = 0; bc < this->bcrsBlockSize_; bc++) {
110 for (LO br = 0; br < this->bcrsBlockSize_; br++) {
111 LO r = this->bcrsBlockSize_ * blockRow + br;
112 LO c = this->bcrsBlockSize_ * blockCol + bc;
115 if (c - r > maxSuper)
124 LO blockStart = this->blockOffsets_[i];
125 LO blockEnd = (i == this->numBlocks_ - 1) ? this->blockRows_.size() : this->blockOffsets_[i + 1];
126 ArrayView<const LO> blockRows = this->getBlockRows(i);
130 for (
size_t j = 0; j < size_t(blockRows.size()); j++) {
132 LO localCol = this->translateRowToCol(blockRows[j]);
133 colToBlockOffset[localCol] = blockStart + j;
135 for (LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++) {
137 LO inputSplitRow = this->blockRows_[blockStart + blockRow];
138 auto rowView = this->getInputRowView(inputSplitRow);
139 for (
size_t k = 0; k < rowView.size(); k++) {
140 LO colOffset = colToBlockOffset[rowView.ind(k)];
141 if (blockStart <= colOffset && colOffset < blockEnd) {
142 LO blockCol = colOffset - blockStart;
143 maxSub = std::max(maxSub, blockRow - blockCol);
144 maxSuper = std::max(maxSuper, blockCol - blockRow);
154template <
class MatrixType,
class LocalScalarType>
164 for (LO b = 0; b < this->numBlocks_; b++) {
165 LO stride = 2 * kl_[b] + ku_[b] + 1;
166 scalarOffsets_[b] = totalScalars;
167 totalScalars += stride * this->blockSizes_[b] * this->scalarsPerRow_;
169 scalars_.resize(totalScalars);
170 for (
int b = 0; b < this->numBlocks_; b++) {
173 LO nrows = this->blockSizes_[b] * this->scalarsPerRow_;
174 diagBlocks_.emplace_back(Teuchos::View, scalars_.data() + scalarOffsets_[b], 2 * kl_[b] + ku_[b] + 1, nrows, nrows, kl_[b], kl_[b] + ku_[b]);
175 diagBlocks_[b].putScalar(Teuchos::ScalarTraits<LSC>::zero());
177 std::fill(ipiv_.begin(), ipiv_.end(), 0);
180 this->IsComputed_ =
false;
181 this->IsInitialized_ =
true;
184template <
class MatrixType,
class LocalScalarType>
187 TEUCHOS_TEST_FOR_EXCEPTION(
188 ipiv_.size() != this->blockRows_.size() * this->scalarsPerRow_, std::logic_error,
189 "Ifpack2::BandedContainer::compute: ipiv_ array has the wrong size. "
190 "Please report this bug to the Ifpack2 developers.");
192 this->IsComputed_ =
false;
193 if (!this->isInitialized()) {
200 this->IsComputed_ =
true;
203template <
class MatrixType,
class LocalScalarType>
205 using Teuchos::Array;
206 using Teuchos::ArrayView;
207 using STS = Teuchos::ScalarTraits<SC>;
208 SC zero = STS::zero();
209 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
216 if (this->scalarsPerRow_ > 1) {
217 Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
218 for (
int i = 0; i < this->numBlocks_; i++) {
220 LO blockStart = this->blockOffsets_[i];
221 LO blockEnd = blockStart + this->blockSizes_[i];
222 ArrayView<const LO> blockRows = this->getBlockRows(i);
226 for (
size_t j = 0; j < size_t(blockRows.size()); j++) {
227 LO localCol = this->translateRowToCol(blockRows[j]);
228 colToBlockOffset[localCol] = blockStart + j;
230 using h_inds_type =
typename block_crs_matrix_type::local_inds_host_view_type;
231 using h_vals_type =
typename block_crs_matrix_type::values_host_view_type;
232 for (LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++) {
236 LO inputRow = this->blockRows_[blockStart + blockRow];
237 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
238 LO numEntries = (LO)indices.size();
239 for (LO k = 0; k < numEntries; k++) {
240 LO colOffset = colToBlockOffset[indices[k]];
241 if (blockStart <= colOffset && colOffset < blockEnd) {
245 LO blockCol = colOffset - blockStart;
246 for (LO bc = 0; bc < this->bcrsBlockSize_; bc++) {
247 for (LO br = 0; br < this->bcrsBlockSize_; br++) {
248 LO r = this->bcrsBlockSize_ * blockRow + br;
249 LO c = this->bcrsBlockSize_ * blockCol + bc;
250 auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
252 diagBlocks_[i](r, c) = val;
262 Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
263 for (
int i = 0; i < this->numBlocks_; i++) {
265 LO blockStart = this->blockOffsets_[i];
266 LO blockEnd = blockStart + this->blockSizes_[i];
267 ArrayView<const LO> blockRows = this->getBlockRows(i);
271 for (
size_t j = 0; j < size_t(blockRows.size()); j++) {
273 LO localCol = this->translateRowToCol(blockRows[j]);
274 colToBlockOffset[localCol] = blockStart + j;
276 for (
size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++) {
278 LO inputPointRow = this->blockRows_[blockStart + blockRow];
279 auto rowView = this->getInputRowView(inputPointRow);
280 for (
size_t k = 0; k < rowView.size(); k++) {
281 LO colOffset = colToBlockOffset[rowView.ind(k)];
282 if (blockStart <= colOffset && colOffset < blockEnd) {
283 LO blockCol = colOffset - blockStart;
284 auto val = rowView.val(k);
286 diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
294template <
class MatrixType,
class LocalScalarType>
295void BandedContainer<MatrixType, LocalScalarType>::
299 ContainerImpl<MatrixType, LocalScalarType>::clearBlocks();
302template <
class MatrixType,
class LocalScalarType>
303void BandedContainer<MatrixType, LocalScalarType>::
305 Teuchos::LAPACK<int, LSC> lapack;
308 for (
int i = 0; i < this->numBlocks_; i++) {
310 TEUCHOS_TEST_FOR_EXCEPTION(diagBlocks_[i].values() == 0, std::invalid_argument,
311 "BandedContainer<T>::factor: Diagonal block is an empty SerialBandDenseMatrix<T>!");
312 TEUCHOS_TEST_FOR_EXCEPTION(diagBlocks_[i].upperBandwidth() < diagBlocks_[i].lowerBandwidth(), std::invalid_argument,
313 "BandedContainer<T>::factor: Diagonal block needs kl additional superdiagonals for factorization! However, the number of superdiagonals is smaller than the number of subdiagonals!");
314 int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
315 lapack.GBTRF(diagBlocks_[i].numRows(),
316 diagBlocks_[i].numCols(),
317 diagBlocks_[i].lowerBandwidth(),
318 diagBlocks_[i].upperBandwidth() - diagBlocks_[i].lowerBandwidth(),
319 diagBlocks_[i].values(),
320 diagBlocks_[i].stride(),
325 TEUCHOS_TEST_FOR_EXCEPTION(
326 INFO < 0, std::logic_error,
327 "Ifpack2::BandedContainer::factor: "
328 "LAPACK's _GBTRF (LU factorization with partial pivoting) was called "
329 "incorrectly. INFO = "
331 "Please report this bug to the Ifpack2 developers.");
335 TEUCHOS_TEST_FOR_EXCEPTION(
336 INFO > 0, std::runtime_error,
337 "Ifpack2::BandedContainer::factor: "
338 "LAPACK's _GBTRF (LU factorization with partial pivoting) reports that the "
339 "computed U factor is exactly singular. U("
340 << INFO <<
"," << INFO <<
") "
341 "(one-based index i) is exactly zero. This probably means that the input "
342 "matrix has a singular diagonal block.");
346template <
class MatrixType,
class LocalScalarType>
347void BandedContainer<MatrixType, LocalScalarType>::
348 solveBlock(ConstHostSubviewLocal X,
351 Teuchos::ETransp mode,
353 const LSC beta)
const {
354#ifdef HAVE_IFPACK2_DEBUG
355 TEUCHOS_TEST_FOR_EXCEPTION(
356 X.extent(0) != Y.extent(0),
358 "Ifpack2::BandedContainer::solveBlock: X and Y have "
359 "incompatible dimensions ("
360 << X.extent(0) <<
" resp. "
361 << Y.extent(0) <<
"). Please report this bug to "
362 "the Ifpack2 developers.");
363 TEUCHOS_TEST_FOR_EXCEPTION(
364 X.extent(0) !=
static_cast<size_t>(mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numCols() : diagBlocks_[blockIndex].numRows()),
366 "Ifpack2::BandedContainer::solveBlock: The input "
367 "multivector X has incompatible dimensions from those of the "
369 << X.extent(0) <<
" vs. "
370 << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numCols() : diagBlocks_[blockIndex].numRows())
371 <<
"). Please report this bug to the Ifpack2 developers.");
372 TEUCHOS_TEST_FOR_EXCEPTION(
373 Y.extent(0) !=
static_cast<size_t>(mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRows() : diagBlocks_[blockIndex].numCols()),
375 "Ifpack2::BandedContainer::solveBlock: The output "
376 "multivector Y has incompatible dimensions from those of the "
378 << Y.extent(0) <<
" vs. "
379 << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRows() : diagBlocks_[blockIndex].numCols())
380 <<
"). Please report this bug to the Ifpack2 developers.");
383 size_t numRows = X.extent(0);
384 size_t numVecs = X.extent(1);
386 LSC zero = Teuchos::ScalarTraits<LSC>::zero();
392 for (
size_t j = 0; j < numVecs; j++)
393 for (
size_t i = 0; i < numRows; i++)
396 for (
size_t j = 0; j < numVecs; j++)
397 for (
size_t i = 0; i < numRows; i++)
398 Y(i, j) = beta * Y(i, j);
401 Teuchos::LAPACK<int, LSC> lapack;
405 std::vector<LSC> yTemp(numVecs * numRows);
406 for (
size_t j = 0; j < numVecs; j++) {
407 for (
size_t i = 0; i < numRows; i++)
408 yTemp[j * numRows + i] = X(i, j);
412 const char trans = (mode == Teuchos::CONJ_TRANS ?
'C' : (mode == Teuchos::TRANS ?
'T' :
'N'));
414 const int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
417 diagBlocks_[blockIndex].numCols(),
418 diagBlocks_[blockIndex].lowerBandwidth(),
420 diagBlocks_[blockIndex].upperBandwidth() - diagBlocks_[blockIndex].lowerBandwidth(),
422 diagBlocks_[blockIndex].values(),
423 diagBlocks_[blockIndex].stride(),
430 for (
size_t j = 0; j < numVecs; j++) {
431 for (
size_t i = 0; i < numRows; i++) {
432 Y(i, j) *= ISC(beta);
433 Y(i, j) += ISC(alpha * yTemp[j * numRows + i]);
437 for (
size_t j = 0; j < numVecs; j++) {
438 for (
size_t i = 0; i < numRows; i++)
439 Y(i, j) = ISC(yTemp[j * numRows + i]);
443 TEUCHOS_TEST_FOR_EXCEPTION(
444 INFO != 0, std::runtime_error,
445 "Ifpack2::BandedContainer::solveBlock: "
446 "LAPACK's _GBTRS (solve using LU factorization with partial pivoting) "
447 "failed with INFO = "
448 << INFO <<
" != 0.");
452template <
class MatrixType,
class LocalScalarType>
455 print(std::ostream& os)
const {
456 Teuchos::FancyOStream fos(Teuchos::rcpFromRef(os));
457 fos.setOutputToRootOnly(0);
462template <
class MatrixType,
class LocalScalarType>
466 std::ostringstream oss;
467 oss << Teuchos::Describable::description();
468 if (this->isInitialized()) {
469 if (this->isComputed()) {
470 oss <<
"{status = initialized, computed";
472 oss <<
"{status = initialized, not computed";
475 oss <<
"{status = not initialized, not computed";
481template <
class MatrixType,
class LocalScalarType>
484 const Teuchos::EVerbosityLevel verbLevel)
const {
485 if (verbLevel == Teuchos::VERB_NONE)
return;
486 os <<
"================================================================================" << std::endl;
487 os <<
"Ifpack2::BandedContainer" << std::endl;
488 for (
int i = 0; i < this->numBlocks_; i++) {
489 os <<
"Block " << i <<
": Number of rows = " << this->blockSizes_[i] << std::endl;
490 os <<
"Block " << i <<
": Number of subdiagonals = " << diagBlocks_[i].lowerBandwidth() << std::endl;
491 os <<
"Block " << i <<
": Number of superdiagonals = " << diagBlocks_[i].upperBandwidth() << std::endl;
493 os <<
"isInitialized() = " << this->IsInitialized_ << std::endl;
494 os <<
"isComputed() = " << this->IsComputed_ << std::endl;
495 os <<
"================================================================================" << std::endl;
499template <
class MatrixType,
class LocalScalarType>
506#define IFPACK2_BANDEDCONTAINER_INSTANT(S, LO, GO, N) \
507 template class Ifpack2::BandedContainer<Tpetra::RowMatrix<S, LO, GO, N>, S>;
Store and solve a local Banded linear problem.
Definition Ifpack2_BandedContainer_decl.hpp:70
BandedContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &, bool pointIndexed)
Constructor.
Definition Ifpack2_BandedContainer_def.hpp:29
virtual void compute() override
Initialize and compute each block.
Definition Ifpack2_BandedContainer_def.hpp:186
virtual void initialize() override
Do all set-up operations that only require matrix structure.
Definition Ifpack2_BandedContainer_def.hpp:156
virtual std::ostream & print(std::ostream &os) const override
Print information about this object to the given output stream.
Definition Ifpack2_BandedContainer_def.hpp:455
virtual ~BandedContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition Ifpack2_BandedContainer_def.hpp:46
virtual void setParameters(const Teuchos::ParameterList &List) override
Set all necessary parameters (super- and sub-diagonal bandwidth)
Definition Ifpack2_BandedContainer_def.hpp:50
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition Ifpack2_BandedContainer_def.hpp:500
virtual std::string description() const override
A one-line description of this object.
Definition Ifpack2_BandedContainer_def.hpp:465
void computeBandwidth()
Definition Ifpack2_BandedContainer_def.hpp:65
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print the object with some verbosity level to the given FancyOStream.
Definition Ifpack2_BandedContainer_def.hpp:483
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS)....
Definition Ifpack2_Container_decl.hpp:311
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40