Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Container_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_CONTAINER_DEF_HPP
11#define IFPACK2_CONTAINER_DEF_HPP
12
14#include <Teuchos_Time.hpp>
15
16namespace Ifpack2 {
17
18// Implementation of Ifpack2::Container
19
20template <class MatrixType>
22 const Teuchos::RCP<const row_matrix_type>& matrix,
23 const Teuchos::Array<Teuchos::Array<LO>>& partitions,
24 bool pointIndexed)
25 : inputMatrix_(matrix)
26 , inputCrsMatrix_(Teuchos::rcp_dynamic_cast<const crs_matrix_type>(inputMatrix_))
27 , inputBlockMatrix_(Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(inputMatrix_))
28 , pointIndexed_(pointIndexed)
29 , IsInitialized_(false)
30 , IsComputed_(false) {
31 using Teuchos::Array;
32 using Teuchos::ArrayView;
33 using Teuchos::Comm;
34 using Teuchos::Ptr;
35 using Teuchos::RCP;
36 NumLocalRows_ = inputMatrix_->getLocalNumRows();
37 NumGlobalRows_ = inputMatrix_->getGlobalNumRows();
38 NumGlobalNonzeros_ = inputMatrix_->getGlobalNumEntries();
39 IsParallel_ = inputMatrix_->getRowMap()->getComm()->getSize() != 1;
41 if (hasBlockCrs_)
42 bcrsBlockSize_ = inputBlockMatrix_->getBlockSize();
43 else
47 else
49 setBlockSizes(partitions);
50// Sanity check the partitions
51#ifdef HAVE_IFPACK2_DEBUG
52 // Check whether the input set of local row indices is correct.
53 const map_type& rowMap = *inputMatrix_->getRowMap();
54 for (int i = 0; i < numBlocks_; i++) {
55 Teuchos::ArrayView<const LO> blockRows = getBlockRows(i);
56 for (LO j = 0; j < blockSizes_[i]; j++) {
57 LO row = blockRows[j];
58 if (pointIndexed) {
59 // convert the point row to the corresponding block row
60 row /= bcrsBlockSize_;
61 }
62 TEUCHOS_TEST_FOR_EXCEPTION(
63 !rowMap.isNodeLocalElement(row),
64 std::invalid_argument,
65 "Ifpack2::Container: "
66 "On process "
67 << rowMap.getComm()->getRank() << " of "
68 << rowMap.getComm()->getSize() << ", in the given set of local row "
69 "indices blockRows = "
70 << Teuchos::toString(blockRows) << ", the following "
71 "entries is not valid local row index on the calling process: "
72 << row << ".");
73 }
74 }
75#endif
76}
77
78template <class MatrixType>
81
82template <class MatrixType>
83Teuchos::ArrayView<const typename MatrixType::local_ordinal_type>
85 return Teuchos::ArrayView<const LO>(&blockRows_[blockOffsets_[blockIndex]], blockSizes_[blockIndex]);
86}
87
88template <class MatrixType>
89void Container<MatrixType>::setBlockSizes(const Teuchos::Array<Teuchos::Array<LO>>& partitions) {
90 // First, create a grand total of all the rows in all the blocks
91 // Note: If partitioner allowed overlap, this could be greater than the # of local rows
92 LO totalBlockRows = 0;
93 numBlocks_ = partitions.size();
94 blockSizes_.resize(numBlocks_);
95 blockOffsets_.resize(numBlocks_);
96 maxBlockSize_ = 0;
97 for (int i = 0; i < numBlocks_; i++) {
98 LO rowsInBlock = partitions[i].size();
99 blockSizes_[i] = rowsInBlock;
100 blockOffsets_[i] = totalBlockRows;
101 totalBlockRows += rowsInBlock;
102 maxBlockSize_ = std::max(maxBlockSize_, rowsInBlock * scalarsPerRow_);
103 }
104 blockRows_.resize(totalBlockRows);
105 // set blockRows_: each entry is the partition/block of the row
106 LO iter = 0;
107 for (int i = 0; i < numBlocks_; i++) {
108 for (int j = 0; j < blockSizes_[i]; j++) {
109 blockRows_[iter++] = partitions[i][j];
110 }
111 }
112}
113
114template <class MatrixType>
116 if (Diag_.is_null()) {
117 Diag_ = rcp(new vector_type(inputMatrix_->getDomainMap()));
118 inputMatrix_->getLocalDiagCopy(*Diag_);
119 }
120}
121
122template <class MatrixType>
124 return IsInitialized_;
125}
126
127template <class MatrixType>
129 return IsComputed_;
130}
131
132template <class MatrixType>
134 applyMV(const mv_type& X, mv_type& Y) const {
135 TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "Not implemented.");
136}
137
138template <class MatrixType>
140 weightedApplyMV(const mv_type& X, mv_type& Y, vector_type& W) const {
141 TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "Not implemented.");
142}
143
144template <class MatrixType>
146 getName() {
147 return "Generic";
148}
149
150template <class MatrixType>
152 SC dampingFactor, LO i) const {
153 TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "Not implemented.");
154}
155
156template <class MatrixType>
157void Container<MatrixType>::DoJacobi(ConstHostView X, HostView Y, SC dampingFactor) const {
158 using STS = Teuchos::ScalarTraits<ISC>;
159 const ISC one = STS::one();
160 // use blockRows_ and blockSizes_
161 size_t numVecs = X.extent(1);
162 // Non-overlapping Jacobi
163 for (LO i = 0; i < numBlocks_; i++) {
164 // may happen that a partition is empty
165 if (blockSizes_[i] != 1 || hasBlockCrs_) {
166 if (blockSizes_[i] == 0)
167 continue;
168 apply(X, Y, i, Teuchos::NO_TRANS, dampingFactor, one);
169 } else // singleton, can't access Containers_[i] as it was never filled and may be null.
170 {
171 LO LRID = blockRows_[blockOffsets_[i]];
172 getMatDiag();
173 auto diagView = Diag_->getLocalViewHost(Tpetra::Access::ReadOnly);
174 ISC d = one * (static_cast<ISC>(dampingFactor)) / diagView(LRID, 0);
175 for (size_t nv = 0; nv < numVecs; nv++) {
176 ISC x = X(LRID, nv);
177 Y(LRID, nv) += x * d;
178 }
179 }
180 }
181}
182
183template <class MatrixType>
184void Container<MatrixType>::DoOverlappingJacobi(ConstHostView X, HostView Y, ConstHostView W, SC dampingFactor, bool nonsymScaling) const {
185 using STS = Teuchos::ScalarTraits<SC>;
186 // Overlapping Jacobi
187 for (LO i = 0; i < numBlocks_; i++) {
188 // may happen that a partition is empty
189 if (blockSizes_[i] == 0)
190 continue;
191 if (blockSizes_[i] != 1) {
192 if (!nonsymScaling)
193 weightedApply(X, Y, W, i, Teuchos::NO_TRANS, dampingFactor, STS::one());
194 else {
195 // A crummy way of doing nonsymmetric scaling. We effectively
196 // first reverse scale x, which later gets scaled inside weightedApply
197 // so the net effect is that x is not scaled.
198 // This was done to keep using weightedApply() that is defined in
199 // many spots in the code.
200 HostView tempo("", X.extent(0), X.extent(1));
201 size_t numVecs = X.extent(1);
202 LO bOffset = blockOffsets_[i];
203 for (LO ii = 0; ii < blockSizes_[i]; ii++) {
204 LO LRID = blockRows_[bOffset++];
205 for (size_t jj = 0; jj < numVecs; jj++) tempo(LRID, jj) = X(LRID, jj) / W(LRID, 0);
206 }
207 weightedApply(tempo, Y, W, i, Teuchos::NO_TRANS, dampingFactor, STS::one());
208 }
209 } else // singleton, can't access Containers_[i] as it was never filled and may be null.
210 {
211 const ISC one = STS::one();
212 size_t numVecs = X.extent(1);
213 LO LRID = blockRows_[blockOffsets_[i]];
214 getMatDiag();
215 auto diagView = Diag_->getLocalViewHost(Tpetra::Access::ReadOnly);
216 ISC recip = one / diagView(LRID, 0);
217 ISC wval = W(LRID, 0);
218 ISC combo = wval * recip;
219 ISC d = combo * (static_cast<ISC>(dampingFactor));
220 for (size_t nv = 0; nv < numVecs; nv++) {
221 ISC x = X(LRID, nv);
222 Y(LRID, nv) = x * d + Y(LRID, nv);
223 }
224 }
225 }
226}
227
228// Do Gauss-Seidel with just block i
229// This is used 3 times: once in DoGaussSeidel and twice in DoSGS
230template <class MatrixType, typename LocalScalarType>
232 ConstHostView X, HostView Y, HostView Y2, HostView Resid,
233 SC dampingFactor, LO i) const {
234 using Teuchos::ArrayView;
235 using STS = Teuchos::ScalarTraits<ISC>;
236 size_t numVecs = X.extent(1);
237 const ISC one = STS::one();
238 if (this->blockSizes_[i] == 0)
239 return; // Skip empty partitions
240 if (this->hasBlockCrs_ && !this->pointIndexed_) {
241 // Use efficient blocked version
242 ArrayView<const LO> blockRows = this->getBlockRows(i);
243 const size_t localNumRows = this->blockSizes_[i];
244 using inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
245 using vals_type = typename block_crs_matrix_type::values_host_view_type;
246 for (size_t j = 0; j < localNumRows; j++) {
247 LO row = blockRows[j]; // Containers_[i]->ID (j);
248 vals_type values;
249 inds_type colinds;
250 this->inputBlockMatrix_->getLocalRowView(row, colinds, values);
251 LO numEntries = (LO)colinds.size();
252 for (size_t m = 0; m < numVecs; m++) {
253 for (int localR = 0; localR < this->bcrsBlockSize_; localR++)
254 Resid(row * this->bcrsBlockSize_ + localR, m) = X(row * this->bcrsBlockSize_ + localR, m);
255 for (LO k = 0; k < numEntries; ++k) {
256 const LO col = colinds[k];
257 for (int localR = 0; localR < this->bcrsBlockSize_; localR++) {
258 for (int localC = 0; localC < this->bcrsBlockSize_; localC++) {
259 Resid(row * this->bcrsBlockSize_ + localR, m) -=
260 values[k * this->bcrsBlockSize_ * this->bcrsBlockSize_ + localR + localC * this->bcrsBlockSize_] * Y2(col * this->bcrsBlockSize_ + localC, m);
261 }
262 }
263 }
264 }
265 }
266 // solve with this block
267 //
268 // Note: I'm abusing the ordering information, knowing that X/Y
269 // and Y2 have the same ordering for on-proc unknowns.
270 //
271 // Note: Add flop counts for inverse apply
272 apply(Resid, Y2, i, Teuchos::NO_TRANS, dampingFactor, one);
273 } else if (!this->hasBlockCrs_ && this->blockSizes_[i] == 1) {
274 // singleton, can't access Containers_[i] as it was never filled and may be null.
275 // a singleton calculation (just using matrix diagonal) is exact, all residuals should be zero.
276 LO LRID = this->blockOffsets_[i]; // by definition, a singleton 1 row in block.
277 // Use the KokkosSparse internal matrix for low-overhead values/indices access
278 // But, can only do this if the matrix is accessible directly from host, since it's not a DualView
280 container_exec_space().fence();
281 auto localA = this->inputCrsMatrix_->getLocalMatrixHost();
282 using size_type = typename crs_matrix_type::local_matrix_host_type::size_type;
283 const auto& rowmap = localA.graph.row_map;
284 const auto& entries = localA.graph.entries;
285 const auto& values = localA.values;
286 this->getMatDiag();
287 auto diagView = this->Diag_->getLocalViewHost(Tpetra::Access::ReadOnly);
288 ISC d = (static_cast<ISC>(dampingFactor)) / diagView(LRID, 0);
289 for (size_t m = 0; m < numVecs; m++) {
290 // ISC x = X(LRID, m);
291 ISC r = X(LRID, m);
292 for (size_type k = rowmap(LRID); k < rowmap(LRID + 1); k++) {
293 const LO col = entries(k);
294 r -= values(k) * Y2(col, m);
295 }
296
297 ISC newy = r * d;
298 Y2(LRID, m) += newy;
299 }
300 } else if (!this->inputCrsMatrix_.is_null() &&
301 std::is_same<typename crs_matrix_type::device_type::memory_space, Kokkos::HostSpace>::value) {
302 // Use the KokkosSparse internal matrix for low-overhead values/indices access
303 // But, can only do this if the matrix is accessible directly from host, since it's not a DualView
305 container_exec_space().fence();
306 auto localA = this->inputCrsMatrix_->getLocalMatrixHost();
307 using size_type = typename crs_matrix_type::local_matrix_host_type::size_type;
308 const auto& rowmap = localA.graph.row_map;
309 const auto& entries = localA.graph.entries;
310 const auto& values = localA.values;
311 ArrayView<const LO> blockRows = this->getBlockRows(i);
312 for (size_t j = 0; j < size_t(blockRows.size()); j++) {
313 const LO row = blockRows[j];
314 for (size_t m = 0; m < numVecs; m++) {
315 ISC r = X(row, m);
316 for (size_type k = rowmap(row); k < rowmap(row + 1); k++) {
317 const LO col = entries(k);
318 r -= values(k) * Y2(col, m);
319 }
320 Resid(row, m) = r;
321 }
322 }
323 // solve with this block
324 //
325 // Note: I'm abusing the ordering information, knowing that X/Y
326 // and Y2 have the same ordering for on-proc unknowns.
327 //
328 // Note: Add flop counts for inverse apply
329 apply(Resid, Y2, i, Teuchos::NO_TRANS, dampingFactor, one);
330 } else {
331 // Either a point-indexed block matrix, or a normal row matrix
332 // that doesn't support getLocalMatrix
333 ArrayView<const LO> blockRows = this->getBlockRows(i);
334 for (size_t j = 0; j < size_t(blockRows.size()); j++) {
335 const LO row = blockRows[j];
336 auto rowView = getInputRowView(row);
337 for (size_t m = 0; m < numVecs; m++) {
338 Resid(row, m) = X(row, m);
339 for (size_t k = 0; k < rowView.size(); ++k) {
340 const LO col = rowView.ind(k);
341 Resid(row, m) -= rowView.val(k) * Y2(col, m);
342 }
343 }
344 }
345 // solve with this block
346 //
347 // Note: I'm abusing the ordering information, knowing that X/Y
348 // and Y2 have the same ordering for on-proc unknowns.
349 //
350 // Note: Add flop counts for inverse apply
351 apply(Resid, Y2, i, Teuchos::NO_TRANS, dampingFactor, one);
352 }
353}
354
355template <class MatrixType>
357 DoGaussSeidel(ConstHostView X, HostView Y, HostView Y2, SC dampingFactor) const {
358 using Teuchos::Array;
359 using Teuchos::ArrayRCP;
360 using Teuchos::ArrayView;
361 using Teuchos::Ptr;
362 using Teuchos::RCP;
363 using Teuchos::rcp;
364 using Teuchos::rcpFromRef;
365 // This function just extracts the diagonal if it hasn't already.
366 getMatDiag();
367 auto numVecs = X.extent(1);
368 // X = RHS, Y = initial guess
369 HostView Resid("", X.extent(0), X.extent(1));
370 for (LO i = 0; i < numBlocks_; i++) {
371 DoGSBlock(X, Y, Y2, Resid, dampingFactor, i);
372 }
373 if (IsParallel_) {
374 auto numMyRows = inputMatrix_->getLocalNumRows();
375 for (size_t m = 0; m < numVecs; ++m) {
376 for (size_t i = 0; i < numMyRows * bcrsBlockSize_; ++i) {
377 Y(i, m) = Y2(i, m);
378 }
379 }
380 }
381}
382
383template <class MatrixType>
384void Container<MatrixType>::
385 DoSGS(ConstHostView X, HostView Y, HostView Y2, SC dampingFactor) const {
386 // X = RHS, Y = initial guess
387 using Teuchos::Array;
388 using Teuchos::ArrayRCP;
389 using Teuchos::ArrayView;
390 using Teuchos::Ptr;
391 using Teuchos::ptr;
392 using Teuchos::RCP;
393 using Teuchos::rcp;
394 using Teuchos::rcpFromRef;
395 auto numVecs = X.extent(1);
396 HostView Resid("", X.extent(0), X.extent(1));
397 // Forward Sweep
398 for (LO i = 0; i < numBlocks_; i++) {
399 DoGSBlock(X, Y, Y2, Resid, dampingFactor, i);
400 }
401 static_assert(std::is_signed<LO>::value,
402 "Local ordinal must be signed (unsigned breaks reverse iteration to 0)");
403 // Reverse Sweep
404 for (LO i = numBlocks_ - 1; i >= 0; --i) {
405 DoGSBlock(X, Y, Y2, Resid, dampingFactor, i);
406 }
407 if (IsParallel_) {
408 auto numMyRows = inputMatrix_->getLocalNumRows();
409 for (size_t m = 0; m < numVecs; ++m) {
410 for (size_t i = 0; i < numMyRows * bcrsBlockSize_; ++i) {
411 Y(i, m) = Y2(i, m);
412 }
413 }
414 }
415}
416
417template <class MatrixType>
418void Container<MatrixType>::
419 clearBlocks() {
420 numBlocks_ = 0;
421 blockRows_.clear();
422 blockSizes_.clear();
423 blockOffsets_.clear();
424 Diag_ = Teuchos::null; // Diag_ will be recreated if needed
425}
426
427// Implementation of Ifpack2::ContainerImpl
428
429template <class MatrixType, class LocalScalarType>
432 const Teuchos::RCP<const row_matrix_type>& matrix,
433 const Teuchos::Array<Teuchos::Array<LO>>& partitions,
434 bool pointIndexed)
435 : Container<MatrixType>(matrix, partitions, pointIndexed) {}
437template <class MatrixType, class LocalScalarType>
440
441template <class MatrixType, class LocalScalarType>
443 setParameters(const Teuchos::ParameterList& List) {}
444
445template <class MatrixType, class LocalScalarType>
447 applyInverseJacobi(const mv_type& /* X */, mv_type& /* Y */,
448 SC dampingFactor,
449 bool /* zeroStartingSolution = false */,
450 int /* numSweeps = 1 */) const {
451 TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "Not implemented.");
452}
453
454template <class MatrixType, class LocalScalarType>
456 applyMV(const mv_type& X, mv_type& Y) const {
457 ConstHostView XView = X.getLocalViewHost(Tpetra::Access::ReadOnly);
458 HostView YView = Y.getLocalViewHost(Tpetra::Access::ReadWrite);
459 this->apply(XView, YView, 0);
461
462template <class MatrixType, class LocalScalarType>
464 weightedApplyMV(const mv_type& X,
465 mv_type& Y,
466 vector_type& W) const {
467 ConstHostView XView = X.getLocalViewHost(Tpetra::Access::ReadOnly);
468 HostView YView = Y.getLocalViewHost(Tpetra::Access::ReadWrite);
469 ConstHostView WView = W.getLocalViewHost(Tpetra::Access::ReadOnly);
470 weightedApply(XView, YView, WView, 0);
471}
472
473template <class MatrixType, class LocalScalarType>
475 getName() {
476 return "Generic";
477}
478
479template <class MatrixType, class LocalScalarType>
481 solveBlock(ConstHostSubviewLocal X,
482 HostSubviewLocal Y,
483 int blockIndex,
484 Teuchos::ETransp mode,
485 const LSC alpha,
486 const LSC beta) const {
487 TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "Not implemented.");
488}
489
490template <class MatrixType, class LocalScalarType>
491typename ContainerImpl<MatrixType, LocalScalarType>::LO
493 translateRowToCol(LO row) {
494 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
495 GO GO_INVALID = Teuchos::OrdinalTraits<GO>::invalid();
496 const map_type& globalRowMap = *(this->inputMatrix_->getRowMap());
497 const map_type& globalColMap = *(this->inputMatrix_->getColMap());
498 LO rowLID = row;
499 LO dofOffset = 0;
500 if (this->pointIndexed_) {
501 rowLID = row / this->bcrsBlockSize_;
502 dofOffset = row % this->bcrsBlockSize_;
503 }
504 GO diagGID = globalRowMap.getGlobalElement(rowLID);
505 TEUCHOS_TEST_FOR_EXCEPTION(
506 diagGID == GO_INVALID,
507 std::runtime_error,
508 "Ifpack2::Container::translateRowToCol: "
509 "On process "
510 << this->inputMatrix_->getRowMap()->getComm()->getRank() << ", at least one row index in the set of local "
511 "row indices given to the constructor is not a valid local row index in "
512 "the input matrix's row Map on this process. This should be impossible "
513 "because the constructor checks for this case. Here is the complete set "
514 "of invalid local row indices: "
515 << rowLID << ". "
516 "Please report this bug to the Ifpack2 developers.");
517 // now, can translate diagGID (both a global row AND global col ID) to local column
518 LO colLID = globalColMap.getLocalElement(diagGID);
519 TEUCHOS_TEST_FOR_EXCEPTION(
520 colLID == LO_INVALID,
521 std::runtime_error,
522 "Ifpack2::Container::translateRowToCol: "
523 "On process "
524 << this->inputMatrix_->getRowMap()->getComm()->getRank() << ", "
525 "at least one row index in the set of row indices given to the constructor "
526 "does not have a corresponding column index in the input matrix's column "
527 "Map. This probably means that the column(s) in question is/are empty on "
528 "this process, which would make the submatrix to extract structurally "
529 "singular. The invalid global column index is "
530 << diagGID << ".");
531 // colLID could identify a block column - translate to split column if needed
532 if (this->pointIndexed_)
533 return colLID * this->bcrsBlockSize_ + dofOffset;
534 return colLID;
535}
536
537template <class MatrixType, class LocalScalarType>
539 apply(ConstHostView X,
540 HostView Y,
541 int blockIndex,
542 Teuchos::ETransp mode,
543 SC alpha,
544 SC beta) const {
545 using Teuchos::ArrayView;
546 using Teuchos::as;
547 using Teuchos::RCP;
548 using Teuchos::rcp;
549
550 // The local operator might have a different Scalar type than
551 // MatrixType. This means that we might have to convert X and Y to
552 // the Tpetra::MultiVector specialization that the local operator
553 // wants. This class' X_ and Y_ internal fields are of the right
554 // type for the local operator, so we can use those as targets.
555
557
558 TEUCHOS_TEST_FOR_EXCEPTION(
559 !this->IsComputed_, std::runtime_error,
560 "Ifpack2::Container::apply: "
561 "You must have called the compute() method before you may call apply(). "
562 "You may call the apply() method as many times as you want after calling "
563 "compute() once, but you must have called compute() at least once.");
564
565 const size_t numVecs = X.extent(1);
566
567 if (numVecs == 0) {
568 return; // done! nothing to do
569 }
570
571 // The local operator works on a permuted subset of the local parts
572 // of X and Y. The subset and permutation are defined by the index
573 // array returned by getBlockRows(). If the permutation is trivial
574 // and the subset is exactly equal to the local indices, then we
575 // could use the local parts of X and Y exactly, without needing to
576 // permute. Otherwise, we have to use temporary storage to permute
577 // X and Y. For now, we always use temporary storage.
578 //
579 // Create temporary permuted versions of the input and output.
580 // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
581 // store the permuted versions of X resp. Y. Note that X_local has
582 // the domain Map of the operator, which may be a permuted subset of
583 // the local Map corresponding to X.getMap(). Similarly, Y_local
584 // has the range Map of the operator, which may be a permuted subset
585 // of the local Map corresponding to Y.getMap(). numRows_ here
586 // gives the number of rows in the row Map of the local Inverse_
587 // operator.
588 //
589 // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
590 // here that the row Map and the range Map of that operator are
591 // the same.
592 //
593 // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
594 // really belongs in Tpetra.
595
596 if (X_localBlocks_.size() == 0 || X.extent(1) != X_local_.extent(1)) {
597 // need to resize (or create for the first time) the three scratch arrays
598 X_localBlocks_.clear();
599 Y_localBlocks_.clear();
600 X_localBlocks_.reserve(this->numBlocks_);
601 Y_localBlocks_.reserve(this->numBlocks_);
602
603 X_local_ = HostViewLocal("X_local", this->blockRows_.size() * this->scalarsPerRow_, numVecs);
604 Y_local_ = HostViewLocal("Y_local", this->blockRows_.size() * this->scalarsPerRow_, numVecs);
605
606 // create all X_local and Y_local managed Views at once, are
607 // reused in subsequent apply() calls
608 for (int i = 0; i < this->numBlocks_; i++) {
609 auto blockBounds = std::make_pair(this->blockOffsets_[i] * this->scalarsPerRow_,
610 (this->blockOffsets_[i] + this->blockSizes_[i]) * this->scalarsPerRow_);
611 X_localBlocks_.emplace_back(X_local_, blockBounds, Kokkos::ALL());
612 Y_localBlocks_.emplace_back(Y_local_, blockBounds, Kokkos::ALL());
613 }
614 }
615
616 const ArrayView<const LO> blockRows = this->getBlockRows(blockIndex);
617
618 if (this->scalarsPerRow_ == 1)
619 mvgs.gatherViewToView(X_localBlocks_[blockIndex], X, blockRows);
620 else
621 mvgs.gatherViewToViewBlock(X_localBlocks_[blockIndex], X, blockRows, this->scalarsPerRow_);
622
623 // We must gather the contents of the output multivector Y even on
624 // input to solveBlock(), since the inverse operator might use it as
625 // an initial guess for a linear solve. We have no way of knowing
626 // whether it does or does not.
627
628 if (this->scalarsPerRow_ == 1)
629 mvgs.gatherViewToView(Y_localBlocks_[blockIndex], Y, blockRows);
630 else
631 mvgs.gatherViewToViewBlock(Y_localBlocks_[blockIndex], Y, blockRows, this->scalarsPerRow_);
632
633 // Apply the local operator:
634 // Y_local := beta*Y_local + alpha*M^{-1}*X_local
635 this->solveBlock(X_localBlocks_[blockIndex], Y_localBlocks_[blockIndex], blockIndex, mode,
636 LSC(alpha), LSC(beta));
637
638 // Scatter the permuted subset output vector Y_local back into the
639 // original output multivector Y.
640 if (this->scalarsPerRow_ == 1)
641 mvgs.scatterViewToView(Y, Y_localBlocks_[blockIndex], blockRows);
642 else
643 mvgs.scatterViewToViewBlock(Y, Y_localBlocks_[blockIndex], blockRows, this->scalarsPerRow_);
644}
645
646template <class MatrixType, class LocalScalarType>
648 weightedApply(ConstHostView X,
649 HostView Y,
650 ConstHostView D,
651 int blockIndex,
652 Teuchos::ETransp mode,
653 SC alpha,
654 SC beta) const {
655 using std::endl;
656 using Teuchos::ArrayRCP;
657 using Teuchos::ArrayView;
658 using Teuchos::Ptr;
659 using Teuchos::ptr;
660 using Teuchos::Range1D;
661 using Teuchos::RCP;
662 using Teuchos::rcp;
663 using Teuchos::rcp_const_cast;
664 using STS = Teuchos::ScalarTraits<SC>;
665
666 // The local operator template parameter might have a different
667 // Scalar type than MatrixType. This means that we might have to
668 // convert X and Y to the Tpetra::MultiVector specialization that
669 // the local operator wants. This class' X_ and Y_ internal fields
670 // are of the right type for the local operator, so we can use those
671 // as targets.
672
673 const char prefix[] = "Ifpack2::Container::weightedApply: ";
674 TEUCHOS_TEST_FOR_EXCEPTION(
675 !this->IsComputed_, std::runtime_error, prefix << "You must have called the "
676 "compute() method before you may call this method. You may call "
677 "weightedApply() as many times as you want after calling compute() once, "
678 "but you must have called compute() at least once first.");
679
680 // bmk 7-2019: BlockRelaxation already checked this, but if that changes...
681 TEUCHOS_TEST_FOR_EXCEPTION(
682 this->scalarsPerRow_ > 1, std::logic_error, prefix << "Use of block rows isn't allowed "
683 "in overlapping Jacobi (the only method that uses weightedApply");
684
685 const size_t numVecs = X.extent(1);
686
687 TEUCHOS_TEST_FOR_EXCEPTION(
688 X.extent(1) != Y.extent(1), std::runtime_error,
689 prefix << "X and Y have different numbers of vectors (columns). X has "
690 << X.extent(1) << ", but Y has " << Y.extent(1) << ".");
691
692 if (numVecs == 0) {
693 return; // done! nothing to do
694 }
695
696 const size_t numRows = this->blockSizes_[blockIndex];
697
698 // The local operator works on a permuted subset of the local parts
699 // of X and Y. The subset and permutation are defined by the index
700 // array returned by getBlockRows(). If the permutation is trivial
701 // and the subset is exactly equal to the local indices, then we
702 // could use the local parts of X and Y exactly, without needing to
703 // permute. Otherwise, we have to use temporary storage to permute
704 // X and Y. For now, we always use temporary storage.
705 //
706 // Create temporary permuted versions of the input and output.
707 // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
708 // store the permuted versions of X resp. Y. Note that X_local has
709 // the domain Map of the operator, which may be a permuted subset of
710 // the local Map corresponding to X.getMap(). Similarly, Y_local
711 // has the range Map of the operator, which may be a permuted subset
712 // of the local Map corresponding to Y.getMap(). numRows_ here
713 // gives the number of rows in the row Map of the local operator.
714 //
715 // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
716 // here that the row Map and the range Map of that operator are
717 // the same.
718 //
719 // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
720 // really belongs in Tpetra.
721 if (X_localBlocks_.size() == 0 || X.extent(1) != X_local_.extent(1)) {
722 // need to resize (or create for the first time) the three scratch arrays
723 X_localBlocks_.clear();
724 Y_localBlocks_.clear();
725 X_localBlocks_.reserve(this->numBlocks_);
726 Y_localBlocks_.reserve(this->numBlocks_);
727
728 X_local_ = HostViewLocal("X_local", this->blockRows_.size() * this->scalarsPerRow_, numVecs);
729 Y_local_ = HostViewLocal("Y_local", this->blockRows_.size() * this->scalarsPerRow_, numVecs);
730
731 // create all X_local and Y_local managed Views at once, are
732 // reused in subsequent apply() calls
733 for (int i = 0; i < this->numBlocks_; i++) {
734 auto blockBounds = std::make_pair(this->blockOffsets_[i] * this->scalarsPerRow_,
735 (this->blockOffsets_[i] + this->blockSizes_[i]) * this->scalarsPerRow_);
736 X_localBlocks_.emplace_back(X_local_, blockBounds, Kokkos::ALL());
737 Y_localBlocks_.emplace_back(Y_local_, blockBounds, Kokkos::ALL());
738 }
739 }
740 if (int(weightedApplyScratch_.extent(0)) != 3 * this->maxBlockSize_ ||
741 weightedApplyScratch_.extent(1) != numVecs) {
742 weightedApplyScratch_ = HostViewLocal("weightedApply scratch", 3 * this->maxBlockSize_, numVecs);
743 }
744
745 ArrayView<const LO> blockRows = this->getBlockRows(blockIndex);
746
748
749 // note: BlockCrs w/ weighted Jacobi isn't allowed, so no need to use block gather/scatter
750 mvgs.gatherViewToView(X_localBlocks_[blockIndex], X, blockRows);
751 // We must gather the output multivector Y even on input to
752 // solveBlock(), since the local operator might use it as an initial
753 // guess for a linear solve. We have no way of knowing whether it
754 // does or does not.
755
756 mvgs.gatherViewToView(Y_localBlocks_[blockIndex], Y, blockRows);
757
758 // Apply the diagonal scaling D to the input X. It's our choice
759 // whether the result has the original input Map of X, or the
760 // permuted subset Map of X_local. If the latter, we also need to
761 // gather D into the permuted subset Map. We choose the latter, to
762 // save memory and computation. Thus, we do the following:
763 //
764 // 1. Gather D into a temporary vector D_local.
765 // 2. Create a temporary X_scaled to hold diag(D_local) * X_local.
766 // 3. Compute X_scaled := diag(D_loca) * X_local.
767 auto maxBS = this->maxBlockSize_;
768 auto bs = this->blockSizes_[blockIndex] * this->scalarsPerRow_;
769
770 HostSubviewLocal D_local(weightedApplyScratch_, std::make_pair(0, bs), std::make_pair(0, 1));
771 mvgs.gatherViewToView(D_local, D, blockRows);
772 HostSubviewLocal X_scaled(weightedApplyScratch_, std::make_pair(maxBS, maxBS + bs), Kokkos::ALL());
773 for (size_t j = 0; j < numVecs; j++)
774 for (size_t i = 0; i < numRows; i++)
775 X_scaled(i, j) = X_localBlocks_[blockIndex](i, j) * D_local(i, 0);
776
777 HostSubviewLocal Y_temp(weightedApplyScratch_, std::make_pair(maxBS * 2, maxBS * 2 + bs), Kokkos::ALL());
778 // Apply the local operator: Y_temp := M^{-1} * X_scaled
779 this->solveBlock(X_scaled, Y_temp, blockIndex, mode, STS::one(), STS::zero());
780 // Y_local := beta * Y_local + alpha * diag(D_local) * Y_temp.
781 //
782 // Note that we still use the permuted subset scaling D_local here,
783 // because Y_temp has the same permuted subset Map. That's good, in
784 // fact, because it's a subset: less data to read and multiply.
785 LISC a = alpha;
786 LISC b = beta;
787 for (size_t j = 0; j < numVecs; j++)
788 for (size_t i = 0; i < numRows; i++)
789 Y_localBlocks_[blockIndex](i, j) = b * Y_localBlocks_[blockIndex](i, j) + a * Y_temp(i, j) * D_local(i, 0);
790
791 // Copy the permuted subset output vector Y_local into the original
792 // output multivector Y.
793 mvgs.scatterViewToView(Y, Y_localBlocks_[blockIndex], blockRows);
794}
795
796template <class MatrixType, class LocalScalarType>
798 typename ContainerImpl<MatrixType, LocalScalarType>::SC,
799 typename ContainerImpl<MatrixType, LocalScalarType>::LO,
800 typename ContainerImpl<MatrixType, LocalScalarType>::GO,
801 typename ContainerImpl<MatrixType, LocalScalarType>::NO>
803 getInputRowView(LO row) const {
804 typedef typename MatrixType::nonconst_local_inds_host_view_type nonconst_local_inds_host_view_type;
805 typedef typename MatrixType::nonconst_values_host_view_type nonconst_values_host_view_type;
806
807 typedef typename MatrixType::local_inds_host_view_type local_inds_host_view_type;
808 typedef typename MatrixType::values_host_view_type values_host_view_type;
809 using IST = typename row_matrix_type::impl_scalar_type;
810
811 if (this->hasBlockCrs_) {
812 using h_inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
813 using h_vals_type = typename block_crs_matrix_type::values_host_view_type;
814 h_inds_type colinds;
815 h_vals_type values;
816 this->inputBlockMatrix_->getLocalRowView(row / this->bcrsBlockSize_, colinds, values);
817 size_t numEntries = colinds.size();
818 // CMS: Can't say I understand what this really does
819 // return StridedRowView(values + row % this->bcrsBlockSize_, colinds, this->bcrsBlockSize_, numEntries * this->bcrsBlockSize_);
820 h_vals_type subvals = Kokkos::subview(values, std::pair<size_t, size_t>(row % this->bcrsBlockSize_, values.size()));
821 return StridedRowView(subvals, colinds, this->bcrsBlockSize_, numEntries * this->bcrsBlockSize_);
822 } else if (!this->inputMatrix_->supportsRowViews()) {
823 size_t maxEntries = this->inputMatrix_->getLocalMaxNumRowEntries();
824 Teuchos::Array<LO> inds(maxEntries);
825 Teuchos::Array<SC> vals(maxEntries);
826 nonconst_local_inds_host_view_type inds_v(inds.data(), maxEntries);
827 nonconst_values_host_view_type vals_v(reinterpret_cast<IST*>(vals.data()), maxEntries);
828 size_t numEntries;
829 this->inputMatrix_->getLocalRowCopy(row, inds_v, vals_v, numEntries);
830 vals.resize(numEntries);
831 inds.resize(numEntries);
832 return StridedRowView(vals, inds);
833 } else {
834 // CMS - This is dangerous and might not work.
835 local_inds_host_view_type colinds;
836 values_host_view_type values;
837 this->inputMatrix_->getLocalRowView(row, colinds, values);
838 return StridedRowView(values, colinds, 1, colinds.size());
839 }
840}
841
842template <class MatrixType, class LocalScalarType>
844 clearBlocks() {
845 X_localBlocks_.clear();
846 Y_localBlocks_.clear();
847 X_local_ = HostViewLocal();
848 Y_local_ = HostViewLocal();
850}
851
852namespace Details {
853
854// Implementation of Ifpack2::Details::StridedRowView
855template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
857 StridedRowView(h_vals_type vals_, h_inds_type inds_, int blockSize_, size_t nnz_)
858 : vals(vals_)
859 , inds(inds_)
860 , blockSize(blockSize_)
861 , nnz(nnz_) {}
862
863template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
865 StridedRowView(Teuchos::Array<SC>& vals_, Teuchos::Array<LO>& inds_)
866 : vals()
867 , inds()
868 , blockSize(1)
869 , nnz(vals_.size()) {
870 valsCopy.swap(vals_);
871 indsCopy.swap(inds_);
872}
873
874template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
876 val(size_t i) const {
877#ifdef HAVE_IFPACK2_DEBUG
878 TEUCHOS_TEST_FOR_EXCEPTION(i >= nnz, std::runtime_error,
879 "Out-of-bounds access into Ifpack2::Container::StridedRowView");
880#endif
881 if (vals.size() > 0) {
882 if (blockSize == 1)
883 return vals[i];
884 else
885 return vals[i * blockSize];
886 } else
887 return valsCopy[i];
888}
889
890template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
891LocalOrdinal StridedRowView<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
892 ind(size_t i) const {
893#ifdef HAVE_IFPACK2_DEBUG
894 TEUCHOS_TEST_FOR_EXCEPTION(i >= nnz, std::runtime_error,
895 "Out-of-bounds access into Ifpack2::Container::StridedRowView");
896#endif
897 // inds is smaller than vals by a factor of the block size (dofs/node)
898 if (inds.size() > 0) {
899 if (blockSize == 1)
900 return inds[i];
901 else
902 return inds[i / blockSize] * blockSize + i % blockSize;
903 } else
904 return indsCopy[i];
905}
906
907template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
908size_t StridedRowView<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
909 size() const {
910 return nnz;
911}
912} // namespace Details
913
914} // namespace Ifpack2
915
916template <class MatrixType>
917std::ostream& operator<<(std::ostream& os, const Ifpack2::Container<MatrixType>& obj) {
918 return obj.print(os);
919}
920
921#define IFPACK2_CONTAINER_INSTANT(S, LO, GO, N) \
922 template class Ifpack2::Container<Tpetra::RowMatrix<S, LO, GO, N>>; \
923 template class Ifpack2::ContainerImpl<Tpetra::RowMatrix<S, LO, GO, N>, S>; \
924 template class Ifpack2::Details::StridedRowView<S, LO, GO, N>; \
925 template std::ostream& operator<< <Tpetra::RowMatrix<S, LO, GO, N>>( \
926 std::ostream& os, const Ifpack2::Container<Tpetra::RowMatrix<S, LO, GO, N>>& obj);
927
928#endif
Declaration and definition of the Ifpack2::Details::MultiVectorLocalGatherScatter class.
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS)....
Definition Ifpack2_Container_decl.hpp:311
LocalScalarType LSC
The internal representation of LocalScalarType in Kokkos::View.
Definition Ifpack2_Container_decl.hpp:330
void DoGSBlock(ConstHostView X, HostView Y, HostView Y2, HostView Resid, SC dampingFactor, LO i) const
Do one step of Gauss-Seidel on block i (used by DoGaussSeidel and DoSGS)
Definition Ifpack2_Container_def.hpp:231
Interface for creating and solving a set of local linear problems.
Definition Ifpack2_Container_decl.hpp:79
virtual ~Container()
Destructor.
Definition Ifpack2_Container_def.hpp:80
bool hasBlockCrs_
Whether the input matrix is a BlockCRS matrix.
Definition Ifpack2_Container_decl.hpp:279
bool IsParallel_
Whether the problem is distributed across multiple MPI processes.
Definition Ifpack2_Container_decl.hpp:271
int numBlocks_
The number of blocks (partitions) in the container.
Definition Ifpack2_Container_decl.hpp:261
Teuchos::ArrayView< const LO > getBlockRows(int blockIndex) const
Local indices of the rows of the input matrix that belong to this block.
Definition Ifpack2_Container_def.hpp:84
static std::string getName()
Definition Ifpack2_Container_def.hpp:146
typename Kokkos::ArithTraits< SC >::val_type ISC
Internal representation of Scalar in Kokkos::View.
Definition Ifpack2_Container_decl.hpp:104
int bcrsBlockSize_
If hasBlockCrs_, the number of DOFs per vertex. Otherwise 1.
Definition Ifpack2_Container_decl.hpp:281
Teuchos::RCP< const block_crs_matrix_type > inputBlockMatrix_
The input matrix, dynamic cast to BlockCrsMatrix. May be null.
Definition Ifpack2_Container_decl.hpp:258
virtual void DoGSBlock(ConstHostView X, HostView Y, HostView Y2, HostView Resid, SC dampingFactor, LO i) const
Do one step of Gauss-Seidel on block i (used by DoGaussSeidel and DoSGS)
Definition Ifpack2_Container_def.hpp:151
virtual void applyMV(const mv_type &X, mv_type &Y) const
Wrapper for apply with MultiVector.
Definition Ifpack2_Container_def.hpp:134
LO NumLocalRows_
Number of local rows in input matrix.
Definition Ifpack2_Container_decl.hpp:273
bool isInitialized() const
Whether the container has been successfully initialized.
Definition Ifpack2_Container_def.hpp:123
void setBlockSizes(const Teuchos::Array< Teuchos::Array< LO > > &partitions)
Initialize arrays with information about block sizes.
Definition Ifpack2_Container_def.hpp:89
LO scalarsPerRow_
Definition Ifpack2_Container_decl.hpp:286
Teuchos::Array< LO > blockSizes_
Number of rows in each block.
Definition Ifpack2_Container_decl.hpp:265
GO NumGlobalNonzeros_
Number of nonzeros in input matrix.
Definition Ifpack2_Container_decl.hpp:277
Container(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, bool pointIndexed)
Constructor.
Definition Ifpack2_Container_def.hpp:21
virtual std::ostream & print(std::ostream &os) const =0
Print basic information about the container to os.
virtual void weightedApplyMV(const mv_type &X, mv_type &Y, vector_type &W) const
Wrapper for weightedApply with MultiVector.
Definition Ifpack2_Container_def.hpp:140
Teuchos::RCP< const row_matrix_type > inputMatrix_
The input matrix to the constructor.
Definition Ifpack2_Container_decl.hpp:252
bool pointIndexed_
(If hasBlockCrs_) Whether the blocks are described using sub-block row indices instead of full block ...
Definition Ifpack2_Container_decl.hpp:283
GO NumGlobalRows_
Number of global rows in input matrix.
Definition Ifpack2_Container_decl.hpp:275
bool isComputed() const
Whether the container has been successfully computed.
Definition Ifpack2_Container_def.hpp:128
typename mv_type::dual_view_type::t_host HostView
Definition Ifpack2_Container_decl.hpp:109
Ifpack2's implementation of Trilinos::Details::LinearSolver interface.
Definition Ifpack2_Details_LinearSolver_decl.hpp:75
Ifpack2 implementation details.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40
Structure for read-only views of general matrix rows.
Definition Ifpack2_Container_decl.hpp:506
StridedRowView(h_vals_type vals_, h_inds_type inds_, int blockSize_, size_t nnz_)
Constructor for row views (preferred)
Definition Ifpack2_Container_def.hpp:857