10#ifndef IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
11#define IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
13#include "Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp"
14#include "KokkosKernels_Sorting.hpp"
15#include "KokkosSparse_SortCrs.hpp"
16#include "Kokkos_Bitset.hpp"
21template <
class MatrixType>
24 const Teuchos::ArrayRCP<local_ordinal_type>& perm,
25 const Teuchos::ArrayRCP<local_ordinal_type>& reverseperm,
26 bool filterSingletons) {
27 setup(A_unfiltered, perm, reverseperm, filterSingletons);
30template <
class MatrixType>
32 setup(
const Teuchos::RCP<const row_matrix_type>& A_unfiltered,
33 const Teuchos::ArrayRCP<local_ordinal_type>& perm,
34 const Teuchos::ArrayRCP<local_ordinal_type>& reverseperm,
35 bool filterSingletons) {
38 using Teuchos::rcp_dynamic_cast;
42 TEUCHOS_TEST_FOR_EXCEPTION(
43 !rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered) &&
44 !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered),
45 std::invalid_argument,
46 "Ifpack2::Details::AdditiveSchwarzFilter: The input matrix must be a Tpetra::CrsMatrix or an Ifpack2::OverlappingRowMatrix");
47 A_unfiltered_ = A_unfiltered;
48 local_matrix_type A_local, A_halo;
49 bool haveHalo = !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_).is_null();
51 auto A_overlapping = rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_);
52 A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
53 A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
55 auto A_crs = rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered_);
56 A_local = A_crs->getLocalMatrixDevice();
60 TEUCHOS_TEST_FOR_EXCEPTION(
61 perm.size() != reverseperm.size(),
62 std::invalid_argument,
63 "Ifpack2::Details::AdditiveSchwarzFilter: perm and reverseperm should be the same length");
64 TEUCHOS_TEST_FOR_EXCEPTION(
65 (
size_t)perm.size() != (
size_t)A_unfiltered_->getLocalNumRows(),
66 std::invalid_argument,
67 "Ifpack2::Details::AdditiveSchwarzFilter: length of perm and reverseperm should be the same as the number of local rows in unfiltered A");
69 FilterSingletons_ = filterSingletons;
70 local_ordinal_type numLocalRows;
71 local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
72 if (FilterSingletons_) {
74 singletons_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"singletons_"), totalLocalRows);
75 singletonDiagonals_ = Kokkos::DualView<impl_scalar_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"singletonDiagonals_"), totalLocalRows);
76 auto singletonsDevice = singletons_.view_device();
77 singletons_.modify_device();
78 auto singletonDiagonalsDevice = singletonDiagonals_.view_device();
79 singletonDiagonals_.modify_device();
80 local_ordinal_type numSingletons;
81 Kokkos::Bitset<device_type> isSingletonBitset(totalLocalRows);
82 Kokkos::parallel_scan(
83 policy_type(0, totalLocalRows),
84 KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type & lnumSingletons,
bool finalPass) {
85 bool isSingleton =
true;
86#if KOKKOS_VERSION >= 40799
87 impl_scalar_type singletonValue = KokkosKernels::ArithTraits<impl_scalar_type>::zero();
89 impl_scalar_type singletonValue = Kokkos::ArithTraits<impl_scalar_type>::zero();
91 if (i < A_local.numRows()) {
93 size_type rowBegin = A_local.graph.row_map(i);
94 size_type rowEnd = A_local.graph.row_map(i + 1);
95 for (size_type j = rowBegin; j < rowEnd; j++) {
96 local_ordinal_type entry = A_local.graph.entries(j);
97 if (entry < totalLocalRows && entry != i) {
101 if (finalPass && entry == i) {
103 singletonValue += A_local.values(j);
108 local_ordinal_type row = i - A_local.numRows();
109 size_type rowBegin = A_halo.graph.row_map(row);
110 size_type rowEnd = A_halo.graph.row_map(row + 1);
111 for (size_type j = rowBegin; j < rowEnd; j++) {
112 local_ordinal_type entry = A_halo.graph.entries(j);
113 if (entry < totalLocalRows && entry != i) {
117 if (finalPass && entry == i) {
118 singletonValue += A_halo.values(j);
124 isSingletonBitset.set(i);
125 singletonsDevice(lnumSingletons) = i;
126 singletonDiagonalsDevice(lnumSingletons) = singletonValue;
132 numSingletons_ = numSingletons;
134 numLocalRows = totalLocalRows - numSingletons_;
136 perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"perm_"), totalLocalRows);
137 perm_.modify_device();
138 reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"perm_"), numLocalRows);
139 reverseperm_.modify_device();
140 auto permView = perm_.view_device();
141 auto reversepermView = reverseperm_.view_device();
143 Kokkos::View<local_ordinal_type*, device_type> origpermView(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"input reverse perm"), totalLocalRows);
144 Kokkos::View<local_ordinal_type*, Kokkos::HostSpace> origpermHost(reverseperm.get(), totalLocalRows);
145 Kokkos::deep_copy(execution_space(), origpermView, origpermHost);
147 Kokkos::parallel_scan(
148 policy_type(0, totalLocalRows),
149 KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type & lindex,
bool finalPass) {
150 local_ordinal_type origRow = origpermView(i);
151 if (!isSingletonBitset.test(origRow)) {
154 reversepermView(lindex) = origRow;
155 permView(origRow) = lindex;
161 permView(origRow) = local_ordinal_type(-1);
165 reverseperm_.sync_host();
168 perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"perm_"), perm.size());
170 auto permHost = perm_.view_host();
171 for (
size_t i = 0; i < (size_t)perm.size(); i++)
172 permHost(i) = perm[i];
174 reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"reverseperm_"), reverseperm.size());
175 reverseperm_.modify_host();
176 auto reversepermHost = reverseperm_.view_host();
177 for (
size_t i = 0; i < (size_t)reverseperm.size(); i++)
178 reversepermHost(i) = reverseperm[i];
179 reverseperm_.sync_device();
181 numLocalRows = totalLocalRows;
183 auto permView = perm_.view_device();
184 auto reversepermView = reverseperm_.view_device();
187 row_map_type localRowptrs(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Filtered rowptrs"), numLocalRows + 1);
188 Kokkos::parallel_for(
189 policy_type(0, numLocalRows + 1),
190 KOKKOS_LAMBDA(local_ordinal_type i) {
191 if (i == numLocalRows) {
197 local_ordinal_type numEntries = 0;
198 local_ordinal_type origRow = reversepermView(i);
199 if (origRow < A_local.numRows()) {
201 size_type rowBegin = A_local.graph.row_map(origRow);
202 size_type rowEnd = A_local.graph.row_map(origRow + 1);
203 for (size_type j = rowBegin; j < rowEnd; j++) {
204 local_ordinal_type entry = A_local.graph.entries(j);
205 if (entry < totalLocalRows && permView(entry) != -1)
210 local_ordinal_type row = origRow - A_local.numRows();
211 size_type rowBegin = A_halo.graph.row_map(row);
212 size_type rowEnd = A_halo.graph.row_map(row + 1);
213 for (size_type j = rowBegin; j < rowEnd; j++) {
214 local_ordinal_type entry = A_halo.graph.entries(j);
215 if (entry < totalLocalRows && permView(entry) != -1)
219 localRowptrs(i) = numEntries;
222 size_type numLocalEntries;
223 Kokkos::parallel_scan(
224 policy_type(0, numLocalRows + 1),
225 KOKKOS_LAMBDA(local_ordinal_type i, size_type & lnumEntries,
bool finalPass) {
226 size_type numEnt = localRowptrs(i);
228 localRowptrs(i) = lnumEntries;
229 lnumEntries += numEnt;
233 entries_type localEntries(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Filtered entries"), numLocalEntries);
234 values_type localValues(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Filtered values"), numLocalEntries);
236 local_matrix_type localMatrix(
"AdditiveSchwarzFilter", numLocalRows, numLocalRows, numLocalEntries, localValues, localRowptrs, localEntries);
237 fillLocalMatrix(localMatrix);
239#ifdef HAVE_IFPACK2_DEBUG
240 TEUCHOS_TEST_FOR_EXCEPTION(
241 !mapPairsAreFitted(*A_unfiltered_), std::invalid_argument,
242 "Ifpack2::LocalFilter: "
243 "A's Map pairs are not fitted to each other on Process "
244 << A_->getRowMap()->getComm()->getRank() <<
" of the input matrix's "
246 "This means that LocalFilter does not currently know how to work with A. "
247 "This will change soon. Please see discussion of Bug 5992.");
251 RCP<const Teuchos::Comm<int> > localComm;
253 localComm = rcp(
new Teuchos::MpiComm<int>(MPI_COMM_SELF));
255 localComm = rcp(
new Teuchos::SerialComm<int>());
258 localMap_ = rcp(
new map_type(numLocalRows, 0, localComm, Tpetra::GloballyDistributed));
260 auto crsParams = rcp(
new Teuchos::ParameterList);
261 crsParams->template set<bool>(
"sorted",
true);
268 A_ = rcp(
new crs_matrix_type(localMap_, localMap_, localMatrix, crsParams));
271template <
class MatrixType>
274template <
class MatrixType>
277 auto localMatrix = A_->getLocalMatrixDevice();
279 fillLocalMatrix(localMatrix);
280 A_->setAllValues(localMatrix.graph.row_map, localMatrix.graph.entries, localMatrix.values);
283template <
class MatrixType>
284Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::crs_matrix_type>
289template <
class MatrixType>
291 auto localRowptrs = localMatrix.graph.row_map;
292 auto localEntries = localMatrix.graph.entries;
293 auto localValues = localMatrix.values;
294 auto permView = perm_.view_device();
295 auto reversepermView = reverseperm_.view_device();
296 local_matrix_type A_local, A_halo;
297 bool haveHalo = !Teuchos::rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_).is_null();
299 auto A_overlapping = Teuchos::rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_);
300 A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
301 A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
303 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered_);
304 A_local = A_crs->getLocalMatrixDevice();
307 local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
309 Kokkos::parallel_for(
310 policy_type(0, localMatrix.numRows()),
311 KOKKOS_LAMBDA(local_ordinal_type i) {
314 size_type outRowStart = localRowptrs(i);
315 local_ordinal_type insertPos = 0;
316 local_ordinal_type origRow = reversepermView(i);
317 if (origRow < A_local.numRows()) {
319 size_type rowBegin = A_local.graph.row_map(origRow);
320 size_type rowEnd = A_local.graph.row_map(origRow + 1);
321 for (size_type j = rowBegin; j < rowEnd; j++) {
322 local_ordinal_type entry = A_local.graph.entries(j);
323 if (entry < totalLocalRows) {
324 auto newCol = permView(entry);
326 localEntries(outRowStart + insertPos) = newCol;
327 localValues(outRowStart + insertPos) = A_local.values(j);
334 local_ordinal_type row = origRow - A_local.numRows();
335 size_type rowBegin = A_halo.graph.row_map(row);
336 size_type rowEnd = A_halo.graph.row_map(row + 1);
337 for (size_type j = rowBegin; j < rowEnd; j++) {
338 local_ordinal_type entry = A_halo.graph.entries(j);
339 if (entry < totalLocalRows) {
340 auto newCol = permView(entry);
342 localEntries(outRowStart + insertPos) = newCol;
343 localValues(outRowStart + insertPos) = A_halo.values(j);
351 KokkosSparse::sort_crs_matrix(localMatrix);
354template <
class MatrixType>
356 return localMap_->getComm();
359template <
class MatrixType>
360Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
365template <
class MatrixType>
366Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
371template <
class MatrixType>
372Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
377template <
class MatrixType>
378Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
383template <
class MatrixType>
384Teuchos::RCP<
const Tpetra::RowGraph<
typename MatrixType::local_ordinal_type,
385 typename MatrixType::global_ordinal_type,
386 typename MatrixType::node_type> >
392 return A_unfiltered_->getGraph();
395template <
class MatrixType>
397 return A_->getBlockSize();
400template <
class MatrixType>
402 return A_->getGlobalNumRows();
405template <
class MatrixType>
407 return A_->getGlobalNumCols();
410template <
class MatrixType>
412 return A_->getLocalNumRows();
415template <
class MatrixType>
417 return A_->getLocalNumCols();
420template <
class MatrixType>
422 return A_->getIndexBase();
425template <
class MatrixType>
427 return getLocalNumEntries();
430template <
class MatrixType>
432 return A_->getLocalNumEntries();
435template <
class MatrixType>
438 return A_->getNumEntriesInGlobalRow(globalRow);
441template <
class MatrixType>
444 return A_->getNumEntriesInLocalRow(localRow);
447template <
class MatrixType>
451 return A_unfiltered_->getGlobalMaxNumRowEntries();
454template <
class MatrixType>
457 return A_unfiltered_->getLocalMaxNumRowEntries();
460template <
class MatrixType>
465template <
class MatrixType>
470template <
class MatrixType>
475template <
class MatrixType>
480template <
class MatrixType>
483 nonconst_global_inds_host_view_type& globalInd,
484 nonconst_values_host_view_type& val,
485 size_t& numEntries)
const {
486 throw std::runtime_error(
"Ifpack2::Details::AdditiveSchwarzFilter does not implement getGlobalRowCopy.");
489template <
class MatrixType>
492 nonconst_local_inds_host_view_type& Indices,
493 nonconst_values_host_view_type& Values,
494 size_t& NumEntries)
const
497 A_->getLocalRowCopy(LocalRow, Indices, Values, NumEntries);
500template <
class MatrixType>
502 global_inds_host_view_type& ,
503 values_host_view_type& )
const {
504 throw std::runtime_error(
"Ifpack2::AdditiveSchwarzFilter: does not support getGlobalRowView.");
507template <
class MatrixType>
509 local_inds_host_view_type& indices,
510 values_host_view_type& values)
const {
511 A_->getLocalRowView(LocalRow, indices, values);
514template <
class MatrixType>
516 getLocalDiagCopy(Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& diag)
const {
518 A_->getLocalDiagCopy(diag);
521template <
class MatrixType>
523 throw std::runtime_error(
"Ifpack2::AdditiveSchwarzFilter does not support leftScale.");
526template <
class MatrixType>
528 throw std::runtime_error(
"Ifpack2::AdditiveSchwarzFilter does not support rightScale.");
531template <
class MatrixType>
533 apply(
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
534 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
535 Teuchos::ETransp mode,
537 scalar_type beta)
const {
538 A_->apply(X, Y, mode, alpha, beta);
541template <
class MatrixType>
544 const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& OverlappingB,
545 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& OverlappingY,
546 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& ReducedReorderedB)
const {
549 TEUCHOS_TEST_FOR_EXCEPTION(!OverlappingB.isConstantStride() || !OverlappingY.isConstantStride() || !ReducedReorderedB.isConstantStride(),
550 std::logic_error,
"Ifpack2::AdditiveSchwarzFilter::CreateReducedProblem ERROR: One of the input MultiVectors is not constant stride.");
551 local_ordinal_type numVecs = OverlappingB.getNumVectors();
552 auto b = OverlappingB.getLocalViewDevice(Tpetra::Access::ReadOnly);
553 auto reducedB = ReducedReorderedB.getLocalViewDevice(Tpetra::Access::OverwriteAll);
554 auto singletons = singletons_.view_device();
555 auto singletonDiags = singletonDiagonals_.view_device();
556 auto revperm = reverseperm_.view_device();
559 auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
560 Kokkos::parallel_for(
561 policy_2d_type({0, 0}, {(local_ordinal_type)numSingletons_, numVecs}),
562 KOKKOS_LAMBDA(local_ordinal_type singletonIndex, local_ordinal_type col) {
563 local_ordinal_type row = singletons(singletonIndex);
564 y(row, col) = b(row, col) / singletonDiags(singletonIndex);
568 Kokkos::parallel_for(
569 policy_2d_type({0, 0}, {(local_ordinal_type)reducedB.extent(0), numVecs}),
570 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col) {
571 reducedB(row, col) = b(revperm(row), col);
576 if (numSingletons_) {
577 mv_type singletonUpdates(A_unfiltered_->getRowMap(), numVecs,
false);
578 A_unfiltered_->apply(OverlappingY, singletonUpdates);
579 auto updatesView = singletonUpdates.getLocalViewDevice(Tpetra::Access::ReadOnly);
581 Kokkos::parallel_for(
582 policy_2d_type({0, 0}, {(local_ordinal_type)reducedB.extent(0), numVecs}),
583 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col) {
584 local_ordinal_type origRow = revperm(row);
585 reducedB(row, col) -= updatesView(origRow, col);
590template <
class MatrixType>
592 const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& ReducedReorderedY,
593 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& OverlappingY)
const {
596 TEUCHOS_TEST_FOR_EXCEPTION(!ReducedReorderedY.isConstantStride() || !OverlappingY.isConstantStride(),
597 std::logic_error,
"Ifpack2::AdditiveSchwarzFilter::UpdateLHS ERROR: One of the input MultiVectors is not constant stride.");
598 auto reducedY = ReducedReorderedY.getLocalViewDevice(Tpetra::Access::ReadOnly);
599 auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
600 auto revperm = reverseperm_.view_device();
601 Kokkos::parallel_for(
602 policy_2d_type({0, 0}, {(local_ordinal_type)reducedY.extent(0), (local_ordinal_type)reducedY.extent(1)}),
603 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col) {
604 y(revperm(row), col) = reducedY(row, col);
608template <
class MatrixType>
613template <
class MatrixType>
618template <
class MatrixType>
621 return A_->getFrobeniusNorm();
624template <
class MatrixType>
627 const map_type& rangeMap = *(A.getRangeMap());
628 const map_type& rowMap = *(A.getRowMap());
629 const bool rangeAndRowFitted = mapPairIsFitted(rowMap, rangeMap);
631 const map_type& domainMap = *(A.getDomainMap());
632 const map_type& columnMap = *(A.getColMap());
633 const bool domainAndColumnFitted = mapPairIsFitted(columnMap, domainMap);
640 int localSuccess = rangeAndRowFitted && domainAndColumnFitted;
643 Teuchos::reduceAll<int, int>(*(A.getComm()), Teuchos::REDUCE_MIN, localSuccess, Teuchos::outArg(globalSuccess));
645 return globalSuccess == 1;
648template <
class MatrixType>
651 return map1.isLocallyFitted(map2);
657#define IFPACK2_DETAILS_ADDITIVESCHWARZFILTER_INSTANT(S, LO, GO, N) \
658 template class Ifpack2::Details::AdditiveSchwarzFilter<Tpetra::RowMatrix<S, LO, GO, N> >;
Wraps a Tpetra::CrsMatrix or Ifpack2::OverlappingRowMatrix in a filter that removes off-process edges...
Ifpack2 implementation details.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40