Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Details_AdditiveSchwarzFilter_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
11#define IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
12
13#include "Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp"
14#include "KokkosKernels_Sorting.hpp"
15#include "KokkosSparse_SortCrs.hpp"
16#include "Kokkos_Bitset.hpp"
17
18namespace Ifpack2 {
19namespace Details {
20
21template <class MatrixType>
23 AdditiveSchwarzFilter(const Teuchos::RCP<const row_matrix_type>& A_unfiltered,
24 const Teuchos::ArrayRCP<local_ordinal_type>& perm,
25 const Teuchos::ArrayRCP<local_ordinal_type>& reverseperm,
26 bool filterSingletons) {
27 setup(A_unfiltered, perm, reverseperm, filterSingletons);
28}
29
30template <class MatrixType>
32 setup(const Teuchos::RCP<const row_matrix_type>& A_unfiltered,
33 const Teuchos::ArrayRCP<local_ordinal_type>& perm,
34 const Teuchos::ArrayRCP<local_ordinal_type>& reverseperm,
35 bool filterSingletons) {
36 using Teuchos::RCP;
37 using Teuchos::rcp;
38 using Teuchos::rcp_dynamic_cast;
39 // Check that A is an instance of allowed types, either:
40 // - Tpetra::CrsMatrix
41 // - Ifpack2::OverlappingRowMatrix (which always consists of two CrsMatrices, A_ and ExtMatrix_)
42 TEUCHOS_TEST_FOR_EXCEPTION(
43 !rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered) &&
44 !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered),
45 std::invalid_argument,
46 "Ifpack2::Details::AdditiveSchwarzFilter: The input matrix must be a Tpetra::CrsMatrix or an Ifpack2::OverlappingRowMatrix");
47 A_unfiltered_ = A_unfiltered;
48 local_matrix_type A_local, A_halo;
49 bool haveHalo = !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_).is_null();
50 if (haveHalo) {
51 auto A_overlapping = rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_);
52 A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
53 A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
54 } else {
55 auto A_crs = rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered_);
56 A_local = A_crs->getLocalMatrixDevice();
57 // leave A_halo empty in this case
58 }
59 // Check that perm and reversePerm are the same length and match the number of local rows in A
60 TEUCHOS_TEST_FOR_EXCEPTION(
61 perm.size() != reverseperm.size(),
62 std::invalid_argument,
63 "Ifpack2::Details::AdditiveSchwarzFilter: perm and reverseperm should be the same length");
64 TEUCHOS_TEST_FOR_EXCEPTION(
65 (size_t)perm.size() != (size_t)A_unfiltered_->getLocalNumRows(),
66 std::invalid_argument,
67 "Ifpack2::Details::AdditiveSchwarzFilter: length of perm and reverseperm should be the same as the number of local rows in unfiltered A");
68 // First, compute the permutation tables (that exclude singletons, if requested)
69 FilterSingletons_ = filterSingletons;
70 local_ordinal_type numLocalRows;
71 local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
72 if (FilterSingletons_) {
73 // Fill singletons and singletonDiagonals (allocate them to the upper bound at first, then shrink it to size)
74 singletons_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "singletons_"), totalLocalRows);
75 singletonDiagonals_ = Kokkos::DualView<impl_scalar_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "singletonDiagonals_"), totalLocalRows);
76 auto singletonsDevice = singletons_.view_device();
77 singletons_.modify_device();
78 auto singletonDiagonalsDevice = singletonDiagonals_.view_device();
79 singletonDiagonals_.modify_device();
80 local_ordinal_type numSingletons;
81 Kokkos::Bitset<device_type> isSingletonBitset(totalLocalRows);
82 Kokkos::parallel_scan(
83 policy_type(0, totalLocalRows),
84 KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type & lnumSingletons, bool finalPass) {
85 bool isSingleton = true;
86#if KOKKOS_VERSION >= 40799
87 impl_scalar_type singletonValue = KokkosKernels::ArithTraits<impl_scalar_type>::zero();
88#else
89 impl_scalar_type singletonValue = Kokkos::ArithTraits<impl_scalar_type>::zero();
90#endif
91 if (i < A_local.numRows()) {
92 // row i is in original local matrix
93 size_type rowBegin = A_local.graph.row_map(i);
94 size_type rowEnd = A_local.graph.row_map(i + 1);
95 for (size_type j = rowBegin; j < rowEnd; j++) {
96 local_ordinal_type entry = A_local.graph.entries(j);
97 if (entry < totalLocalRows && entry != i) {
98 isSingleton = false;
99 break;
100 }
101 if (finalPass && entry == i) {
102 // note: using a sum to compute the diagonal value, in case entries are not compressed.
103 singletonValue += A_local.values(j);
104 }
105 }
106 } else {
107 // row i is in halo
108 local_ordinal_type row = i - A_local.numRows();
109 size_type rowBegin = A_halo.graph.row_map(row);
110 size_type rowEnd = A_halo.graph.row_map(row + 1);
111 for (size_type j = rowBegin; j < rowEnd; j++) {
112 local_ordinal_type entry = A_halo.graph.entries(j);
113 if (entry < totalLocalRows && entry != i) {
114 isSingleton = false;
115 break;
116 }
117 if (finalPass && entry == i) {
118 singletonValue += A_halo.values(j);
119 }
120 }
121 }
122 if (isSingleton) {
123 if (finalPass) {
124 isSingletonBitset.set(i);
125 singletonsDevice(lnumSingletons) = i;
126 singletonDiagonalsDevice(lnumSingletons) = singletonValue;
127 }
128 lnumSingletons++;
129 }
130 },
131 numSingletons);
132 numSingletons_ = numSingletons;
133 // Each local row in A_unfiltered is either a singleton or a row in the filtered matrix.
134 numLocalRows = totalLocalRows - numSingletons_;
135 // Using the list of singletons, create the reduced permutations.
136 perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), totalLocalRows);
137 perm_.modify_device();
138 reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), numLocalRows);
139 reverseperm_.modify_device();
140 auto permView = perm_.view_device();
141 auto reversepermView = reverseperm_.view_device();
142 // Get the original inverse permutation on device
143 Kokkos::View<local_ordinal_type*, device_type> origpermView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "input reverse perm"), totalLocalRows);
144 Kokkos::View<local_ordinal_type*, Kokkos::HostSpace> origpermHost(reverseperm.get(), totalLocalRows);
145 Kokkos::deep_copy(execution_space(), origpermView, origpermHost);
146 // reverseperm is a list of local rows in A_unfiltered, so filter out the elements of reverseperm where isSingleton is true. Then regenerate the forward permutation.
147 Kokkos::parallel_scan(
148 policy_type(0, totalLocalRows),
149 KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type & lindex, bool finalPass) {
150 local_ordinal_type origRow = origpermView(i);
151 if (!isSingletonBitset.test(origRow)) {
152 if (finalPass) {
153 // mark the mapping in both directions between origRow and lindex (a row in filtered A)
154 reversepermView(lindex) = origRow;
155 permView(origRow) = lindex;
156 }
157 lindex++;
158 } else {
159 // origRow is a singleton, so mark a -1 in the new forward permutation to show that it has no corresponding row in filtered A.
160 if (finalPass)
161 permView(origRow) = local_ordinal_type(-1);
162 }
163 });
164 perm_.sync_host();
165 reverseperm_.sync_host();
166 } else {
167 // We will keep all the local rows, so use the permutation as is
168 perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), perm.size());
169 perm_.modify_host();
170 auto permHost = perm_.view_host();
171 for (size_t i = 0; i < (size_t)perm.size(); i++)
172 permHost(i) = perm[i];
173 perm_.sync_device();
174 reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "reverseperm_"), reverseperm.size());
175 reverseperm_.modify_host();
176 auto reversepermHost = reverseperm_.view_host();
177 for (size_t i = 0; i < (size_t)reverseperm.size(); i++)
178 reversepermHost(i) = reverseperm[i];
179 reverseperm_.sync_device();
180 numSingletons_ = 0;
181 numLocalRows = totalLocalRows;
182 }
183 auto permView = perm_.view_device();
184 auto reversepermView = reverseperm_.view_device();
185 // Fill the local matrix of A_ (filtered and permuted)
186 // First, construct the rowmap by counting the entries in each row
187 row_map_type localRowptrs(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered rowptrs"), numLocalRows + 1);
188 Kokkos::parallel_for(
189 policy_type(0, numLocalRows + 1),
190 KOKKOS_LAMBDA(local_ordinal_type i) {
191 if (i == numLocalRows) {
192 localRowptrs(i) = 0;
193 return;
194 }
195 // Count the entries that will be in filtered row i.
196 // This means entries which correspond to local, non-singleton rows/columns.
197 local_ordinal_type numEntries = 0;
198 local_ordinal_type origRow = reversepermView(i);
199 if (origRow < A_local.numRows()) {
200 // row i is in original local matrix
201 size_type rowBegin = A_local.graph.row_map(origRow);
202 size_type rowEnd = A_local.graph.row_map(origRow + 1);
203 for (size_type j = rowBegin; j < rowEnd; j++) {
204 local_ordinal_type entry = A_local.graph.entries(j);
205 if (entry < totalLocalRows && permView(entry) != -1)
206 numEntries++;
207 }
208 } else {
209 // row i is in halo
210 local_ordinal_type row = origRow - A_local.numRows();
211 size_type rowBegin = A_halo.graph.row_map(row);
212 size_type rowEnd = A_halo.graph.row_map(row + 1);
213 for (size_type j = rowBegin; j < rowEnd; j++) {
214 local_ordinal_type entry = A_halo.graph.entries(j);
215 if (entry < totalLocalRows && permView(entry) != -1)
216 numEntries++;
217 }
218 }
219 localRowptrs(i) = numEntries;
220 });
221 // Prefix sum to finish computing the rowptrs
222 size_type numLocalEntries;
223 Kokkos::parallel_scan(
224 policy_type(0, numLocalRows + 1),
225 KOKKOS_LAMBDA(local_ordinal_type i, size_type & lnumEntries, bool finalPass) {
226 size_type numEnt = localRowptrs(i);
227 if (finalPass)
228 localRowptrs(i) = lnumEntries;
229 lnumEntries += numEnt;
230 },
231 numLocalEntries);
232 // Allocate and fill local entries and values
233 entries_type localEntries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered entries"), numLocalEntries);
234 values_type localValues(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered values"), numLocalEntries);
235 // Create the local matrix with the uninitialized entries/values, then fill it.
236 local_matrix_type localMatrix("AdditiveSchwarzFilter", numLocalRows, numLocalRows, numLocalEntries, localValues, localRowptrs, localEntries);
237 fillLocalMatrix(localMatrix);
238 // Create a serial comm and the map for the final filtered CrsMatrix (each process uses its own local map)
239#ifdef HAVE_IFPACK2_DEBUG
240 TEUCHOS_TEST_FOR_EXCEPTION(
241 !mapPairsAreFitted(*A_unfiltered_), std::invalid_argument,
242 "Ifpack2::LocalFilter: "
243 "A's Map pairs are not fitted to each other on Process "
244 << A_->getRowMap()->getComm()->getRank() << " of the input matrix's "
245 "communicator. "
246 "This means that LocalFilter does not currently know how to work with A. "
247 "This will change soon. Please see discussion of Bug 5992.");
248#endif // HAVE_IFPACK2_DEBUG
249
250 // Build the local communicator (containing this process only).
251 RCP<const Teuchos::Comm<int> > localComm;
252#ifdef HAVE_MPI
253 localComm = rcp(new Teuchos::MpiComm<int>(MPI_COMM_SELF));
254#else
255 localComm = rcp(new Teuchos::SerialComm<int>());
256#endif // HAVE_MPI
257 // All 4 maps are the same for a local square operator.
258 localMap_ = rcp(new map_type(numLocalRows, 0, localComm, Tpetra::GloballyDistributed));
259 // Create the inner filtered matrix.
260 auto crsParams = rcp(new Teuchos::ParameterList);
261 crsParams->template set<bool>("sorted", true);
262 // NOTE: this is the fastest way to construct A_ - it's created as fillComplete,
263 // and no communication needs to be done since localMap_ uses a local comm.
264 // It does need to copy the whole local matrix to host when DualViews are constructed
265 //(only an issue with non-UVM GPU backends) but this is just unavoidable when creating a Tpetra::CrsMatrix.
266 // It also needs to compute local constants (maxNumRowEntries) but this should be a
267 // cheap operation relative to what this constructor already did.
268 A_ = rcp(new crs_matrix_type(localMap_, localMap_, localMatrix, crsParams));
269}
270
271template <class MatrixType>
273
274template <class MatrixType>
276 // Get the local matrix back from A_
277 auto localMatrix = A_->getLocalMatrixDevice();
278 // Copy new values from A_unfiltered to the local matrix, and then reconstruct A_.
279 fillLocalMatrix(localMatrix);
280 A_->setAllValues(localMatrix.graph.row_map, localMatrix.graph.entries, localMatrix.values);
281}
282
283template <class MatrixType>
284Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::crs_matrix_type>
286 return A_;
287}
288
289template <class MatrixType>
291 auto localRowptrs = localMatrix.graph.row_map;
292 auto localEntries = localMatrix.graph.entries;
293 auto localValues = localMatrix.values;
294 auto permView = perm_.view_device();
295 auto reversepermView = reverseperm_.view_device();
296 local_matrix_type A_local, A_halo;
297 bool haveHalo = !Teuchos::rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_).is_null();
298 if (haveHalo) {
299 auto A_overlapping = Teuchos::rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_);
300 A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
301 A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
302 } else {
303 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered_);
304 A_local = A_crs->getLocalMatrixDevice();
305 // leave A_halo empty in this case
306 }
307 local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
308 // Fill entries and values
309 Kokkos::parallel_for(
310 policy_type(0, localMatrix.numRows()),
311 KOKKOS_LAMBDA(local_ordinal_type i) {
312 // Count the entries that will be in filtered row i.
313 // This means entries which correspond to local, non-singleton rows/columns.
314 size_type outRowStart = localRowptrs(i);
315 local_ordinal_type insertPos = 0;
316 local_ordinal_type origRow = reversepermView(i);
317 if (origRow < A_local.numRows()) {
318 // row i is in original local matrix
319 size_type rowBegin = A_local.graph.row_map(origRow);
320 size_type rowEnd = A_local.graph.row_map(origRow + 1);
321 for (size_type j = rowBegin; j < rowEnd; j++) {
322 local_ordinal_type entry = A_local.graph.entries(j);
323 if (entry < totalLocalRows) {
324 auto newCol = permView(entry);
325 if (newCol != -1) {
326 localEntries(outRowStart + insertPos) = newCol;
327 localValues(outRowStart + insertPos) = A_local.values(j);
328 insertPos++;
329 }
330 }
331 }
332 } else {
333 // row i is in halo
334 local_ordinal_type row = origRow - A_local.numRows();
335 size_type rowBegin = A_halo.graph.row_map(row);
336 size_type rowEnd = A_halo.graph.row_map(row + 1);
337 for (size_type j = rowBegin; j < rowEnd; j++) {
338 local_ordinal_type entry = A_halo.graph.entries(j);
339 if (entry < totalLocalRows) {
340 auto newCol = permView(entry);
341 if (newCol != -1) {
342 localEntries(outRowStart + insertPos) = newCol;
343 localValues(outRowStart + insertPos) = A_halo.values(j);
344 insertPos++;
345 }
346 }
347 }
348 }
349 });
350 // Sort the matrix, since the reordering can shuffle the entries into any order.
351 KokkosSparse::sort_crs_matrix(localMatrix);
352}
353
354template <class MatrixType>
355Teuchos::RCP<const Teuchos::Comm<int> > AdditiveSchwarzFilter<MatrixType>::getComm() const {
356 return localMap_->getComm();
357}
358
359template <class MatrixType>
360Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
362 return localMap_;
363}
364
365template <class MatrixType>
366Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
368 return localMap_;
369}
370
371template <class MatrixType>
372Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
374 return localMap_;
375}
376
377template <class MatrixType>
378Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
380 return localMap_;
381}
382
383template <class MatrixType>
384Teuchos::RCP<const Tpetra::RowGraph<typename MatrixType::local_ordinal_type,
385 typename MatrixType::global_ordinal_type,
386 typename MatrixType::node_type> >
388 // NOTE BMK 6-22: this is to maintain compatibilty with LocalFilter.
389 // Situations like overlapping AdditiveSchwarz + BlockRelaxation
390 // require the importer of the original distributed graph, even though the
391 // BlockRelaxation is preconditioning a local matrix (A_).
392 return A_unfiltered_->getGraph();
393}
394
395template <class MatrixType>
396typename MatrixType::local_ordinal_type AdditiveSchwarzFilter<MatrixType>::getBlockSize() const {
397 return A_->getBlockSize();
398}
399
400template <class MatrixType>
402 return A_->getGlobalNumRows();
403}
404
405template <class MatrixType>
407 return A_->getGlobalNumCols();
408}
409
410template <class MatrixType>
412 return A_->getLocalNumRows();
413}
414
415template <class MatrixType>
417 return A_->getLocalNumCols();
418}
419
420template <class MatrixType>
421typename MatrixType::global_ordinal_type AdditiveSchwarzFilter<MatrixType>::getIndexBase() const {
422 return A_->getIndexBase();
423}
424
425template <class MatrixType>
427 return getLocalNumEntries();
428}
429
430template <class MatrixType>
432 return A_->getLocalNumEntries();
433}
434
435template <class MatrixType>
437 getNumEntriesInGlobalRow(global_ordinal_type globalRow) const {
438 return A_->getNumEntriesInGlobalRow(globalRow);
439}
440
441template <class MatrixType>
443 getNumEntriesInLocalRow(local_ordinal_type localRow) const {
444 return A_->getNumEntriesInLocalRow(localRow);
445}
446
447template <class MatrixType>
449 // Use A_unfiltered_ to get a valid upper bound for this.
450 // This lets us support this function without computing global constants on A_.
451 return A_unfiltered_->getGlobalMaxNumRowEntries();
452}
453
454template <class MatrixType>
456 // Use A_unfiltered_ to get a valid upper bound for this
457 return A_unfiltered_->getLocalMaxNumRowEntries();
458}
459
460template <class MatrixType>
462 return true;
463}
464
465template <class MatrixType>
467 return true;
468}
469
470template <class MatrixType>
472 return false;
473}
474
475template <class MatrixType>
477 return true;
478}
479
480template <class MatrixType>
482 getGlobalRowCopy(global_ordinal_type globalRow,
483 nonconst_global_inds_host_view_type& globalInd,
484 nonconst_values_host_view_type& val,
485 size_t& numEntries) const {
486 throw std::runtime_error("Ifpack2::Details::AdditiveSchwarzFilter does not implement getGlobalRowCopy.");
487}
488
489template <class MatrixType>
491 getLocalRowCopy(local_ordinal_type LocalRow,
492 nonconst_local_inds_host_view_type& Indices,
493 nonconst_values_host_view_type& Values,
494 size_t& NumEntries) const
495
496{
497 A_->getLocalRowCopy(LocalRow, Indices, Values, NumEntries);
498}
499
500template <class MatrixType>
501void AdditiveSchwarzFilter<MatrixType>::getGlobalRowView(global_ordinal_type /* GlobalRow */,
502 global_inds_host_view_type& /*indices*/,
503 values_host_view_type& /*values*/) const {
504 throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter: does not support getGlobalRowView.");
505}
506
507template <class MatrixType>
508void AdditiveSchwarzFilter<MatrixType>::getLocalRowView(local_ordinal_type LocalRow,
509 local_inds_host_view_type& indices,
510 values_host_view_type& values) const {
511 A_->getLocalRowView(LocalRow, indices, values);
512}
513
514template <class MatrixType>
516 getLocalDiagCopy(Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& diag) const {
517 // This is somewhat dubious as to how the maps match.
518 A_->getLocalDiagCopy(diag);
519}
520
521template <class MatrixType>
522void AdditiveSchwarzFilter<MatrixType>::leftScale(const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */) {
523 throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter does not support leftScale.");
524}
525
526template <class MatrixType>
527void AdditiveSchwarzFilter<MatrixType>::rightScale(const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */) {
528 throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter does not support rightScale.");
529}
530
531template <class MatrixType>
533 apply(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
534 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
535 Teuchos::ETransp mode,
536 scalar_type alpha,
537 scalar_type beta) const {
538 A_->apply(X, Y, mode, alpha, beta);
539}
540
541template <class MatrixType>
544 const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& OverlappingB,
545 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& OverlappingY,
546 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& ReducedReorderedB) const {
547 // NOTE: the three vectors here are always constructed within AdditiveSchwarz and are not subviews,
548 // so they are all constant stride (this avoids a lot of boilerplate with whichVectors)
549 TEUCHOS_TEST_FOR_EXCEPTION(!OverlappingB.isConstantStride() || !OverlappingY.isConstantStride() || !ReducedReorderedB.isConstantStride(),
550 std::logic_error, "Ifpack2::AdditiveSchwarzFilter::CreateReducedProblem ERROR: One of the input MultiVectors is not constant stride.");
551 local_ordinal_type numVecs = OverlappingB.getNumVectors();
552 auto b = OverlappingB.getLocalViewDevice(Tpetra::Access::ReadOnly);
553 auto reducedB = ReducedReorderedB.getLocalViewDevice(Tpetra::Access::OverwriteAll);
554 auto singletons = singletons_.view_device();
555 auto singletonDiags = singletonDiagonals_.view_device();
556 auto revperm = reverseperm_.view_device();
557 // First, solve the singletons.
558 {
559 auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
560 Kokkos::parallel_for(
561 policy_2d_type({0, 0}, {(local_ordinal_type)numSingletons_, numVecs}),
562 KOKKOS_LAMBDA(local_ordinal_type singletonIndex, local_ordinal_type col) {
563 local_ordinal_type row = singletons(singletonIndex);
564 y(row, col) = b(row, col) / singletonDiags(singletonIndex);
565 });
566 }
567 // Next, permute OverlappingB to ReducedReorderedB.
568 Kokkos::parallel_for(
569 policy_2d_type({0, 0}, {(local_ordinal_type)reducedB.extent(0), numVecs}),
570 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col) {
571 reducedB(row, col) = b(revperm(row), col);
572 });
573 // Finally, if there are singletons, eliminate the matrix entries which are in singleton columns ("eliminate" here means update reducedB like in row reduction)
574 // This could be done more efficiently by storing a separate matrix of just the singleton columns and permuted non-singleton rows, but this adds a lot of complexity.
575 // Instead, just apply() the unfiltered matrix to OverlappingY (which is 0, except for singletons), and then permute the result of that and subtract it from reducedB.
576 if (numSingletons_) {
577 mv_type singletonUpdates(A_unfiltered_->getRowMap(), numVecs, false);
578 A_unfiltered_->apply(OverlappingY, singletonUpdates);
579 auto updatesView = singletonUpdates.getLocalViewDevice(Tpetra::Access::ReadOnly);
580 // auto revperm = reverseperm_.view_device();
581 Kokkos::parallel_for(
582 policy_2d_type({0, 0}, {(local_ordinal_type)reducedB.extent(0), numVecs}),
583 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col) {
584 local_ordinal_type origRow = revperm(row);
585 reducedB(row, col) -= updatesView(origRow, col);
586 });
587 }
588}
589
590template <class MatrixType>
592 const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& ReducedReorderedY,
593 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& OverlappingY) const {
594 // NOTE: both vectors here are always constructed within AdditiveSchwarz and are not subviews,
595 // so they are all constant stride (this avoids a lot of boilerplate with whichVectors)
596 TEUCHOS_TEST_FOR_EXCEPTION(!ReducedReorderedY.isConstantStride() || !OverlappingY.isConstantStride(),
597 std::logic_error, "Ifpack2::AdditiveSchwarzFilter::UpdateLHS ERROR: One of the input MultiVectors is not constant stride.");
598 auto reducedY = ReducedReorderedY.getLocalViewDevice(Tpetra::Access::ReadOnly);
599 auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
600 auto revperm = reverseperm_.view_device();
601 Kokkos::parallel_for(
602 policy_2d_type({0, 0}, {(local_ordinal_type)reducedY.extent(0), (local_ordinal_type)reducedY.extent(1)}),
603 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col) {
604 y(revperm(row), col) = reducedY(row, col);
605 });
606}
607
608template <class MatrixType>
610 return true;
611}
612
613template <class MatrixType>
615 return true;
616}
617
618template <class MatrixType>
620 // Reordering doesn't change the Frobenius norm.
621 return A_->getFrobeniusNorm();
622}
623
624template <class MatrixType>
626 mapPairsAreFitted(const row_matrix_type& A) {
627 const map_type& rangeMap = *(A.getRangeMap());
628 const map_type& rowMap = *(A.getRowMap());
629 const bool rangeAndRowFitted = mapPairIsFitted(rowMap, rangeMap);
630
631 const map_type& domainMap = *(A.getDomainMap());
632 const map_type& columnMap = *(A.getColMap());
633 const bool domainAndColumnFitted = mapPairIsFitted(columnMap, domainMap);
634
635 // Note BMK 6-22: Map::isLocallyFitted is a local-only operation, not a collective.
636 // This means that it can return different values on different ranks. This can cause MPI to hang,
637 // even though it's supposed to terminate globally when any single rank does.
638 //
639 // This function doesn't need to be fast since it's debug-only code.
640 int localSuccess = rangeAndRowFitted && domainAndColumnFitted;
641 int globalSuccess;
642
643 Teuchos::reduceAll<int, int>(*(A.getComm()), Teuchos::REDUCE_MIN, localSuccess, Teuchos::outArg(globalSuccess));
644
645 return globalSuccess == 1;
646}
647
648template <class MatrixType>
650 mapPairIsFitted(const map_type& map1, const map_type& map2) {
651 return map1.isLocallyFitted(map2);
652}
653
654} // namespace Details
655} // namespace Ifpack2
656
657#define IFPACK2_DETAILS_ADDITIVESCHWARZFILTER_INSTANT(S, LO, GO, N) \
658 template class Ifpack2::Details::AdditiveSchwarzFilter<Tpetra::RowMatrix<S, LO, GO, N> >;
659
660#endif
Wraps a Tpetra::CrsMatrix or Ifpack2::OverlappingRowMatrix in a filter that removes off-process edges...
Ifpack2 implementation details.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40