MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_UtilitiesBase_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_UTILITIESBASE_DEF_HPP
11#define MUELU_UTILITIESBASE_DEF_HPP
12
13#include "KokkosKernels_ArithTraits.hpp"
14#include "MueLu_ConfigDefs.hpp"
15
17
18#include "MueLu_PerfUtils.hpp"
19#include "MueLu_Monitor.hpp"
20#include <Xpetra_MatrixMatrix.hpp>
21#include <Xpetra_MatrixFactory.hpp>
22#include <Xpetra_MatrixUtils.hpp>
23#include <Xpetra_TripleMatrixMultiply.hpp>
24
25#include <Kokkos_Core.hpp>
26#include <KokkosSparse_CrsMatrix.hpp>
27#include <KokkosSparse_getDiagCopy.hpp>
28
29#include <Xpetra_BlockedVector.hpp>
30#include <Xpetra_BlockedMap.hpp>
31#include <Xpetra_BlockedMultiVector.hpp>
32#include <Xpetra_ExportFactory.hpp>
33
34#include <Xpetra_Import.hpp>
35#include <Xpetra_ImportFactory.hpp>
36#include <Xpetra_MatrixMatrix.hpp>
37#include <Xpetra_CrsGraph.hpp>
38#include <Xpetra_CrsGraphFactory.hpp>
39#include <Xpetra_CrsMatrixWrap.hpp>
40#include <Xpetra_MatrixFactory.hpp>
41#include <Xpetra_StridedMap.hpp>
42
43#include "MueLu_Exceptions.hpp"
44#include "MueLu_Behavior.hpp"
45#include "Xpetra_CrsMatrixFactory.hpp"
46
47#include <KokkosKernels_Handle.hpp>
48#include <KokkosGraph_RCM.hpp>
49#include "MueLu_Behavior.hpp"
50
51namespace MueLu {
52
53template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
54RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
56 Crs2Op(RCP<Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op) {
57 if (Op.is_null())
58 return Teuchos::null;
59 return rcp(new CrsMatrixWrap(Op));
60}
61
62template <class Scalar,
63 class LocalOrdinal,
64 class GlobalOrdinal,
65 class Node>
66Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
67removeSmallEntries(Teuchos::RCP<Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
68 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType threshold,
69 const bool keepDiagonal) {
70 using ATS = KokkosKernels::ArithTraits<Scalar>;
71 using impl_SC = typename ATS::val_type;
72 using impl_ATS = KokkosKernels::ArithTraits<impl_SC>;
73
74 RCP<Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> crsA;
75 if (keepDiagonal) {
76 crsA = applyFilter_GID(
77 A, KOKKOS_LAMBDA(GlobalOrdinal rgid,
78 GlobalOrdinal cgid,
79 impl_SC val) {
80 return ((impl_ATS::magnitude(val) > threshold) || (rgid == cgid));
81 });
82
83 } else {
84 crsA = applyFilter_vals(
85 A, KOKKOS_LAMBDA(impl_SC val) {
86 return (impl_ATS::magnitude(val) > threshold);
87 });
88 }
89 return rcp(new Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>(crsA));
90}
92template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
93RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
95 GetThresholdedMatrix(const RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& Ain, const typename Teuchos::ScalarTraits<Scalar>::magnitudeType threshold, const bool keepDiagonal) {
96 RCP<Matrix> Aout;
97 {
98 using matrix_type = Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
99 using local_matrix_type = typename matrix_type::local_matrix_type;
100 using local_graph_type = typename matrix_type::local_graph_type;
101 using execution_space = typename Node::execution_space;
102 using rowmap_type = typename local_graph_type::row_map_type::non_const_type;
103 using entries_type = typename local_graph_type::entries_type::non_const_type;
104 using values_type = typename local_matrix_type::values_type::non_const_type;
105 using range_type = Kokkos::RangePolicy<execution_space, LocalOrdinal>;
106 using implATS = KokkosKernels::ArithTraits<typename matrix_type::impl_scalar_type>;
107 auto lclA = Ain->getLocalMatrixDevice();
108 auto lclRowmap = Ain->getRowMap()->getLocalMap();
109 auto lclColmap = Ain->getColMap()->getLocalMap();
110
111 local_matrix_type thresholdedLocalMatrix;
112 if (keepDiagonal) {
113 rowmap_type rowptr("MueLu::GetThresholdedMatrix::rowptr", lclA.numRows() + 1);
114 LocalOrdinal nnz = 0;
115 Kokkos::parallel_scan(
116 range_type(0, lclA.numRows()), KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& my_nnz, const bool is_final) {
117 auto row = lclA.rowConst(rlid);
118 auto rclid = lclColmap.getLocalElement(lclRowmap.getGlobalElement(rlid));
119
120 for (LocalOrdinal offset = 0; offset < row.length; ++offset) {
121 auto clid = row.colidx(offset);
122 auto val = row.value(offset);
123 if ((rclid == clid) || implATS::magnitude(val) > threshold) {
124 ++my_nnz;
125 if (is_final && (rlid + 1 < lclA.numRows())) {
126 rowptr(rlid + 2) = my_nnz;
127 }
128 }
129 }
130 },
131 nnz);
132
133 entries_type entries(Kokkos::ViewAllocateWithoutInitializing("MueLu::GetThresholdedGraph::indices"), nnz);
134 values_type values(Kokkos::ViewAllocateWithoutInitializing("MueLu::GetThresholdedGraph::values"), nnz);
135 Kokkos::parallel_for(
136 range_type(0, lclA.numRows()), KOKKOS_LAMBDA(const LocalOrdinal rlid) {
137 auto row = lclA.rowConst(rlid);
138 auto rclid = lclColmap.getLocalElement(lclRowmap.getGlobalElement(rlid));
139
140 for (LocalOrdinal offset = 0; offset < row.length; ++offset) {
141 auto clid = row.colidx(offset);
142 auto val = row.value(offset);
143 if ((rclid == clid) || implATS::magnitude(val) > threshold) {
144 entries(rowptr(rlid + 1)) = clid;
145 values(rowptr(rlid + 1)) = val;
146 ++rowptr(rlid + 1);
148 }
149 });
150
151 thresholdedLocalMatrix = local_matrix_type("thresholdedLocalMatrix", lclA.numRows(), lclA.numCols(), nnz, values, rowptr, entries);
152 } else {
153 rowmap_type rowptr("MueLu::GetThresholdedMatrix::rowptr", lclA.numRows() + 1);
154 LocalOrdinal nnz = 0;
155 Kokkos::parallel_scan(
156 range_type(0, lclA.numRows()), KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& my_nnz, const bool is_final) {
157 auto row = lclA.rowConst(rlid);
158
159 for (LocalOrdinal offset = 0; offset < row.length; ++offset) {
160 auto val = row.value(offset);
161 if (implATS::magnitude(val) > threshold) {
162 ++my_nnz;
163 if (is_final && (rlid + 1 < lclA.numRows())) {
164 rowptr(rlid + 2) = my_nnz;
165 }
167 }
168 },
169 nnz);
170
171 entries_type entries(Kokkos::ViewAllocateWithoutInitializing("MueLu::GetThresholdedGraph::indices"), nnz);
172 values_type values(Kokkos::ViewAllocateWithoutInitializing("MueLu::GetThresholdedGraph::values"), nnz);
173 Kokkos::parallel_for(
174 range_type(0, lclA.numRows()), KOKKOS_LAMBDA(const LocalOrdinal rlid) {
175 auto row = lclA.rowConst(rlid);
176
177 for (LocalOrdinal offset = 0; offset < row.length; ++offset) {
178 auto clid = row.colidx(offset);
179 auto val = row.value(offset);
180 if (implATS::magnitude(val) > threshold) {
181 entries(rowptr(rlid + 1)) = clid;
182 values(rowptr(rlid + 1)) = val;
183 ++rowptr(rlid + 1);
185 }
186 });
187
188 thresholdedLocalMatrix = local_matrix_type("thresholdedLocalMatrix", lclA.numRows(), lclA.numCols(), nnz, values, rowptr, entries);
189 }
191 Aout = MatrixFactory::Build(thresholdedLocalMatrix, Ain->getRowMap(), Ain->getColMap(), Ain->getDomainMap(), Ain->getRangeMap());
192 }
193
194 return Aout;
195}
196
197template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
198RCP<Xpetra::CrsGraph<LocalOrdinal, GlobalOrdinal, Node>>
200 GetThresholdedGraph(const RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A, const Magnitude threshold) {
201 RCP<CrsGraph> sparsityPattern;
202 {
203 using matrix_type = Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
204 using graph_type = Xpetra::CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
205 using local_graph_type = typename graph_type::local_graph_device_type;
206 using execution_space = typename Node::execution_space;
207 using rowmap_type = typename local_graph_type::row_map_type::non_const_type;
208 using entries_type = typename local_graph_type::entries_type::non_const_type;
209 using range_type = Kokkos::RangePolicy<execution_space, LocalOrdinal>;
210 using implATS = KokkosKernels::ArithTraits<typename matrix_type::impl_scalar_type>;
211 using magATS = KokkosKernels::ArithTraits<typename implATS::magnitudeType>;
212 auto lclA = A->getLocalMatrixDevice();
213 auto lclRowmap = A->getRowMap()->getLocalMap();
214 auto lclColmap = A->getColMap()->getLocalMap();
215
216 rowmap_type rowptr("MueLu::GetThresholdedGraph::rowptr", lclA.numRows() + 1);
217
218 LocalOrdinal nnz = 0;
219 Kokkos::parallel_scan(
220 range_type(0, lclA.numRows()), KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& my_nnz, const bool is_final) {
221 auto row = lclA.rowConst(rlid);
222 auto rclid = lclColmap.getLocalElement(lclRowmap.getGlobalElement(rlid));
223
224 typename implATS::magnitudeType d = magATS::one();
225 for (LocalOrdinal offset = 0; offset < row.length; ++offset) {
226 auto clid = row.colidx(offset);
227 if (rclid == clid) {
228 auto val = implATS::magnitude(row.value(offset));
229 if (val > implATS::epsilon())
230 d = val;
231 }
232 }
234 for (LocalOrdinal offset = 0; offset < row.length; ++offset) {
235 auto clid = row.colidx(offset);
236 auto val = row.value(offset);
237 if ((rclid == clid) || implATS::magnitude(val) > d * threshold) {
238 ++my_nnz;
239 if (is_final && (rlid + 1 < lclA.numRows())) {
240 rowptr(rlid + 2) = my_nnz;
241 }
242 }
243 }
244 },
245 nnz);
246
247 entries_type entries(Kokkos::ViewAllocateWithoutInitializing("MueLu::GetThresholdedGraph::indices"), nnz);
248 Kokkos::parallel_for(
249 range_type(0, lclA.numRows()), KOKKOS_LAMBDA(const LocalOrdinal rlid) {
250 auto row = lclA.rowConst(rlid);
251 auto rclid = lclColmap.getLocalElement(lclRowmap.getGlobalElement(rlid));
252
253 typename implATS::magnitudeType d = magATS::one();
254 for (LocalOrdinal offset = 0; offset < row.length; ++offset) {
255 auto clid = row.colidx(offset);
256 if (rclid == clid) {
257 auto val = implATS::magnitude(row.value(offset));
258 if (val > implATS::epsilon())
259 d = val;
260 }
261 }
262
263 for (LocalOrdinal offset = 0; offset < row.length; ++offset) {
264 auto clid = row.colidx(offset);
265 auto val = row.value(offset);
266 if ((rclid == clid) || implATS::magnitude(val) > d * threshold) {
267 entries(rowptr(rlid + 1)) = clid;
268 ++rowptr(rlid + 1);
269 }
270 }
271 });
272
273 sparsityPattern = CrsGraphFactory::Build(A->getRowMap(), A->getColMap(), rowptr, entries);
274 sparsityPattern->fillComplete(A->getDomainMap(), A->getRangeMap());
275 }
276
277 return sparsityPattern;
279
280template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
281Teuchos::ArrayRCP<Scalar>
283 GetMatrixDiagonal_arcp(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A) {
284 size_t numRows = A.getRowMap()->getLocalNumElements();
285 Teuchos::ArrayRCP<Scalar> diag(numRows);
286 Teuchos::ArrayView<const LocalOrdinal> cols;
287 Teuchos::ArrayView<const Scalar> vals;
288 for (size_t i = 0; i < numRows; ++i) {
289 A.getLocalRowView(i, cols, vals);
290 LocalOrdinal j = 0;
291 for (; j < cols.size(); ++j) {
292 if (Teuchos::as<size_t>(cols[j]) == i) {
293 diag[i] = vals[j];
294 break;
295 }
297 if (j == cols.size()) {
298 // Diagonal entry is absent
299 diag[i] = Teuchos::ScalarTraits<Scalar>::zero();
300 }
301 }
302 return diag;
303}
304
305template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
306Teuchos::RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
308 GetMatrixDiagonal(const Matrix& A) {
309 const auto rowMap = A.getRowMap();
310 auto diag = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(rowMap, false);
311
312 const CrsMatrixWrap* crsOp = dynamic_cast<const CrsMatrixWrap*>(&A);
313 if ((crsOp != NULL) && (rowMap->lib() == Xpetra::UseTpetra)) {
314 using device_type = typename CrsGraph::device_type;
315 Kokkos::View<size_t*, device_type> offsets("offsets", rowMap->getLocalNumElements());
316 crsOp->getCrsGraph()->getLocalDiagOffsets(offsets);
317 crsOp->getCrsMatrix()->getLocalDiagCopy(*diag, offsets);
318 } else {
319 A.getLocalDiagCopy(*diag);
320 }
321
322 return diag;
323}
324
325// template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
326// RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
327// UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
328// GetMatrixDiagonalInverse(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> & A, Magnitude tol, Scalar valReplacement) {
329// Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase::GetMatrixDiagonalInverse");
330
331// RCP<const Map> rowMap = A.getRowMap();
332// RCP<Vector> diag = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap,true);
333
334// A.getLocalDiagCopy(*diag);
335
336// RCP<Vector> inv = MueLu::UtilitiesBase<Scalar,LocalOrdinal,GlobalOrdinal,Node>::GetInverse(diag, tol, valReplacement);
337
338// return inv;
339// }
340
341template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
342Teuchos::RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
344 GetMatrixDiagonalInverse(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
345 typename Teuchos::ScalarTraits<Scalar>::magnitudeType tol,
346 Scalar valReplacement,
347 const bool doLumped) {
348 Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("Utilities::GetMatrixDiagonalInverse");
349
350 RCP<const BlockedCrsMatrix> bA = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpFromRef(A));
351 if (!bA.is_null()) {
352 RCP<const Map> rowMap = A.getRowMap();
353 RCP<Vector> diag = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(rowMap, false);
354 A.getLocalDiagCopy(*diag);
355 RCP<Vector> inv = MueLu::UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetInverse(diag, tol, valReplacement);
356 return inv;
357 }
358
359 // Some useful type definitions
360 using local_matrix_type = typename Matrix::local_matrix_device_type;
361 // using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
362 using value_type = typename local_matrix_type::value_type;
363 using values_type = typename local_matrix_type::values_type;
364 using scalar_type = typename values_type::non_const_value_type;
365 using ordinal_type = typename local_matrix_type::ordinal_type;
366 using execution_space = typename local_matrix_type::execution_space;
367 // using memory_space = typename local_matrix_type::memory_space;
368 // Be careful with this one, if using KokkosKernels::ArithTraits<Scalar>
369 // you are likely to run into errors when handling std::complex<>
370 // a good way to work around that is to use the following:
371 // using KAT = KokkosKernels::ArithTraits<KokkosKernels::ArithTraits<Scalar>::val_type> >
372 // here we have: value_type = KokkosKernels::ArithTraits<Scalar>::val_type
373 using KAT = KokkosKernels::ArithTraits<value_type>;
374
375 // Get/Create distributed objects
376 RCP<const Map> rowMap = A.getRowMap();
377 RCP<Vector> diag = VectorFactory::Build(rowMap, false);
378
379 // Now generate local objects
380 local_matrix_type localMatrix = A.getLocalMatrixDevice();
381 auto diagVals = diag->getLocalViewDevice(Tpetra::Access::ReadWrite);
382
383 ordinal_type numRows = localMatrix.graph.numRows();
384
385 scalar_type valReplacement_dev = valReplacement;
387 // Note: 2019-11-21, LBV
388 // This could be implemented with a TeamPolicy over the rows
389 // and a TeamVectorRange over the entries in a row if performance
390 // becomes more important here.
391 if (!doLumped)
392 Kokkos::parallel_for(
393 "Utilities::GetMatrixDiagonalInverse",
394 Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
395 KOKKOS_LAMBDA(const ordinal_type rowIdx) {
396 bool foundDiagEntry = false;
397 auto myRow = localMatrix.rowConst(rowIdx);
398 for (ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
399 if (myRow.colidx(entryIdx) == rowIdx) {
400 foundDiagEntry = true;
401 if (KAT::magnitude(myRow.value(entryIdx)) > KAT::magnitude(tol)) {
402 diagVals(rowIdx, 0) = KAT::one() / myRow.value(entryIdx);
403 } else {
404 diagVals(rowIdx, 0) = valReplacement_dev;
405 }
406 break;
408 }
409
410 if (!foundDiagEntry) {
411 diagVals(rowIdx, 0) = KAT::zero();
413 });
414 else
415 Kokkos::parallel_for(
416 "Utilities::GetMatrixDiagonalInverse",
417 Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
418 KOKKOS_LAMBDA(const ordinal_type rowIdx) {
419 auto myRow = localMatrix.rowConst(rowIdx);
420 for (ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
421 diagVals(rowIdx, 0) += KAT::magnitude(myRow.value(entryIdx));
422 }
423 if (KAT::magnitude(diagVals(rowIdx, 0)) > KAT::magnitude(tol))
424 diagVals(rowIdx, 0) = KAT::one() / diagVals(rowIdx, 0);
425 else
426 diagVals(rowIdx, 0) = valReplacement_dev;
427 });
428
429 return diag;
430}
432template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
433Teuchos::RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
435 GetLumpedMatrixDiagonal(Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> const& A, const bool doReciprocal,
436 Magnitude tol,
437 Scalar valReplacement,
438 const bool replaceSingleEntryRowWithZero,
439 const bool useAverageAbsDiagVal) {
440 typedef Teuchos::ScalarTraits<Scalar> TST;
442 RCP<Vector> diag = Teuchos::null;
443 const Scalar zero = TST::zero();
444 const Scalar one = TST::one();
445 const Scalar two = one + one;
446
447 Teuchos::RCP<const Matrix> rcpA = Teuchos::rcpFromRef(A);
449 RCP<const Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> bA =
450 Teuchos::rcp_dynamic_cast<const Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(rcpA);
451 if (bA == Teuchos::null) {
452 RCP<const Map> rowMap = rcpA->getRowMap();
453 diag = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(rowMap, false);
455 if (rowMap->lib() == Xpetra::UnderlyingLib::UseTpetra) {
456 Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase::GetLumpedMatrixDiagonal (Kokkos implementation)");
457 // Implement using Kokkos
458 using local_vector_type = typename Vector::dual_view_type::t_dev_um;
459 using local_matrix_type = typename Matrix::local_matrix_device_type;
460 using execution_space = typename local_vector_type::execution_space;
461 // using rowmap_type = typename local_matrix_type::row_map_type;
462 // using entries_type = typename local_matrix_type::index_type;
463 using values_type = typename local_matrix_type::values_type;
464 using scalar_type = typename values_type::non_const_value_type;
465 using mag_type = typename KokkosKernels::ArithTraits<scalar_type>::mag_type;
466 using KAT_S = typename KokkosKernels::ArithTraits<scalar_type>;
467 using KAT_M = typename KokkosKernels::ArithTraits<mag_type>;
468 using size_type = typename local_matrix_type::non_const_size_type;
470 local_vector_type diag_dev = diag->getLocalViewDevice(Tpetra::Access::OverwriteAll);
471 local_matrix_type local_mat_dev = rcpA->getLocalMatrixDevice();
472 Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
473 scalar_type valReplacement_dev = valReplacement;
474
475 if (doReciprocal) {
476 Kokkos::View<int*, execution_space> nnzPerRow("nnz per rows", diag_dev.extent(0));
477 Kokkos::View<scalar_type*, execution_space> regSum("regSum", diag_dev.extent(0));
478 Kokkos::View<mag_type, execution_space> avgAbsDiagVal_dev("avgAbsDiagVal");
479 Kokkos::View<int, execution_space> numDiagsEqualToOne_dev("numDiagsEqualToOne");
480
481 {
482 Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: parallel_for (doReciprocal)");
483 Kokkos::parallel_for(
484 "GetLumpedMatrixDiagonal", my_policy,
485 KOKKOS_LAMBDA(const int rowIdx) {
486 diag_dev(rowIdx, 0) = KAT_S::zero();
487 for (size_type entryIdx = local_mat_dev.graph.row_map(rowIdx);
488 entryIdx < local_mat_dev.graph.row_map(rowIdx + 1);
489 ++entryIdx) {
490 regSum(rowIdx) += local_mat_dev.values(entryIdx);
491 if (KAT_M::zero() < KAT_S::abs(local_mat_dev.values(entryIdx))) {
492 ++nnzPerRow(rowIdx);
493 }
494 diag_dev(rowIdx, 0) += KAT_S::abs(local_mat_dev.values(entryIdx));
495 if (rowIdx == local_mat_dev.graph.entries(entryIdx)) {
496 Kokkos::atomic_add(&avgAbsDiagVal_dev(), KAT_S::abs(local_mat_dev.values(entryIdx)));
497 }
498 }
499
500 if (nnzPerRow(rowIdx) == 1 && KAT_S::magnitude(diag_dev(rowIdx, 0)) == KAT_M::one()) {
501 Kokkos::atomic_add(&numDiagsEqualToOne_dev(), 1);
502 }
503 });
504 }
505 if (useAverageAbsDiagVal) {
506 Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: useAverageAbsDiagVal");
507 typename Kokkos::View<mag_type, execution_space>::host_mirror_type avgAbsDiagVal = Kokkos::create_mirror_view(avgAbsDiagVal_dev);
508 Kokkos::deep_copy(avgAbsDiagVal, avgAbsDiagVal_dev);
509 int numDiagsEqualToOne;
510 Kokkos::deep_copy(numDiagsEqualToOne, numDiagsEqualToOne_dev);
511
512 tol = TST::magnitude(100 * Teuchos::ScalarTraits<Scalar>::eps()) * (avgAbsDiagVal() - numDiagsEqualToOne) / (rowMap->getLocalNumElements() - numDiagsEqualToOne);
513 }
514
515 {
516 Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("ComputeLumpedDiagonalInverse: parallel_for (doReciprocal)");
517 Kokkos::parallel_for(
518 "ComputeLumpedDiagonalInverse", my_policy,
519 KOKKOS_LAMBDA(const int rowIdx) {
520 if (replaceSingleEntryRowWithZero && nnzPerRow(rowIdx) <= 1) {
521 diag_dev(rowIdx, 0) = KAT_S::zero();
522 } else if ((diag_dev(rowIdx, 0) != KAT_S::zero()) && (KAT_S::magnitude(diag_dev(rowIdx, 0)) < KAT_S::magnitude(2 * regSum(rowIdx)))) {
523 diag_dev(rowIdx, 0) = KAT_S::one() / KAT_S::magnitude(2 * regSum(rowIdx));
524 } else {
525 if (KAT_S::magnitude(diag_dev(rowIdx, 0)) > tol) {
526 diag_dev(rowIdx, 0) = KAT_S::one() / diag_dev(rowIdx, 0);
527 } else {
528 diag_dev(rowIdx, 0) = valReplacement_dev;
529 }
530 }
531 });
532 }
533
534 } else {
535 Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: parallel_for");
536 Kokkos::parallel_for(
537 "GetLumpedMatrixDiagonal", my_policy,
538 KOKKOS_LAMBDA(const int rowIdx) {
539 diag_dev(rowIdx, 0) = KAT_S::zero();
540 for (size_type entryIdx = local_mat_dev.graph.row_map(rowIdx);
541 entryIdx < local_mat_dev.graph.row_map(rowIdx + 1);
542 ++entryIdx) {
543 diag_dev(rowIdx, 0) += KAT_S::magnitude(local_mat_dev.values(entryIdx));
544 }
545 });
546 }
547 } else {
548 // Implement using Teuchos
549 Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase: GetLumpedMatrixDiagonal: (Teuchos implementation)");
550 ArrayRCP<Scalar> diagVals = diag->getDataNonConst(0);
551 Teuchos::Array<Scalar> regSum(diag->getLocalLength());
552 Teuchos::ArrayView<const LocalOrdinal> cols;
553 Teuchos::ArrayView<const Scalar> vals;
554
555 std::vector<int> nnzPerRow(rowMap->getLocalNumElements());
556
557 // FIXME 2021-10-22 JHU If this is called with doReciprocal=false, what should the correct behavior be? Currently,
558 // FIXME 2021-10-22 JHU the diagonal entry is set to be the sum of the absolute values of the row entries.
559
560 const Magnitude zeroMagn = TST::magnitude(zero);
561 Magnitude avgAbsDiagVal = TST::magnitude(zero);
562 int numDiagsEqualToOne = 0;
563 for (size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
564 nnzPerRow[i] = 0;
565 rcpA->getLocalRowView(i, cols, vals);
566 diagVals[i] = zero;
567 for (LocalOrdinal j = 0; j < cols.size(); ++j) {
568 regSum[i] += vals[j];
569 const Magnitude rowEntryMagn = TST::magnitude(vals[j]);
570 if (rowEntryMagn > zeroMagn)
571 nnzPerRow[i]++;
572 diagVals[i] += rowEntryMagn;
573 if (static_cast<size_t>(cols[j]) == i)
574 avgAbsDiagVal += rowEntryMagn;
575 }
576 if (nnzPerRow[i] == 1 && TST::magnitude(diagVals[i]) == 1.)
577 numDiagsEqualToOne++;
578 }
579 if (useAverageAbsDiagVal)
580 tol = TST::magnitude(100 * Teuchos::ScalarTraits<Scalar>::eps()) * (avgAbsDiagVal - numDiagsEqualToOne) / (rowMap->getLocalNumElements() - numDiagsEqualToOne);
581 if (doReciprocal) {
582 for (size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
583 if (replaceSingleEntryRowWithZero && nnzPerRow[i] <= static_cast<int>(1))
584 diagVals[i] = zero;
585 else if ((diagVals[i] != zero) && (TST::magnitude(diagVals[i]) < TST::magnitude(two * regSum[i])))
586 diagVals[i] = one / TST::magnitude((two * regSum[i]));
587 else {
588 if (TST::magnitude(diagVals[i]) > tol)
589 diagVals[i] = one / diagVals[i];
590 else {
591 diagVals[i] = valReplacement;
592 }
593 }
594 }
595 }
596 }
597 } else {
598 TEUCHOS_TEST_FOR_EXCEPTION(doReciprocal, Xpetra::Exceptions::RuntimeError,
599 "UtilitiesBase::GetLumpedMatrixDiagonal(): extracting reciprocal of diagonal of a blocked matrix is not supported");
600 diag = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(bA->getRangeMapExtractor()->getFullMap(), true);
601
602 for (size_t row = 0; row < bA->Rows(); ++row) {
603 for (size_t col = 0; col < bA->Cols(); ++col) {
604 if (!bA->getMatrix(row, col).is_null()) {
605 // if we are in Thyra mode, but the block (row,row) is again a blocked operator, we have to use (pseudo) Xpetra-style GIDs with offset!
606 bool bThyraMode = bA->getRangeMapExtractor()->getThyraMode() && (Teuchos::rcp_dynamic_cast<Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(bA->getMatrix(row, col)) == Teuchos::null);
607 RCP<Vector> ddtemp = bA->getRangeMapExtractor()->ExtractVector(diag, row, bThyraMode);
608 RCP<const Vector> dd = GetLumpedMatrixDiagonal(*(bA->getMatrix(row, col)));
609 ddtemp->update(Teuchos::as<Scalar>(1.0), *dd, Teuchos::as<Scalar>(1.0));
610 bA->getRangeMapExtractor()->InsertVector(ddtemp, row, diag, bThyraMode);
611 }
612 }
613 }
614 }
615
616 return diag;
617}
618
619template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
620Teuchos::RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
622 GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A) {
623 // Get/Create distributed objects
624 RCP<const Map> rowMap = A.getRowMap();
625 auto diag = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(rowMap, false);
626
627 // Implement using Kokkos
628 using local_vector_type = typename Vector::dual_view_type::t_dev_um;
629 using local_matrix_type = typename Matrix::local_matrix_device_type;
630 using execution_space = typename local_vector_type::execution_space;
631 using values_type = typename local_matrix_type::values_type;
632 using scalar_type = typename values_type::non_const_value_type;
633 using KAT_S = typename KokkosKernels::ArithTraits<scalar_type>;
634
635 auto diag_dev = diag->getLocalViewDevice(Tpetra::Access::OverwriteAll);
636 auto local_mat_dev = A.getLocalMatrixDevice();
637 Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
638
639 Kokkos::parallel_for(
640 "GetMatrixMaxMinusOffDiagonal", my_policy,
641 KOKKOS_LAMBDA(const LocalOrdinal rowIdx) {
642 auto mymax = KAT_S::zero();
643 auto row = local_mat_dev.rowConst(rowIdx);
644 for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
645 if (rowIdx != row.colidx(entryIdx)) {
646 if (KAT_S::real(mymax) < -KAT_S::real(row.value(entryIdx)))
647 mymax = -KAT_S::real(row.value(entryIdx));
648 }
649 }
650 diag_dev(rowIdx, 0) = mymax;
651 });
652
653 return diag;
654}
655
656template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
657Teuchos::RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
659 GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, const Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>& BlockNumber) {
660 TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "GetMatrixMaxMinusOffDiagonal: BlockNumber must match's A's column map.");
661
662 // Get/Create distributed objects
663 RCP<const Map> rowMap = A.getRowMap();
664 auto diag = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(rowMap, false);
665
666 // Implement using Kokkos
667 using local_vector_type = typename Vector::dual_view_type::t_dev_um;
668 using local_matrix_type = typename Matrix::local_matrix_device_type;
669 using execution_space = typename local_vector_type::execution_space;
670 using values_type = typename local_matrix_type::values_type;
671 using scalar_type = typename values_type::non_const_value_type;
672 using KAT_S = typename KokkosKernels::ArithTraits<scalar_type>;
673
674 auto diag_dev = diag->getLocalViewDevice(Tpetra::Access::OverwriteAll);
675 auto local_mat_dev = A.getLocalMatrixDevice();
676 auto local_block_dev = BlockNumber.getLocalViewDevice(Tpetra::Access::ReadOnly);
677 Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
678
679 Kokkos::parallel_for(
680 "GetMatrixMaxMinusOffDiagonal", my_policy,
681 KOKKOS_LAMBDA(const LocalOrdinal rowIdx) {
682 auto mymax = KAT_S::zero();
683 auto row = local_mat_dev.row(rowIdx);
684 for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
685 if ((rowIdx != row.colidx(entryIdx)) && (local_block_dev(rowIdx, 0) == local_block_dev(row.colidx(entryIdx), 0))) {
686 if (KAT_S::real(mymax) < -KAT_S::real(row.value(entryIdx)))
687 mymax = -KAT_S::real(row.value(entryIdx));
688 }
689 }
690 diag_dev(rowIdx, 0) = mymax;
691 });
692
693 return diag;
694}
695
696template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
697Teuchos::RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
699 GetInverse(Teuchos::RCP<const Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> v, typename Teuchos::ScalarTraits<Scalar>::magnitudeType tol, Scalar valReplacement) {
700 RCP<Vector> ret = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(v->getMap(), true);
701
702 // check whether input vector "v" is a BlockedVector
703 RCP<const BlockedVector> bv = Teuchos::rcp_dynamic_cast<const BlockedVector>(v);
704 if (bv.is_null() == false) {
705 RCP<BlockedVector> bret = Teuchos::rcp_dynamic_cast<BlockedVector>(ret);
706 TEUCHOS_TEST_FOR_EXCEPTION(bret.is_null() == true, MueLu::Exceptions::RuntimeError, "MueLu::UtilitiesBase::GetInverse: return vector should be of type BlockedVector");
707 RCP<const BlockedMap> bmap = bv->getBlockedMap();
708 for (size_t r = 0; r < bmap->getNumMaps(); ++r) {
709 RCP<const MultiVector> submvec = bv->getMultiVector(r, bmap->getThyraMode());
710 RCP<const Vector> subvec = submvec->getVector(0);
711 RCP<Vector> subvecinf = MueLu::UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetInverse(subvec, tol, valReplacement);
712 bret->setMultiVector(r, subvecinf, bmap->getThyraMode());
713 }
714 return ret;
715 }
716
717 // v is an {Epetra,Tpetra}Vector: work with the underlying raw data
718 ArrayRCP<Scalar> retVals = ret->getDataNonConst(0);
719 ArrayRCP<const Scalar> inputVals = v->getData(0);
720 for (size_t i = 0; i < v->getMap()->getLocalNumElements(); ++i) {
721 if (Teuchos::ScalarTraits<Scalar>::magnitude(inputVals[i]) > tol)
722 retVals[i] = Teuchos::ScalarTraits<Scalar>::one() / inputVals[i];
723 else
724 retVals[i] = valReplacement;
725 }
726 return ret;
727}
728
729// template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
730// RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
731// UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
732// GetMatrixOverlappedDiagonal(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> & A) {
733// RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
734
735// // Undo block map (if we have one)
736// RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
737// if(!browMap.is_null()) rowMap = browMap->getMap();
738
739// RCP<Vector> localDiag = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap);
740// try {
741// const CrsMatrixWrap* crsOp = dynamic_cast<const CrsMatrixWrap*>(&A);
742// if (crsOp == NULL) {
743// throw Exceptions::RuntimeError("cast to CrsMatrixWrap failed");
744// }
745// Teuchos::ArrayRCP<size_t> offsets;
746// crsOp->getLocalDiagOffsets(offsets);
747// crsOp->getLocalDiagCopy(*localDiag,offsets());
748// }
749// catch (...) {
750// ArrayRCP<Scalar> localDiagVals = localDiag->getDataNonConst(0);
751// Teuchos::ArrayRCP<Scalar> diagVals = GetMatrixDiagonal(A);
752// for (LocalOrdinal i = 0; i < localDiagVals.size(); i++)
753// localDiagVals[i] = diagVals[i];
754// localDiagVals = diagVals = null;
755// }
756
757// RCP<Vector> diagonal = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(colMap);
758// RCP< const Xpetra::Import<LocalOrdinal,GlobalOrdinal,Node> > importer;
759// importer = A.getCrsGraph()->getImporter();
760// if (importer == Teuchos::null) {
761// importer = Xpetra::ImportFactory<LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap, colMap);
762// }
763// diagonal->doImport(*localDiag, *(importer), Xpetra::INSERT);
764// return diagonal;
765// }
766
767template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
768RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
770 GetMatrixOverlappedDiagonal(const Matrix& A) {
771 RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
772 RCP<Vector> localDiag = GetMatrixDiagonal(A);
773 RCP<const Import> importer = A.getCrsGraph()->getImporter();
774 if (importer.is_null() && !rowMap->isSameAs(*colMap)) {
775 importer = ImportFactory::Build(rowMap, colMap);
776 }
777 if (!importer.is_null()) {
778 RCP<Vector> diagonal = VectorFactory::Build(colMap, false);
779 diagonal->doImport(*localDiag, *(importer), Xpetra::INSERT);
780 return diagonal;
781 } else
782 return localDiag;
783}
784
785template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
786RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
788 GetMatrixOverlappedDeletedRowsum(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A) {
789 using STS = typename Teuchos::ScalarTraits<SC>;
790
791 // Undo block map (if we have one)
792 RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
793 RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
794 if (!browMap.is_null()) rowMap = browMap->getMap();
795
796 RCP<Vector> local = Xpetra::VectorFactory<SC, LO, GO, Node>::Build(rowMap);
797 RCP<Vector> ghosted = Xpetra::VectorFactory<SC, LO, GO, Node>::Build(colMap, true);
798 ArrayRCP<SC> localVals = local->getDataNonConst(0);
799
800 for (LO row = 0; row < static_cast<LO>(A.getRowMap()->getLocalNumElements()); ++row) {
801 size_t nnz = A.getNumEntriesInLocalRow(row);
802 ArrayView<const LO> indices;
803 ArrayView<const SC> vals;
804 A.getLocalRowView(row, indices, vals);
805
806 SC si = STS::zero();
807
808 for (LO colID = 0; colID < static_cast<LO>(nnz); colID++) {
809 if (indices[colID] != row) {
810 si += vals[colID];
811 }
812 }
813 localVals[row] = si;
814 }
815
816 RCP<const Xpetra::Import<LO, GO, Node>> importer;
817 importer = A.getCrsGraph()->getImporter();
818 if (importer == Teuchos::null) {
819 importer = Xpetra::ImportFactory<LO, GO, Node>::Build(rowMap, colMap);
820 }
821 ghosted->doImport(*local, *(importer), Xpetra::INSERT);
822 return ghosted;
823}
824
825template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
826RCP<Xpetra::Vector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LocalOrdinal, GlobalOrdinal, Node>>
828 GetMatrixOverlappedAbsDeletedRowsum(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A) {
829 RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
830 using STS = typename Teuchos::ScalarTraits<Scalar>;
831 using MTS = typename Teuchos::ScalarTraits<Magnitude>;
832 using MT = Magnitude;
833 using RealValuedVector = Xpetra::Vector<MT, LO, GO, Node>;
834
835 // Undo block map (if we have one)
836 RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
837 if (!browMap.is_null()) rowMap = browMap->getMap();
838
839 RCP<RealValuedVector> local = Xpetra::VectorFactory<MT, LO, GO, Node>::Build(rowMap);
840 RCP<RealValuedVector> ghosted = Xpetra::VectorFactory<MT, LO, GO, Node>::Build(colMap, true);
841 ArrayRCP<MT> localVals = local->getDataNonConst(0);
842
843 for (LO rowIdx = 0; rowIdx < static_cast<LO>(A.getRowMap()->getLocalNumElements()); ++rowIdx) {
844 size_t nnz = A.getNumEntriesInLocalRow(rowIdx);
845 ArrayView<const LO> indices;
846 ArrayView<const SC> vals;
847 A.getLocalRowView(rowIdx, indices, vals);
848
849 MT si = MTS::zero();
850
851 for (LO colID = 0; colID < static_cast<LO>(nnz); ++colID) {
852 if (indices[colID] != rowIdx) {
853 si += STS::magnitude(vals[colID]);
854 }
855 }
856 localVals[rowIdx] = si;
857 }
858
859 RCP<const Xpetra::Import<LO, GO, Node>> importer;
860 importer = A.getCrsGraph()->getImporter();
861 if (importer == Teuchos::null) {
862 importer = Xpetra::ImportFactory<LO, GO, Node>::Build(rowMap, colMap);
863 }
864 ghosted->doImport(*local, *(importer), Xpetra::INSERT);
865 return ghosted;
866}
867
868template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
871 CountNegativeDiagonalEntries(const Matrix& A) {
872 using local_matrix_type = typename Matrix::local_matrix_device_type;
873 using execution_space = typename local_matrix_type::execution_space;
874 using KAT_S = typename KokkosKernels::ArithTraits<typename local_matrix_type::value_type>;
875
876 auto local_mat_dev = A.getLocalMatrixDevice();
877 Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(local_mat_dev.numRows()));
878 GlobalOrdinal count_l = 0, count_g = 0;
879
880 Kokkos::parallel_reduce(
881 "CountNegativeDiagonalEntries", my_policy,
882 KOKKOS_LAMBDA(const LocalOrdinal rowIdx, GlobalOrdinal& sum) {
883 auto row = local_mat_dev.row(rowIdx);
884 for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
885 if (rowIdx == row.colidx(entryIdx) && KAT_S::real(row.value(entryIdx)) < 0)
886 sum++;
887 }
888 },
889 count_l);
890
891 MueLu_sumAll(A.getRowMap()->getComm(), count_l, count_g);
892 return count_g;
893}
894
895template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
896Teuchos::Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>
898 ResidualNorm(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS) {
899 TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
900 const size_t numVecs = X.getNumVectors();
901 RCP<MultiVector> RES = Residual(Op, X, RHS);
902 Teuchos::Array<Magnitude> norms(numVecs);
903 RES->norm2(norms);
904 return norms;
905}
906
907template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
908Teuchos::Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>
910 ResidualNorm(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS, MultiVector& Resid) {
911 TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
912 const size_t numVecs = X.getNumVectors();
913 Residual(Op, X, RHS, Resid);
914 Teuchos::Array<Magnitude> norms(numVecs);
915 Resid.norm2(norms);
916 return norms;
917}
918
919template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
920RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
922 Residual(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS) {
923 TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
924 const size_t numVecs = X.getNumVectors();
925 // TODO Op.getRangeMap should return a BlockedMap if it is a BlockedCrsOperator
926 RCP<MultiVector> RES = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(RHS.getMap(), numVecs, false); // no need to initialize to zero
927 Op.residual(X, RHS, *RES);
928 return RES;
929}
930
931template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
933 Residual(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS, MultiVector& Resid) {
934 TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides");
935 TEUCHOS_TEST_FOR_EXCEPTION(Resid.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of residual vectors != number of right-hand sides");
936 Op.residual(X, RHS, Resid);
937}
938
939template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
940Scalar
942 PowerMethod(const Matrix& A, bool scaleByDiag,
943 LocalOrdinal niters, typename Teuchos::ScalarTraits<Scalar>::magnitudeType tolerance, typename Teuchos::ScalarTraits<Scalar>::magnitudeType diagonalReplacementTolerance, bool verbose, unsigned int seed) {
944 TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRangeMap()->isSameAs(*(A.getDomainMap()))), Exceptions::Incompatible,
945 "Utils::PowerMethod: operator must have domain and range maps that are equivalent.");
946
947 // power iteration
948 RCP<Vector> diagInvVec;
949 if (scaleByDiag) {
950 diagInvVec = GetMatrixDiagonalInverse(A, diagonalReplacementTolerance);
951 }
952
953 Scalar lambda = PowerMethod(A, diagInvVec, niters, tolerance, verbose, seed);
954 return lambda;
955}
956
957template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
958Scalar
959UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
960 PowerMethod(const Matrix& A, const RCP<Vector>& diagInvVec,
961 LocalOrdinal niters, typename Teuchos::ScalarTraits<Scalar>::magnitudeType tolerance, bool verbose, unsigned int seed) {
962 TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRangeMap()->isSameAs(*(A.getDomainMap()))), Exceptions::Incompatible,
963 "Utils::PowerMethod: operator must have domain and range maps that are equivalent.");
964
965 // Create three vectors, fill z with random numbers
966 RCP<Vector> q = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(A.getDomainMap(), true);
967 RCP<Vector> r = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(A.getRangeMap(), true);
968 RCP<Vector> z = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(A.getRangeMap(), false);
969
970 z->setSeed(seed); // seed random number generator
971 z->randomize(true); // use Xpetra implementation: -> same results for Epetra and Tpetra
972
973 Teuchos::Array<Magnitude> norms(1);
974
975 typedef Teuchos::ScalarTraits<Scalar> STS;
976
977 const Scalar zero = STS::zero(), one = STS::one();
978
979 Scalar lambda = zero;
980 Magnitude residual = STS::magnitude(zero);
981
982 // power iteration
983 for (int iter = 0; iter < niters; ++iter) {
984 z->norm2(norms); // Compute 2-norm of z
985 q->update(one / norms[0], *z, zero); // Set q = z / normz
986 A.apply(*q, *z); // Compute z = A*q
987 if (diagInvVec != Teuchos::null)
988 z->elementWiseMultiply(one, *diagInvVec, *z, zero);
989 lambda = q->dot(*z); // Approximate maximum eigenvalue: lamba = dot(q,z)
990
991 if (iter % 100 == 0 || iter + 1 == niters) {
992 r->update(1.0, *z, -lambda, *q, zero); // Compute A*q - lambda*q
993 r->norm2(norms);
994 residual = STS::magnitude(norms[0] / lambda);
995 if (verbose) {
996 std::cout << "Iter = " << iter
997 << " Lambda = " << lambda
998 << " Residual of A*q - lambda*q = " << residual
999 << std::endl;
1000 }
1001 }
1002 if (residual < tolerance)
1003 break;
1004 }
1005 return lambda;
1006}
1007
1008template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1009RCP<Teuchos::FancyOStream>
1011 MakeFancy(std::ostream& os) {
1012 RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(os));
1013 return fancy;
1014}
1015
1016template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1017typename Teuchos::ScalarTraits<Scalar>::magnitudeType
1019 Distance2(const Teuchos::Array<Teuchos::ArrayRCP<const Scalar>>& v, LocalOrdinal i0, LocalOrdinal i1) {
1020 const size_t numVectors = v.size();
1021
1022 Scalar d = Teuchos::ScalarTraits<Scalar>::zero();
1023 for (size_t j = 0; j < numVectors; j++) {
1024 d += (v[j][i0] - v[j][i1]) * (v[j][i0] - v[j][i1]);
1025 }
1026 return Teuchos::ScalarTraits<Scalar>::magnitude(d);
1027}
1028
1029template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1030typename Teuchos::ScalarTraits<Scalar>::magnitudeType
1032 Distance2(const Teuchos::ArrayView<double>& weight, const Teuchos::Array<Teuchos::ArrayRCP<const Scalar>>& v, LocalOrdinal i0, LocalOrdinal i1) {
1033 const size_t numVectors = v.size();
1034 using MT = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
1035
1036 Scalar d = Teuchos::ScalarTraits<Scalar>::zero();
1037 for (size_t j = 0; j < numVectors; j++) {
1038 d += Teuchos::as<MT>(weight[j]) * (v[j][i0] - v[j][i1]) * (v[j][i0] - v[j][i1]);
1039 }
1040 return Teuchos::ScalarTraits<Scalar>::magnitude(d);
1041}
1042
1043template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1044Teuchos::ArrayRCP<const bool>
1046 DetectDirichletRows(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, const typename Teuchos::ScalarTraits<Scalar>::magnitudeType& tol, bool count_twos_as_dirichlet) {
1047 LocalOrdinal numRows = A.getLocalNumRows();
1048 typedef Teuchos::ScalarTraits<Scalar> STS;
1049 ArrayRCP<bool> boundaryNodes(numRows, true);
1050 if (count_twos_as_dirichlet) {
1051 for (LocalOrdinal row = 0; row < numRows; row++) {
1052 ArrayView<const LocalOrdinal> indices;
1053 ArrayView<const Scalar> vals;
1054 A.getLocalRowView(row, indices, vals);
1055 size_t nnz = A.getNumEntriesInLocalRow(row);
1056 if (nnz > 2) {
1057 size_t col;
1058 for (col = 0; col < nnz; col++)
1059 if ((indices[col] != row) && STS::magnitude(vals[col]) > tol) {
1060 if (!boundaryNodes[row])
1061 break;
1062 boundaryNodes[row] = false;
1063 }
1064 if (col == nnz)
1065 boundaryNodes[row] = true;
1066 }
1067 }
1068 } else {
1069 for (LocalOrdinal row = 0; row < numRows; row++) {
1070 ArrayView<const LocalOrdinal> indices;
1071 ArrayView<const Scalar> vals;
1072 A.getLocalRowView(row, indices, vals);
1073 size_t nnz = A.getNumEntriesInLocalRow(row);
1074 if (nnz > 1)
1075 for (size_t col = 0; col < nnz; col++)
1076 if ((indices[col] != row) && STS::magnitude(vals[col]) > tol) {
1077 boundaryNodes[row] = false;
1078 break;
1079 }
1080 }
1081 }
1082 return boundaryNodes;
1083}
1084
1085template <class CrsMatrix>
1086KOKKOS_FORCEINLINE_FUNCTION bool isDirichletRow(typename CrsMatrix::ordinal_type rowId,
1087 KokkosSparse::SparseRowViewConst<CrsMatrix>& row,
1088 const typename KokkosKernels::ArithTraits<typename CrsMatrix::value_type>::magnitudeType& tol,
1089 const bool count_twos_as_dirichlet) {
1090 using ATS = KokkosKernels::ArithTraits<typename CrsMatrix::value_type>;
1091
1092 auto length = row.length;
1093 bool boundaryNode = true;
1094
1095 if (count_twos_as_dirichlet) {
1096 if (length > 2) {
1097 decltype(length) colID = 0;
1098 for (; colID < length; colID++)
1099 if ((row.colidx(colID) != rowId) &&
1100 (ATS::magnitude(row.value(colID)) > tol)) {
1101 if (!boundaryNode)
1102 break;
1103 boundaryNode = false;
1104 }
1105 if (colID == length)
1106 boundaryNode = true;
1107 }
1108 } else {
1109 for (decltype(length) colID = 0; colID < length; colID++)
1110 if ((row.colidx(colID) != rowId) &&
1111 (ATS::magnitude(row.value(colID)) > tol)) {
1112 boundaryNode = false;
1113 break;
1114 }
1115 }
1116 return boundaryNode;
1117}
1118
1119template <class SC, class LO, class GO, class NO, class memory_space>
1120Kokkos::View<bool*, memory_space>
1121DetectDirichletRows_kokkos(const Xpetra::Matrix<SC, LO, GO, NO>& A,
1122 const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1123 const bool count_twos_as_dirichlet) {
1124 using impl_scalar_type = typename KokkosKernels::ArithTraits<SC>::val_type;
1125 using ATS = KokkosKernels::ArithTraits<impl_scalar_type>;
1126 using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1127 using helpers = Xpetra::Helpers<SC, LO, GO, NO>;
1128
1129 Kokkos::View<bool*, typename NO::device_type::memory_space> boundaryNodes;
1130
1131 if (helpers::isTpetraBlockCrs(A)) {
1132 const Tpetra::BlockCrsMatrix<SC, LO, GO, NO>& Am = toTpetraBlock(A);
1133 auto b_graph = Am.getCrsGraph().getLocalGraphDevice();
1134 auto b_rowptr = Am.getCrsGraph().getLocalRowPtrsDevice();
1135 auto values = Am.getValuesDevice();
1136 LO numBlockRows = Am.getLocalNumRows();
1137 const LO stride = Am.getBlockSize() * Am.getBlockSize();
1138
1139 boundaryNodes = Kokkos::View<bool*, typename NO::device_type::memory_space>(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numBlockRows);
1140
1141 if (count_twos_as_dirichlet)
1142 throw Exceptions::RuntimeError("BlockCrs does not support counting twos as Dirichlet");
1143
1144 Kokkos::parallel_for(
1145 "MueLu:Utils::DetectDirichletRowsBlockCrs", range_type(0, numBlockRows),
1146 KOKKOS_LAMBDA(const LO row) {
1147 auto rowView = b_graph.rowConst(row);
1148 auto length = rowView.length;
1149 LO valstart = b_rowptr[row] * stride;
1150
1151 boundaryNodes(row) = true;
1152 decltype(length) colID = 0;
1153 for (; colID < length; colID++) {
1154 if (rowView.colidx(colID) != row) {
1155 LO current = valstart + colID * stride;
1156 for (LO k = 0; k < stride; k++) {
1157 if (ATS::magnitude(values[current + k]) > tol) {
1158 boundaryNodes(row) = false;
1159 break;
1160 }
1161 }
1162 }
1163 if (boundaryNodes(row) == false)
1164 break;
1165 }
1166 });
1167 } else {
1168 auto localMatrix = A.getLocalMatrixDevice();
1169 LO numRows = A.getLocalNumRows();
1170 boundaryNodes = Kokkos::View<bool*, typename NO::device_type::memory_space>(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numRows);
1171
1172 Kokkos::parallel_for(
1173 "MueLu:Utils::DetectDirichletRows", range_type(0, numRows),
1174 KOKKOS_LAMBDA(const LO row) {
1175 auto rowView = localMatrix.rowConst(row);
1176 boundaryNodes(row) = isDirichletRow(row, rowView, tol, count_twos_as_dirichlet);
1177 });
1178 }
1179 if constexpr (std::is_same<memory_space, typename NO::device_type::memory_space>::value)
1180 return boundaryNodes;
1181 else {
1182 Kokkos::View<bool*, memory_space> boundaryNodes2(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), boundaryNodes.extent(0));
1183 Kokkos::deep_copy(boundaryNodes2, boundaryNodes);
1184 return boundaryNodes2;
1185 }
1186 // CAG: No idea why this is needed to avoid "warning: missing return statement at end of non-void function"
1187 Kokkos::View<bool*, memory_space> dummy("dummy", 0);
1188 return dummy;
1189}
1190
1191template <class SC, class LO, class GO, class NO>
1192Kokkos::View<bool*, typename NO::device_type::memory_space>
1194 DetectDirichletRows_kokkos(const Xpetra::Matrix<SC, LO, GO, NO>& A,
1195 const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1196 const bool count_twos_as_dirichlet) {
1197 return MueLu::DetectDirichletRows_kokkos<SC, LO, GO, NO, typename NO::device_type::memory_space>(A, tol, count_twos_as_dirichlet);
1198}
1199
1200template <class SC, class LO, class GO, class NO>
1201Kokkos::View<bool*, typename Kokkos::HostSpace>
1203 DetectDirichletRows_kokkos_host(const Xpetra::Matrix<SC, LO, GO, NO>& A,
1204 const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1205 const bool count_twos_as_dirichlet) {
1206 return MueLu::DetectDirichletRows_kokkos<SC, LO, GO, NO, typename Kokkos::HostSpace>(A, tol, count_twos_as_dirichlet);
1207}
1208
1209template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1210Teuchos::ArrayRCP<const bool>
1212 DetectDirichletRowsExt(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, bool& bHasZeroDiagonal, const typename Teuchos::ScalarTraits<Scalar>::magnitudeType& tol) {
1213 // assume that there is no zero diagonal in matrix
1214 bHasZeroDiagonal = false;
1215
1216 Teuchos::RCP<Vector> diagVec = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(A.getRowMap());
1217 A.getLocalDiagCopy(*diagVec);
1218 Teuchos::ArrayRCP<const Scalar> diagVecData = diagVec->getData(0);
1219
1220 LocalOrdinal numRows = A.getLocalNumRows();
1221 typedef Teuchos::ScalarTraits<Scalar> STS;
1222 ArrayRCP<bool> boundaryNodes(numRows, false);
1223 for (LocalOrdinal row = 0; row < numRows; row++) {
1224 ArrayView<const LocalOrdinal> indices;
1225 ArrayView<const Scalar> vals;
1226 A.getLocalRowView(row, indices, vals);
1227 size_t nnz = 0; // collect nonzeros in row (excluding the diagonal)
1228 bool bHasDiag = false;
1229 for (decltype(indices.size()) col = 0; col < indices.size(); col++) {
1230 if (indices[col] != row) {
1231 if (STS::magnitude(vals[col] / STS::magnitude(sqrt(STS::magnitude(diagVecData[row]) * STS::magnitude(diagVecData[col])))) > tol) {
1232 nnz++;
1233 }
1234 } else
1235 bHasDiag = true; // found a diagonal entry
1236 }
1237 if (bHasDiag == false)
1238 bHasZeroDiagonal = true; // we found at least one row without a diagonal
1239 else if (nnz == 0)
1240 boundaryNodes[row] = true;
1241 }
1242 return boundaryNodes;
1243}
1244
1245template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1247 EnforceInitialCondition(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1248 const Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& RHS,
1249 Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& InitialGuess,
1250 const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1251 const bool count_twos_as_dirichlet) {
1252 using range_type = Kokkos::RangePolicy<LO, typename Node::execution_space>;
1253
1254 LocalOrdinal numRows = A.getLocalNumRows();
1255 LocalOrdinal numVectors = RHS.getNumVectors();
1256 TEUCHOS_ASSERT_EQUALITY(numVectors, Teuchos::as<LocalOrdinal>(InitialGuess.getNumVectors()));
1257 if (Behavior::debug())
1258 TEUCHOS_ASSERT(RHS.getMap()->isCompatible(*InitialGuess.getMap()));
1259
1260 auto lclRHS = RHS.getLocalViewDevice(Tpetra::Access::ReadOnly);
1261 auto lclInitialGuess = InitialGuess.getLocalViewDevice(Tpetra::Access::ReadWrite);
1262 auto lclA = A.getLocalMatrixDevice();
1263
1264 Kokkos::parallel_for(
1265 "MueLu:Utils::EnforceInitialCondition", range_type(0, numRows),
1266 KOKKOS_LAMBDA(const LO i) {
1267 auto row = lclA.rowConst(i);
1268 if (isDirichletRow(i, row, tol, count_twos_as_dirichlet)) {
1269 for (LocalOrdinal j = 0; j < numVectors; ++j)
1270 lclInitialGuess(i, j) = lclRHS(i, j);
1271 }
1272 });
1273}
1274
1275template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1277 FindNonZeros(const Teuchos::ArrayRCP<const Scalar> vals,
1278 Teuchos::ArrayRCP<bool> nonzeros) {
1279 TEUCHOS_ASSERT(vals.size() == nonzeros.size());
1280 typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitudeType;
1281 const magnitudeType eps = 2.0 * Teuchos::ScalarTraits<magnitudeType>::eps();
1282 for (size_t i = 0; i < static_cast<size_t>(vals.size()); i++) {
1283 nonzeros[i] = (Teuchos::ScalarTraits<Scalar>::magnitude(vals[i]) > eps);
1284 }
1285}
1286
1287// Find Nonzeros in a device view
1288template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1290 FindNonZeros(const typename Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::dual_view_type::t_dev_const_um vals,
1291 Kokkos::View<bool*, typename Node::device_type> nonzeros) {
1292 using ATS = KokkosKernels::ArithTraits<Scalar>;
1293 using impl_ATS = KokkosKernels::ArithTraits<typename ATS::val_type>;
1294 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1295 TEUCHOS_ASSERT(vals.extent(0) == nonzeros.extent(0));
1296 const typename ATS::magnitudeType eps = 2.0 * impl_ATS::eps();
1297
1298 Kokkos::parallel_for(
1299 "MueLu:Maxwell1::FindNonZeros", range_type(0, vals.extent(0)),
1300 KOKKOS_LAMBDA(const size_t i) {
1301 nonzeros(i) = (impl_ATS::magnitude(vals(i, 0)) > eps);
1302 });
1303}
1304
1305template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1307 DetectDirichletColsAndDomains(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1308 const Teuchos::ArrayRCP<bool>& dirichletRows,
1309 Teuchos::ArrayRCP<bool> dirichletCols,
1310 Teuchos::ArrayRCP<bool> dirichletDomain) {
1311 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
1312 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> domMap = A.getDomainMap();
1313 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowMap = A.getRowMap();
1314 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> colMap = A.getColMap();
1315 TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == rowMap->getLocalNumElements());
1316 TEUCHOS_ASSERT(static_cast<size_t>(dirichletCols.size()) == colMap->getLocalNumElements());
1317 TEUCHOS_ASSERT(static_cast<size_t>(dirichletDomain.size()) == domMap->getLocalNumElements());
1318 RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> myColsToZero = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(colMap, 1, /*zeroOut=*/true);
1319 // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1320 for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1321 if (dirichletRows[i]) {
1322 ArrayView<const LocalOrdinal> indices;
1323 ArrayView<const Scalar> values;
1324 A.getLocalRowView(i, indices, values);
1325 for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1326 myColsToZero->replaceLocalValue(indices[j], 0, one);
1327 }
1328 }
1329
1330 RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> globalColsToZero;
1331 RCP<const Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node>> importer = A.getCrsGraph()->getImporter();
1332 if (!importer.is_null()) {
1333 globalColsToZero = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(domMap, 1, /*zeroOut=*/true);
1334 // export to domain map
1335 globalColsToZero->doExport(*myColsToZero, *importer, Xpetra::ADD);
1336 // import to column map
1337 myColsToZero->doImport(*globalColsToZero, *importer, Xpetra::INSERT);
1338 } else
1339 globalColsToZero = myColsToZero;
1340
1341 FindNonZeros(globalColsToZero->getData(0), dirichletDomain);
1342 FindNonZeros(myColsToZero->getData(0), dirichletCols);
1343}
1344
1345template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1347 DetectDirichletColsAndDomains(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1348 const Kokkos::View<bool*, typename Node::device_type>& dirichletRows,
1349 Kokkos::View<bool*, typename Node::device_type> dirichletCols,
1350 Kokkos::View<bool*, typename Node::device_type> dirichletDomain) {
1351 using ATS = KokkosKernels::ArithTraits<Scalar>;
1352 using impl_ATS = KokkosKernels::ArithTraits<typename ATS::val_type>;
1353 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1354 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> domMap = A.getDomainMap();
1355 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowMap = A.getRowMap();
1356 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> colMap = A.getColMap();
1357 TEUCHOS_ASSERT(dirichletRows.extent(0) == rowMap->getLocalNumElements());
1358 TEUCHOS_ASSERT(dirichletCols.extent(0) == colMap->getLocalNumElements());
1359 TEUCHOS_ASSERT(dirichletDomain.extent(0) == domMap->getLocalNumElements());
1360 RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> myColsToZero = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(colMap, /*zeroOut=*/true);
1361 // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1362 auto myColsToZeroView = myColsToZero->getLocalViewDevice(Tpetra::Access::ReadWrite);
1363 auto localMatrix = A.getLocalMatrixDevice();
1364 Kokkos::parallel_for(
1365 "MueLu:Maxwell1::DetectDirichletCols", range_type(0, rowMap->getLocalNumElements()),
1366 KOKKOS_LAMBDA(const LocalOrdinal row) {
1367 if (dirichletRows(row)) {
1368 auto rowView = localMatrix.row(row);
1369 auto length = rowView.length;
1370
1371 for (decltype(length) colID = 0; colID < length; colID++)
1372 myColsToZeroView(rowView.colidx(colID), 0) = impl_ATS::one();
1373 }
1374 });
1375
1376 RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> globalColsToZero;
1377 RCP<const Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node>> importer = A.getCrsGraph()->getImporter();
1378 if (!importer.is_null()) {
1379 globalColsToZero = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(domMap, /*zeroOut=*/true);
1380 // export to domain map
1381 globalColsToZero->doExport(*myColsToZero, *importer, Xpetra::ADD);
1382 // import to column map
1383 myColsToZero->doImport(*globalColsToZero, *importer, Xpetra::INSERT);
1384 } else
1385 globalColsToZero = myColsToZero;
1386 UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::FindNonZeros(globalColsToZero->getLocalViewDevice(Tpetra::Access::ReadOnly), dirichletDomain);
1387 UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::FindNonZeros(myColsToZero->getLocalViewDevice(Tpetra::Access::ReadOnly), dirichletCols);
1388}
1389
1390template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1392 ApplyRowSumCriterion(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol, Teuchos::ArrayRCP<bool>& dirichletRows) {
1393 typedef Teuchos::ScalarTraits<Scalar> STS;
1394 typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
1395 typedef Teuchos::ScalarTraits<MT> MTS;
1396 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowmap = A.getRowMap();
1397 for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1398 size_t nnz = A.getNumEntriesInLocalRow(row);
1399 ArrayView<const LocalOrdinal> indices;
1400 ArrayView<const Scalar> vals;
1401 A.getLocalRowView(row, indices, vals);
1402
1403 Scalar rowsum = STS::zero();
1404 Scalar diagval = STS::zero();
1405
1406 for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1407 LocalOrdinal col = indices[colID];
1408 if (row == col)
1409 diagval = vals[colID];
1410 rowsum += vals[colID];
1411 }
1412 // printf("A(%d,:) row_sum(point) = %6.4e\n",row,rowsum);
1413 if (rowSumTol < MTS::one() && STS::magnitude(rowsum) > STS::magnitude(diagval) * rowSumTol) {
1414 // printf("Row %d triggers rowsum\n",(int)row);
1415 dirichletRows[row] = true;
1416 }
1417 }
1418}
1419
1420template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1421void UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1422 ApplyRowSumCriterion(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, const Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>& BlockNumber, const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol, Teuchos::ArrayRCP<bool>& dirichletRows) {
1423 typedef Teuchos::ScalarTraits<Scalar> STS;
1424 typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
1425 typedef Teuchos::ScalarTraits<MT> MTS;
1426 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowmap = A.getRowMap();
1427
1428 TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "ApplyRowSumCriterion: BlockNumber must match's A's column map.");
1429
1430 Teuchos::ArrayRCP<const LocalOrdinal> block_id = BlockNumber.getData(0);
1431 for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1432 size_t nnz = A.getNumEntriesInLocalRow(row);
1433 ArrayView<const LocalOrdinal> indices;
1434 ArrayView<const Scalar> vals;
1435 A.getLocalRowView(row, indices, vals);
1436
1437 Scalar rowsum = STS::zero();
1438 Scalar diagval = STS::zero();
1439 for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1440 LocalOrdinal col = indices[colID];
1441 if (row == col)
1442 diagval = vals[colID];
1443 if (block_id[row] == block_id[col])
1444 rowsum += vals[colID];
1445 }
1446
1447 // printf("A(%d,:) row_sum(block) = %6.4e\n",row,rowsum);
1448 if (rowSumTol < MTS::one() && STS::magnitude(rowsum) > STS::magnitude(diagval) * rowSumTol) {
1449 // printf("Row %d triggers rowsum\n",(int)row);
1450 dirichletRows[row] = true;
1451 }
1452 }
1453}
1454
1455// Applies rowsum criterion
1456template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class memory_space>
1457void ApplyRowSumCriterion(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1458 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1459 Kokkos::View<bool*, memory_space>& dirichletRows) {
1460 typedef Teuchos::ScalarTraits<Scalar> STS;
1461 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowmap = A.getRowMap();
1462
1463 auto dirichletRowsHost = Kokkos::create_mirror_view(dirichletRows);
1464 Kokkos::deep_copy(dirichletRowsHost, dirichletRows);
1465
1466 for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1467 size_t nnz = A.getNumEntriesInLocalRow(row);
1468 ArrayView<const LocalOrdinal> indices;
1469 ArrayView<const Scalar> vals;
1470 A.getLocalRowView(row, indices, vals);
1471
1472 Scalar rowsum = STS::zero();
1473 Scalar diagval = STS::zero();
1474 for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1475 LocalOrdinal col = indices[colID];
1476 if (row == col)
1477 diagval = vals[colID];
1478 rowsum += vals[colID];
1479 }
1480 if (STS::real(rowsum) > STS::magnitude(diagval) * rowSumTol)
1481 dirichletRowsHost(row) = true;
1482 }
1483
1484 Kokkos::deep_copy(dirichletRows, dirichletRowsHost);
1485}
1486
1487template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1488void UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1489 ApplyRowSumCriterion(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1490 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1491 Kokkos::View<bool*, typename Node::device_type::memory_space>& dirichletRows) {
1492 MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, typename Node::device_type::memory_space>(A, rowSumTol, dirichletRows);
1493}
1494
1495template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1496void UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1497 ApplyRowSumCriterionHost(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1498 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1499 Kokkos::View<bool*, Kokkos::HostSpace>& dirichletRows) {
1500 MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, Kokkos::HostSpace>(A, rowSumTol, dirichletRows);
1501}
1502
1503// Applies rowsum criterion
1504template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class memory_space>
1505void ApplyRowSumCriterion(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1506 const Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>& BlockNumber,
1507 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1508 Kokkos::View<bool*, memory_space>& dirichletRows) {
1509 typedef Teuchos::ScalarTraits<Scalar> STS;
1510 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowmap = A.getRowMap();
1511
1512 TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "ApplyRowSumCriterion: BlockNumber must match's A's column map.");
1513
1514 auto dirichletRowsHost = Kokkos::create_mirror_view(dirichletRows);
1515 Kokkos::deep_copy(dirichletRowsHost, dirichletRows);
1516
1517 Teuchos::ArrayRCP<const LocalOrdinal> block_id = BlockNumber.getData(0);
1518 for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1519 size_t nnz = A.getNumEntriesInLocalRow(row);
1520 ArrayView<const LocalOrdinal> indices;
1521 ArrayView<const Scalar> vals;
1522 A.getLocalRowView(row, indices, vals);
1523
1524 Scalar rowsum = STS::zero();
1525 Scalar diagval = STS::zero();
1526 for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1527 LocalOrdinal col = indices[colID];
1528 if (row == col)
1529 diagval = vals[colID];
1530 if (block_id[row] == block_id[col])
1531 rowsum += vals[colID];
1532 }
1533 if (STS::real(rowsum) > STS::magnitude(diagval) * rowSumTol)
1534 dirichletRowsHost(row) = true;
1535 }
1536
1537 Kokkos::deep_copy(dirichletRows, dirichletRowsHost);
1538}
1539
1540template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1541void UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1542 ApplyRowSumCriterion(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1543 const Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>& BlockNumber,
1544 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1545 Kokkos::View<bool*, typename Node::device_type::memory_space>& dirichletRows) {
1546 MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, typename Node::device_type::memory_space>(A, BlockNumber, rowSumTol, dirichletRows);
1547}
1548
1549template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1550void UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1551 ApplyRowSumCriterionHost(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1552 const Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>& BlockNumber,
1553 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1554 Kokkos::View<bool*, Kokkos::HostSpace>& dirichletRows) {
1555 MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, Kokkos::HostSpace>(A, BlockNumber, rowSumTol, dirichletRows);
1556}
1557
1558template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1559Teuchos::ArrayRCP<const bool>
1561 DetectDirichletCols(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1562 const Teuchos::ArrayRCP<const bool>& dirichletRows) {
1563 Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
1564 Scalar one = Teuchos::ScalarTraits<Scalar>::one();
1565 Teuchos::RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> domMap = A.getDomainMap();
1566 Teuchos::RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> colMap = A.getColMap();
1567 Teuchos::RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> myColsToZero = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(colMap, 1);
1568 myColsToZero->putScalar(zero);
1569 // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1570 for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1571 if (dirichletRows[i]) {
1572 Teuchos::ArrayView<const LocalOrdinal> indices;
1573 Teuchos::ArrayView<const Scalar> values;
1574 A.getLocalRowView(i, indices, values);
1575 for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1576 myColsToZero->replaceLocalValue(indices[j], 0, one);
1577 }
1578 }
1579
1580 Teuchos::RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> globalColsToZero = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(domMap, 1);
1581 globalColsToZero->putScalar(zero);
1582 Teuchos::RCP<Xpetra::Export<LocalOrdinal, GlobalOrdinal, Node>> exporter = Xpetra::ExportFactory<LocalOrdinal, GlobalOrdinal, Node>::Build(colMap, domMap);
1583 // export to domain map
1584 globalColsToZero->doExport(*myColsToZero, *exporter, Xpetra::ADD);
1585 // import to column map
1586 myColsToZero->doImport(*globalColsToZero, *exporter, Xpetra::INSERT);
1587 Teuchos::ArrayRCP<const Scalar> myCols = myColsToZero->getData(0);
1588 Teuchos::ArrayRCP<bool> dirichletCols(colMap->getLocalNumElements(), true);
1589 Magnitude eps = Teuchos::ScalarTraits<Magnitude>::eps();
1590 for (size_t i = 0; i < colMap->getLocalNumElements(); i++) {
1591 dirichletCols[i] = Teuchos::ScalarTraits<Scalar>::magnitude(myCols[i]) > 2.0 * eps;
1592 }
1593 return dirichletCols;
1594}
1595
1596template <class SC, class LO, class GO, class NO>
1597Kokkos::View<bool*, typename NO::device_type>
1599 DetectDirichletCols(const Xpetra::Matrix<SC, LO, GO, NO>& A,
1600 const Kokkos::View<const bool*, typename NO::device_type>& dirichletRows) {
1601 using ATS = KokkosKernels::ArithTraits<SC>;
1602 using impl_ATS = KokkosKernels::ArithTraits<typename ATS::val_type>;
1603 using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1604
1605 SC zero = ATS::zero();
1606
1607 auto localMatrix = A.getLocalMatrixDevice();
1608 LO numRows = A.getLocalNumRows();
1609
1610 Teuchos::RCP<const Xpetra::Map<LO, GO, NO>> domMap = A.getDomainMap();
1611 Teuchos::RCP<const Xpetra::Map<LO, GO, NO>> colMap = A.getColMap();
1612 Teuchos::RCP<Xpetra::MultiVector<SC, LO, GO, NO>> myColsToZero = Xpetra::MultiVectorFactory<SC, LO, GO, NO>::Build(colMap, 1);
1613 myColsToZero->putScalar(zero);
1614 auto myColsToZeroView = myColsToZero->getLocalViewDevice(Tpetra::Access::ReadWrite);
1615 // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1616 Kokkos::parallel_for(
1617 "MueLu:Utils::DetectDirichletCols1", range_type(0, numRows),
1618 KOKKOS_LAMBDA(const LO row) {
1619 if (dirichletRows(row)) {
1620 auto rowView = localMatrix.row(row);
1621 auto length = rowView.length;
1622
1623 for (decltype(length) colID = 0; colID < length; colID++)
1624 myColsToZeroView(rowView.colidx(colID), 0) = impl_ATS::one();
1625 }
1626 });
1627
1628 Teuchos::RCP<Xpetra::MultiVector<SC, LO, GO, NO>> globalColsToZero = Xpetra::MultiVectorFactory<SC, LO, GO, NO>::Build(domMap, 1);
1629 globalColsToZero->putScalar(zero);
1630 Teuchos::RCP<Xpetra::Export<LO, GO, NO>> exporter = Xpetra::ExportFactory<LO, GO, NO>::Build(colMap, domMap);
1631 // export to domain map
1632 globalColsToZero->doExport(*myColsToZero, *exporter, Xpetra::ADD);
1633 // import to column map
1634 myColsToZero->doImport(*globalColsToZero, *exporter, Xpetra::INSERT);
1635
1636 auto myCols = myColsToZero->getLocalViewDevice(Tpetra::Access::ReadOnly);
1637 size_t numColEntries = colMap->getLocalNumElements();
1638 Kokkos::View<bool*, typename NO::device_type> dirichletCols(Kokkos::ViewAllocateWithoutInitializing("dirichletCols"), numColEntries);
1639 const typename ATS::magnitudeType eps = 2.0 * ATS::eps();
1640
1641 Kokkos::parallel_for(
1642 "MueLu:Utils::DetectDirichletCols2", range_type(0, numColEntries),
1643 KOKKOS_LAMBDA(const size_t i) {
1644 dirichletCols(i) = impl_ATS::magnitude(myCols(i, 0)) > eps;
1645 });
1646 return dirichletCols;
1647}
1648
1649template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1650Scalar
1652 Frobenius(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B) {
1653 // We check only row maps. Column may be different. One would hope that they are the same, as we typically
1654 // calculate frobenius norm of the specified sparsity pattern with an updated matrix from the previous step,
1655 // but matrix addition, even when one is submatrix of the other, changes column map (though change may be as
1656 // simple as couple of elements swapped)
1657 TEUCHOS_TEST_FOR_EXCEPTION(!A.getRowMap()->isSameAs(*B.getRowMap()), Exceptions::Incompatible, "MueLu::CGSolver::Frobenius: row maps are incompatible");
1658 TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete() || !B.isFillComplete(), Exceptions::RuntimeError, "Matrices must be fill completed");
1659
1660 const Map& AColMap = *A.getColMap();
1661 const Map& BColMap = *B.getColMap();
1662
1663 Teuchos::ArrayView<const LocalOrdinal> indA, indB;
1664 Teuchos::ArrayView<const Scalar> valA, valB;
1665 size_t nnzA = 0, nnzB = 0;
1666
1667 // We use a simple algorithm
1668 // for each row we fill valBAll array with the values in the corresponding row of B
1669 // as such, it serves as both sorted array and as storage, so we don't need to do a
1670 // tricky problem: "find a value in the row of B corresponding to the specific GID"
1671 // Once we do that, we translate LID of entries of row of A to LID of B, and multiply
1672 // corresponding entries.
1673 // The algorithm should be reasonably cheap, as it does not sort anything, provided
1674 // that getLocalElement and getGlobalElement functions are reasonably effective. It
1675 // *is* possible that the costs are hidden in those functions, but if maps are close
1676 // to linear maps, we should be fine
1677 Teuchos::Array<Scalar> valBAll(BColMap.getLocalNumElements());
1678
1679 LocalOrdinal invalid = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
1680 Scalar zero = Teuchos::ScalarTraits<Scalar>::zero(), f = zero, gf;
1681 size_t numRows = A.getLocalNumRows();
1682 for (size_t i = 0; i < numRows; i++) {
1683 A.getLocalRowView(i, indA, valA);
1684 B.getLocalRowView(i, indB, valB);
1685 nnzA = indA.size();
1686 nnzB = indB.size();
1687
1688 // Set up array values
1689 for (size_t j = 0; j < nnzB; j++)
1690 valBAll[indB[j]] = valB[j];
1691
1692 for (size_t j = 0; j < nnzA; j++) {
1693 // The cost of the whole Frobenius dot product function depends on the
1694 // cost of the getLocalElement and getGlobalElement functions here.
1695 LocalOrdinal ind = BColMap.getLocalElement(AColMap.getGlobalElement(indA[j]));
1696 if (ind != invalid)
1697 f += valBAll[ind] * valA[j];
1698 }
1699
1700 // Clean up array values
1701 for (size_t j = 0; j < nnzB; j++)
1702 valBAll[indB[j]] = zero;
1703 }
1704
1705 MueLu_sumAll(AColMap.getComm(), f, gf);
1706
1707 return gf;
1708}
1709
1710template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1713 // Distribute the seeds evenly in [1,maxint-1]. This guarantees nothing
1714 // about where in random number stream we are, but avoids overflow situations
1715 // in parallel when multiplying by a PID. It would be better to use
1716 // a good parallel random number generator.
1717 double one = 1.0;
1718 int maxint = INT_MAX; //= 2^31-1 = 2147483647 for 32-bit integers
1719 int mySeed = Teuchos::as<int>((maxint - 1) * (one - (comm.getRank() + 1) / (comm.getSize() + one)));
1720 if (mySeed < 1 || mySeed == maxint) {
1721 std::ostringstream errStr;
1722 errStr << "Error detected with random seed = " << mySeed << ". It should be in the interval [1,2^31-2].";
1723 throw Exceptions::RuntimeError(errStr.str());
1724 }
1725 std::srand(mySeed);
1726 // For Tpetra, we could use Kokkos' random number generator here.
1727 Teuchos::ScalarTraits<Scalar>::seedrandom(mySeed);
1728}
1729
1730template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1732 FindDirichletRows(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1733 std::vector<LocalOrdinal>& dirichletRows, bool count_twos_as_dirichlet) {
1734 typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
1735 dirichletRows.resize(0);
1736 for (size_t i = 0; i < A->getLocalNumRows(); i++) {
1737 Teuchos::ArrayView<const LocalOrdinal> indices;
1738 Teuchos::ArrayView<const Scalar> values;
1739 A->getLocalRowView(i, indices, values);
1740 int nnz = 0;
1741 for (size_t j = 0; j < (size_t)indices.size(); j++) {
1742 if (Teuchos::ScalarTraits<Scalar>::magnitude(values[j]) > Teuchos::ScalarTraits<MT>::eps()) {
1743 nnz++;
1744 }
1745 }
1746 if (nnz == 1 || (count_twos_as_dirichlet && nnz == 2)) {
1747 dirichletRows.push_back(i);
1748 }
1749 }
1750}
1751
1752template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1754 ApplyOAZToMatrixRows(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1755 const std::vector<LocalOrdinal>& dirichletRows) {
1756 RCP<const Map> Rmap = A->getRowMap();
1757 RCP<const Map> Cmap = A->getColMap();
1758 Scalar one = Teuchos::ScalarTraits<Scalar>::one();
1759 Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
1760
1761 for (size_t i = 0; i < dirichletRows.size(); i++) {
1762 GlobalOrdinal row_gid = Rmap->getGlobalElement(dirichletRows[i]);
1763
1764 Teuchos::ArrayView<const LocalOrdinal> indices;
1765 Teuchos::ArrayView<const Scalar> values;
1766 A->getLocalRowView(dirichletRows[i], indices, values);
1767 // NOTE: This won't work with fancy node types.
1768 Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1769 for (size_t j = 0; j < (size_t)indices.size(); j++) {
1770 if (Cmap->getGlobalElement(indices[j]) == row_gid)
1771 valuesNC[j] = one;
1772 else
1773 valuesNC[j] = zero;
1774 }
1775 }
1776}
1777
1778template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1780 ApplyOAZToMatrixRows(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1781 const Teuchos::ArrayRCP<const bool>& dirichletRows) {
1782 TEUCHOS_ASSERT(A->isFillComplete());
1783 RCP<const Map> domMap = A->getDomainMap();
1784 RCP<const Map> ranMap = A->getRangeMap();
1785 RCP<const Map> Rmap = A->getRowMap();
1786 RCP<const Map> Cmap = A->getColMap();
1787 TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == Rmap->getLocalNumElements());
1788 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
1789 const Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
1790 A->resumeFill();
1791 for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1792 if (dirichletRows[i]) {
1793 GlobalOrdinal row_gid = Rmap->getGlobalElement(i);
1794
1795 Teuchos::ArrayView<const LocalOrdinal> indices;
1796 Teuchos::ArrayView<const Scalar> values;
1797 A->getLocalRowView(i, indices, values);
1798
1799 Teuchos::ArrayRCP<Scalar> valuesNC(values.size());
1800 for (size_t j = 0; j < (size_t)indices.size(); j++) {
1801 if (Cmap->getGlobalElement(indices[j]) == row_gid)
1802 valuesNC[j] = one;
1803 else
1804 valuesNC[j] = zero;
1805 }
1806 A->replaceLocalValues(i, indices, valuesNC());
1807 }
1808 }
1809 A->fillComplete(domMap, ranMap);
1810}
1811
1812template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1814 ApplyOAZToMatrixRows(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1815 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows) {
1816 TEUCHOS_ASSERT(A->isFillComplete());
1817 using ATS = KokkosKernels::ArithTraits<Scalar>;
1818 using impl_ATS = KokkosKernels::ArithTraits<typename ATS::val_type>;
1819 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1820
1821 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> domMap = A->getDomainMap();
1822 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> ranMap = A->getRangeMap();
1823 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> Rmap = A->getRowMap();
1824 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> Cmap = A->getColMap();
1825
1826 TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == Rmap->getLocalNumElements());
1827
1828 auto localMatrix = A->getLocalMatrixDevice();
1829 auto localRmap = Rmap->getLocalMap();
1830 auto localCmap = Cmap->getLocalMap();
1831
1832 Kokkos::parallel_for(
1833 "MueLu::Utils::ApplyOAZ", range_type(0, dirichletRows.extent(0)),
1834 KOKKOS_LAMBDA(const LocalOrdinal row) {
1835 if (dirichletRows(row)) {
1836 auto rowView = localMatrix.row(row);
1837 auto length = rowView.length;
1838 auto row_gid = localRmap.getGlobalElement(row);
1839 auto row_lid = localCmap.getLocalElement(row_gid);
1840
1841 for (decltype(length) colID = 0; colID < length; colID++)
1842 if (rowView.colidx(colID) == row_lid)
1843 rowView.value(colID) = impl_ATS::one();
1844 else
1845 rowView.value(colID) = impl_ATS::zero();
1846 }
1847 });
1848}
1849
1850template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1852 ZeroDirichletRows(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1853 const std::vector<LocalOrdinal>& dirichletRows,
1854 Scalar replaceWith) {
1855 for (size_t i = 0; i < dirichletRows.size(); i++) {
1856 Teuchos::ArrayView<const LocalOrdinal> indices;
1857 Teuchos::ArrayView<const Scalar> values;
1858 A->getLocalRowView(dirichletRows[i], indices, values);
1859 // NOTE: This won't work with fancy node types.
1860 Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1861 for (size_t j = 0; j < (size_t)indices.size(); j++)
1862 valuesNC[j] = replaceWith;
1863 }
1864}
1865
1866template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1868 ZeroDirichletRows(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1869 const Teuchos::ArrayRCP<const bool>& dirichletRows,
1870 Scalar replaceWith) {
1871 TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == A->getRowMap()->getLocalNumElements());
1872 for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1873 if (dirichletRows[i]) {
1874 Teuchos::ArrayView<const LocalOrdinal> indices;
1875 Teuchos::ArrayView<const Scalar> values;
1876 A->getLocalRowView(i, indices, values);
1877 // NOTE: This won't work with fancy node types.
1878 Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1879 for (size_t j = 0; j < (size_t)indices.size(); j++)
1880 valuesNC[j] = replaceWith;
1881 }
1882 }
1883}
1884
1885template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1887 ZeroDirichletRows(Teuchos::RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& X,
1888 const Teuchos::ArrayRCP<const bool>& dirichletRows,
1889 Scalar replaceWith) {
1890 TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == X->getMap()->getLocalNumElements());
1891 for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1892 if (dirichletRows[i]) {
1893 for (size_t j = 0; j < X->getNumVectors(); j++)
1894 X->replaceLocalValue(i, j, replaceWith);
1895 }
1896 }
1897}
1898
1899template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1901 ZeroDirichletRows(RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1902 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
1903 Scalar replaceWith) {
1904 using ATS = KokkosKernels::ArithTraits<Scalar>;
1905 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1906
1907 typename ATS::val_type impl_replaceWith = replaceWith;
1908
1909 auto localMatrix = A->getLocalMatrixDevice();
1910 LocalOrdinal numRows = A->getLocalNumRows();
1911
1912 Kokkos::parallel_for(
1913 "MueLu:Utils::ZeroDirichletRows", range_type(0, numRows),
1914 KOKKOS_LAMBDA(const LocalOrdinal row) {
1915 if (dirichletRows(row)) {
1916 auto rowView = localMatrix.row(row);
1917 auto length = rowView.length;
1918 for (decltype(length) colID = 0; colID < length; colID++)
1919 rowView.value(colID) = impl_replaceWith;
1920 }
1921 });
1922}
1923
1924template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1925void UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1926 ZeroDirichletRows(RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& X,
1927 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
1928 Scalar replaceWith) {
1929 using ATS = KokkosKernels::ArithTraits<Scalar>;
1930 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1931
1932 typename ATS::val_type impl_replaceWith = replaceWith;
1933
1934 auto myCols = X->getLocalViewDevice(Tpetra::Access::ReadWrite);
1935 size_t numVecs = X->getNumVectors();
1936 Kokkos::parallel_for(
1937 "MueLu:Utils::ZeroDirichletRows_MV", range_type(0, dirichletRows.size()),
1938 KOKKOS_LAMBDA(const size_t i) {
1939 if (dirichletRows(i)) {
1940 for (size_t j = 0; j < numVecs; j++)
1941 myCols(i, j) = impl_replaceWith;
1942 }
1943 });
1944}
1945
1946template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1948 ZeroDirichletCols(Teuchos::RCP<Matrix>& A,
1949 const Teuchos::ArrayRCP<const bool>& dirichletCols,
1950 Scalar replaceWith) {
1951 TEUCHOS_ASSERT(static_cast<size_t>(dirichletCols.size()) == A->getColMap()->getLocalNumElements());
1952 for (size_t i = 0; i < A->getLocalNumRows(); i++) {
1953 Teuchos::ArrayView<const LocalOrdinal> indices;
1954 Teuchos::ArrayView<const Scalar> values;
1955 A->getLocalRowView(i, indices, values);
1956 // NOTE: This won't work with fancy node types.
1957 Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1958 for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1959 if (dirichletCols[indices[j]])
1960 valuesNC[j] = replaceWith;
1961 }
1962}
1963
1964template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1966 ZeroDirichletCols(RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1967 const Kokkos::View<const bool*, typename Node::device_type>& dirichletCols,
1968 Scalar replaceWith) {
1969 using ATS = KokkosKernels::ArithTraits<Scalar>;
1970 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1971
1972 typename ATS::val_type impl_replaceWith = replaceWith;
1973
1974 auto localMatrix = A->getLocalMatrixDevice();
1975 LocalOrdinal numRows = A->getLocalNumRows();
1976
1977 Kokkos::parallel_for(
1978 "MueLu:Utils::ZeroDirichletCols", range_type(0, numRows),
1979 KOKKOS_LAMBDA(const LocalOrdinal row) {
1980 auto rowView = localMatrix.row(row);
1981 auto length = rowView.length;
1982 for (decltype(length) colID = 0; colID < length; colID++)
1983 if (dirichletCols(rowView.colidx(colID))) {
1984 rowView.value(colID) = impl_replaceWith;
1985 }
1986 });
1987}
1988
1989template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1991 FindDirichletRowsAndPropagateToCols(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1992 Teuchos::RCP<Xpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node>>& isDirichletRow,
1993 Teuchos::RCP<Xpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node>>& isDirichletCol) {
1994 // Make sure A's RowMap == DomainMap
1995 if (!A->getRowMap()->isSameAs(*A->getDomainMap())) {
1996 throw std::runtime_error("UtilitiesBase::FindDirichletRowsAndPropagateToCols row and domain maps must match.");
1997 }
1998 RCP<const Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node>> importer = A->getCrsGraph()->getImporter();
1999 bool has_import = !importer.is_null();
2000
2001 // Find the Dirichlet rows
2002 std::vector<LocalOrdinal> dirichletRows;
2003 FindDirichletRows(A, dirichletRows);
2004
2005#if 0
2006 printf("[%d] DirichletRow Ids = ",A->getRowMap()->getComm()->getRank());
2007 for(size_t i=0; i<(size_t) dirichletRows.size(); i++)
2008 printf("%d ",dirichletRows[i]);
2009 printf("\n");
2010 fflush(stdout);
2011#endif
2012 // Allocate all as non-Dirichlet
2013 isDirichletRow = Xpetra::VectorFactory<int, LocalOrdinal, GlobalOrdinal, Node>::Build(A->getRowMap(), true);
2014 isDirichletCol = Xpetra::VectorFactory<int, LocalOrdinal, GlobalOrdinal, Node>::Build(A->getColMap(), true);
2015
2016 {
2017 Teuchos::ArrayRCP<int> dr_rcp = isDirichletRow->getDataNonConst(0);
2018 Teuchos::ArrayView<int> dr = dr_rcp();
2019 Teuchos::ArrayRCP<int> dc_rcp = isDirichletCol->getDataNonConst(0);
2020 Teuchos::ArrayView<int> dc = dc_rcp();
2021 for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
2022 dr[dirichletRows[i]] = 1;
2023 if (!has_import) dc[dirichletRows[i]] = 1;
2024 }
2025 }
2026
2027 if (has_import)
2028 isDirichletCol->doImport(*isDirichletRow, *importer, Xpetra::CombineMode::ADD);
2029}
2030
2031template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2032RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
2034 ReplaceNonZerosWithOnes(const RCP<Matrix>& original) {
2035 using ISC = typename KokkosKernels::ArithTraits<Scalar>::val_type;
2036 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
2037 using local_matrix_type = typename CrsMatrix::local_matrix_device_type;
2038 using values_type = typename local_matrix_type::values_type;
2039
2040 const ISC ONE = KokkosKernels::ArithTraits<ISC>::one();
2041 const ISC ZERO = KokkosKernels::ArithTraits<ISC>::zero();
2042
2043 // Copy the values array of the old matrix to a new array, replacing all the non-zeros with one
2044 auto localMatrix = original->getLocalMatrixDevice();
2045 TEUCHOS_TEST_FOR_EXCEPTION(!original->hasCrsGraph(), Exceptions::RuntimeError, "ReplaceNonZerosWithOnes: Cannot get CrsGraph");
2046 values_type new_values("values", localMatrix.nnz());
2047
2048 Kokkos::parallel_for(
2049 "ReplaceNonZerosWithOnes", range_type(0, localMatrix.nnz()), KOKKOS_LAMBDA(const size_t i) {
2050 if (localMatrix.values(i) != ZERO)
2051 new_values(i) = ONE;
2052 else
2053 new_values(i) = ZERO;
2054 });
2055
2056 // Build the new matrix
2057 RCP<Matrix> NewMatrix = Xpetra::MatrixFactory<SC, LO, GO, NO>::Build(original->getCrsGraph(), new_values);
2058 TEUCHOS_TEST_FOR_EXCEPTION(NewMatrix.is_null(), Exceptions::RuntimeError, "ReplaceNonZerosWithOnes: MatrixFactory::Build() did not return matrix");
2059 NewMatrix->fillComplete(original->getDomainMap(), original->getRangeMap());
2060 return NewMatrix;
2061}
2062
2063template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2064RCP<const Xpetra::BlockedMap<LocalOrdinal, GlobalOrdinal, Node>>
2066 GeneratedBlockedTargetMap(const Xpetra::BlockedMap<LocalOrdinal, GlobalOrdinal, Node>& sourceBlockedMap,
2067 const Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node>& Importer) {
2068 typedef Xpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node> IntVector;
2069 Xpetra::UnderlyingLib lib = sourceBlockedMap.lib();
2070
2071 // De-stride the map if we have to (might regret this later)
2072 RCP<const Map> fullMap = sourceBlockedMap.getMap();
2073 RCP<const Map> stridedMap = Teuchos::rcp_dynamic_cast<const Xpetra::StridedMap<LocalOrdinal, GlobalOrdinal, Node>>(fullMap);
2074 if (!stridedMap.is_null()) fullMap = stridedMap->getMap();
2075
2076 // Initial sanity checking for map compatibil
2077 const size_t numSubMaps = sourceBlockedMap.getNumMaps();
2078 if (!Importer.getSourceMap()->isCompatible(*fullMap))
2079 throw std::runtime_error("GenerateBlockedTargetMap(): Map compatibility error");
2080
2081 // Build an indicator vector
2082 RCP<IntVector> block_ids = Xpetra::VectorFactory<int, LocalOrdinal, GlobalOrdinal, Node>::Build(fullMap);
2083
2084 for (size_t i = 0; i < numSubMaps; i++) {
2085 RCP<const Map> map = sourceBlockedMap.getMap(i);
2086
2087 for (size_t j = 0; j < map->getLocalNumElements(); j++) {
2088 LocalOrdinal jj = fullMap->getLocalElement(map->getGlobalElement(j));
2089 block_ids->replaceLocalValue(jj, (int)i);
2090 }
2091 }
2092
2093 // Get the block ids for the new map
2094 RCP<const Map> targetMap = Importer.getTargetMap();
2095 RCP<IntVector> new_block_ids = Xpetra::VectorFactory<int, LocalOrdinal, GlobalOrdinal, Node>::Build(targetMap);
2096 new_block_ids->doImport(*block_ids, Importer, Xpetra::CombineMode::ADD);
2097 Teuchos::ArrayRCP<const int> dataRCP = new_block_ids->getData(0);
2098 Teuchos::ArrayView<const int> data = dataRCP();
2099
2100 // Get the GIDs for each subblock
2101 Teuchos::Array<Teuchos::Array<GlobalOrdinal>> elementsInSubMap(numSubMaps);
2102 for (size_t i = 0; i < targetMap->getLocalNumElements(); i++) {
2103 elementsInSubMap[data[i]].push_back(targetMap->getGlobalElement(i));
2104 }
2105
2106 // Generate the new submaps
2107 std::vector<RCP<const Map>> subMaps(numSubMaps);
2108 for (size_t i = 0; i < numSubMaps; i++) {
2109 subMaps[i] = Xpetra::MapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build(lib, Teuchos::OrdinalTraits<GlobalOrdinal>::invalid(), elementsInSubMap[i](), targetMap->getIndexBase(), targetMap->getComm());
2110 }
2111
2112 // Build the BlockedMap
2113 return rcp(new BlockedMap(targetMap, subMaps));
2114}
2115
2116template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2118 MapsAreNested(const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>& rowMap, const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>& colMap) {
2119 if ((rowMap.lib() == Xpetra::UseTpetra) && (colMap.lib() == Xpetra::UseTpetra)) {
2120 auto tpRowMap = toTpetra(rowMap);
2121 auto tpColMap = toTpetra(colMap);
2122 return tpColMap.isLocallyFitted(tpRowMap);
2123 }
2124
2125 ArrayView<const GlobalOrdinal> rowElements = rowMap.getLocalElementList();
2126 ArrayView<const GlobalOrdinal> colElements = colMap.getLocalElementList();
2127
2128 const size_t numElements = rowElements.size();
2129
2130 if (size_t(colElements.size()) < numElements)
2131 return false;
2132
2133 bool goodMap = true;
2134 for (size_t i = 0; i < numElements; i++)
2135 if (rowElements[i] != colElements[i]) {
2136 goodMap = false;
2137 break;
2138 }
2139
2140 return goodMap;
2141}
2142
2143template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2144Teuchos::RCP<Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>>
2146 ReverseCuthillMcKee(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op) {
2147 using local_matrix_type = typename Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type;
2148 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
2149 using lno_nnz_view_t = typename local_graph_type::entries_type::non_const_type;
2150 using device = typename local_graph_type::device_type;
2151 using execution_space = typename local_matrix_type::execution_space;
2152 using ordinal_type = typename local_matrix_type::ordinal_type;
2153
2154 local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
2155
2156 lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm<device, typename local_graph_type::row_map_type, typename local_graph_type::entries_type, lno_nnz_view_t>(localGraph.row_map, localGraph.entries);
2157
2158 RCP<Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>> retval =
2159 Xpetra::VectorFactory<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(Op.getRowMap());
2160
2161 // Copy out and reorder data
2162 auto view1D = Kokkos::subview(retval->getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
2163 Kokkos::parallel_for(
2164 "Utilities::ReverseCuthillMcKee",
2165 Kokkos::RangePolicy<ordinal_type, execution_space>(0, localGraph.numRows()),
2166 KOKKOS_LAMBDA(const ordinal_type rowIdx) {
2167 view1D(rcmOrder(rowIdx)) = rowIdx;
2168 });
2169 return retval;
2170}
2171
2172template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2173Teuchos::RCP<Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>>
2175 CuthillMcKee(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op) {
2176 using local_matrix_type = typename Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type;
2177 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
2178 using lno_nnz_view_t = typename local_graph_type::entries_type::non_const_type;
2179 using device = typename local_graph_type::device_type;
2180 using execution_space = typename local_matrix_type::execution_space;
2181 using ordinal_type = typename local_matrix_type::ordinal_type;
2182
2183 local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
2184 LocalOrdinal numRows = localGraph.numRows();
2185
2186 lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm<device, typename local_graph_type::row_map_type, typename local_graph_type::entries_type, lno_nnz_view_t>(localGraph.row_map, localGraph.entries);
2187
2188 RCP<Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>> retval =
2189 Xpetra::VectorFactory<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(Op.getRowMap());
2190
2191 // Copy out data
2192 auto view1D = Kokkos::subview(retval->getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
2193 // Since KokkosKernels produced RCM, also reverse the order of the view to get CM
2194 Kokkos::parallel_for(
2195 "Utilities::ReverseCuthillMcKee",
2196 Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
2197 KOKKOS_LAMBDA(const ordinal_type rowIdx) {
2198 view1D(rcmOrder(numRows - 1 - rowIdx)) = rowIdx;
2199 });
2200 return retval;
2201}
2202
2203template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2205 TripleMatrixProduct(const Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& R,
2206 const Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
2207 const Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& P,
2208 Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& Ac,
2209 const Teuchos::ParameterList& pL,
2210 const MueLu::BaseClass& verbObj,
2211 Teuchos::RCP<Teuchos::ParameterList>& APparams,
2212 Teuchos::RCP<Teuchos::ParameterList>& RAPparams,
2213 Level* coarseLevel) {
2214 using Matrix = Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
2215 using MatrixMatrix = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
2216 using MatrixFactory = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
2218
2219 const bool doTranspose = true;
2220 const bool doFillComplete = true;
2221 const bool doOptimizeStorage = true;
2222
2223 const bool useImplicit = pL.get<bool>("transpose: use implicit");
2224
2225 const std::string matrixName = (pL.isType<std::string>("Matrix name")) ? pL.get<std::string>("Matrix name") : "A";
2226 const std::string prolongatorName = (pL.isType<std::string>("Prolongator name")) ? pL.get<std::string>("Prolongator name") : "P";
2227 const std::string restrictorName = (pL.isType<std::string>("Restrictor name")) ? pL.get<std::string>("Restrictor name") : "R";
2228 std::string coarseMatrixName;
2229 if (pL.isType<std::string>("coarseMatrixName"))
2230 coarseMatrixName = pL.get<std::string>("coarseMatrixName");
2231 else {
2232 if (matrixName.size() == 1)
2233 coarseMatrixName = matrixName + "c";
2234 else
2235 coarseMatrixName = matrixName + "_coarse";
2236 }
2237
2238 std::string levelstr, labelstr;
2239 if (coarseLevel != nullptr) {
2240 std::ostringstream levelss;
2241 levelss << coarseLevel->GetLevelID();
2242 levelstr = levelss.str();
2243 labelstr = FormattingHelper::getColonLabel(coarseLevel->getObjectLabel());
2244 }
2245
2246 bool isGPU = Node::is_gpu;
2247
2248 // Reuse coarse matrix memory if available (multiple solve)
2249 if (RAPparams.is_null())
2250 RAPparams = rcp(new ParameterList);
2251 if (pL.isSublist("matrixmatrix: kernel params"))
2252 RAPparams->setParameters(pL.sublist("matrixmatrix: kernel params"));
2253
2254 if (RAPparams->isParameter("graph")) {
2255 Ac = RAPparams->get<RCP<Matrix>>("graph");
2256
2257 // Some eigenvalue may have been cached with the matrix in the previous run.
2258 // As the matrix values will be updated, we need to reset the eigenvalue.
2259 Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<Scalar>::one());
2260 }
2261
2262 // We *always* need global constants for the RAP, but not for the temps
2263 RAPparams->set("compute global constants: temporaries", RAPparams->get("compute global constants: temporaries", false));
2264 RAPparams->set("compute global constants", true);
2265
2266 if (pL.get<bool>("rap: triple product") == false || isGPU) {
2267 if (pL.get<bool>("rap: triple product") && isGPU)
2268 verbObj.GetOStream(Warnings1) << "Switching from triple product to R x (A x P) since triple product has not been implemented for "
2269 << Node::execution_space::name() << std::endl;
2270
2271 RCP<Matrix> AP;
2272
2273 // Reuse pattern if available (multiple solve)
2274 if (APparams.is_null())
2275 APparams = rcp(new ParameterList);
2276 if (pL.isSublist("matrixmatrix: kernel params"))
2277 APparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
2278
2279 // By default, we don't need global constants for A*P
2280 APparams->set("compute global constants: temporaries", APparams->get("compute global constants: temporaries", false));
2281 APparams->set("compute global constants", APparams->get("compute global constants", false));
2282
2283 if (APparams->isParameter("graph"))
2284 AP = APparams->get<RCP<Matrix>>("graph");
2285
2286 std::string monitorstrAP = "MxM: " + matrixName + " x " + prolongatorName;
2287 std::string timerstrAP = "MueLu::" + matrixName + "*" + prolongatorName;
2288 if (!labelstr.empty())
2289 timerstrAP = labelstr + timerstrAP;
2290 if (!levelstr.empty())
2291 timerstrAP = timerstrAP + "-" + levelstr;
2292
2293 {
2294 SubFactoryMonitor subM(verbObj, monitorstrAP, *coarseLevel);
2295
2296 AP = MatrixMatrix::Multiply(*A, !doTranspose, *P, !doTranspose, AP, verbObj.GetOStream(Statistics2),
2297 doFillComplete, doOptimizeStorage, timerstrAP, APparams);
2298 }
2299
2300 // Allow optimization of storage.
2301 // This is necessary for new faster Epetra MM kernels.
2302 // Seems to work with matrix modifications to repair diagonal entries.
2303
2304 std::string timerstrRAP, monitorstrRAP;
2305 if (useImplicit) {
2306 monitorstrRAP = "MxM: " + prolongatorName + "' x (" + matrixName + prolongatorName + ") (implicit)";
2307 timerstrRAP = "MueLu::" + restrictorName + "*(" + matrixName + "*" + prolongatorName + ")-implicit";
2308 } else {
2309 monitorstrRAP = "MxM: " + restrictorName + " x (" + matrixName + prolongatorName + ") (explicit)";
2310 timerstrRAP = "MueLu::" + restrictorName + "*(" + matrixName + "*" + prolongatorName + ")-explicit";
2311 }
2312 if (!labelstr.empty())
2313 timerstrRAP = labelstr + timerstrRAP;
2314 if (!levelstr.empty())
2315 timerstrRAP = timerstrRAP + "-" + levelstr;
2316
2317 if (useImplicit) {
2318 SubFactoryMonitor m2(verbObj, monitorstrRAP, *coarseLevel);
2319
2320 Ac = MatrixMatrix::Multiply(*P, doTranspose, *AP, !doTranspose, Ac, verbObj.GetOStream(Statistics2),
2321 doFillComplete, doOptimizeStorage, timerstrRAP, RAPparams);
2322
2323 } else {
2324 SubFactoryMonitor m2(verbObj, monitorstrRAP, *coarseLevel);
2325
2326 Ac = MatrixMatrix::Multiply(*R, !doTranspose, *AP, !doTranspose, Ac, verbObj.GetOStream(Statistics2),
2327 doFillComplete, doOptimizeStorage, timerstrRAP, RAPparams);
2328 }
2329
2330 if (!isGPU) {
2331 APparams->set("graph", AP);
2332 }
2333
2334 } else {
2335 std::string monitorstrRAP;
2336 std::string timerstrRAP;
2337 if (useImplicit) {
2338 monitorstrRAP = "MxMxM: " + restrictorName + " x " + matrixName + " x " + prolongatorName + " (implicit)";
2339 timerstrRAP = "MueLu::" + restrictorName + "*" + matrixName + "*" + prolongatorName + "-implicit";
2340 } else {
2341 monitorstrRAP = "MxMxM: " + restrictorName + " x " + matrixName + " x " + prolongatorName + " (explicit)";
2342 timerstrRAP = "MueLu::" + restrictorName + "*" + matrixName + "*" + prolongatorName + "-explicit";
2343 }
2344 if (!labelstr.empty())
2345 timerstrRAP = labelstr + timerstrRAP;
2346 if (!levelstr.empty())
2347 timerstrRAP = timerstrRAP + "-" + levelstr;
2348
2349 if (useImplicit) {
2350 Ac = MatrixFactory::Build(P->getDomainMap(), Teuchos::as<LocalOrdinal>(0));
2351
2352 SubFactoryMonitor m2(verbObj, monitorstrRAP, *coarseLevel);
2353
2354 Xpetra::TripleMatrixMultiply<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2355 MultiplyRAP(*P, doTranspose, *A, !doTranspose, *P, !doTranspose, *Ac, doFillComplete,
2356 doOptimizeStorage, timerstrRAP,
2357 RAPparams);
2358 } else {
2359 Ac = MatrixFactory::Build(R->getRowMap(), Teuchos::as<LocalOrdinal>(0));
2360
2361 SubFactoryMonitor m2(verbObj, monitorstrRAP, *coarseLevel);
2362
2363 Xpetra::TripleMatrixMultiply<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2364 MultiplyRAP(*R, !doTranspose, *A, !doTranspose, *P, !doTranspose, *Ac, doFillComplete,
2365 doOptimizeStorage, timerstrRAP,
2366 RAPparams);
2367 }
2368 }
2369
2370 Teuchos::ArrayView<const double> relativeFloor = pL.get<Teuchos::Array<double>>("rap: relative diagonal floor")();
2371 if (relativeFloor.size() > 0) {
2372 Xpetra::MatrixUtils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::RelativeDiagonalBoost(Ac, relativeFloor, verbObj.GetOStream(Statistics2));
2373 }
2374
2375 bool repairZeroDiagonals = pL.get<bool>("RepairMainDiagonal") || pL.get<bool>("rap: fix zero diagonals");
2376 bool checkAc = pL.get<bool>("CheckMainDiagonal") || pL.get<bool>("rap: fix zero diagonals");
2377
2378 if (checkAc || repairZeroDiagonals) {
2379 using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
2380 magnitudeType threshold;
2381 if (pL.isType<magnitudeType>("rap: fix zero diagonals threshold"))
2382 threshold = pL.get<magnitudeType>("rap: fix zero diagonals threshold");
2383 else
2384 threshold = Teuchos::as<magnitudeType>(pL.get<double>("rap: fix zero diagonals threshold"));
2385 Scalar replacement = Teuchos::as<Scalar>(pL.get<double>("rap: fix zero diagonals replacement"));
2386 Xpetra::MatrixUtils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::CheckRepairMainDiagonal(Ac, repairZeroDiagonals, verbObj.GetOStream(Warnings1), threshold, replacement);
2387 }
2388
2389 if (verbObj.IsPrint(Statistics2)) {
2390 RCP<ParameterList> params = rcp(new ParameterList());
2391 params->set("printLoadBalancingInfo", true);
2392 params->set("printCommInfo", true);
2393 verbObj.GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ac, coarseMatrixName, params);
2394 }
2395
2396 if (!isGPU) {
2397 RAPparams->set("graph", Ac);
2398 }
2399
2400 if (Behavior::debug())
2401 MatrixUtils::checkLocalRowMapMatchesColMap(*Ac);
2402}
2403
2404} // namespace MueLu
2405
2406#define MUELU_UTILITIESBASE_SHORT
2407#endif // MUELU_UTILITIESBASE_DEF_HPP
2408
2409// LocalWords: LocalOrdinal
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
MueLu::DefaultNode Node
Base class for MueLu classes.
Exception throws to report incompatible objects (like maps).
Exception throws to report errors in the internal logical of the program.
Class that holds all level-specific information.
int GetLevelID() const
Return level number.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
Teuchos::ScalarTraits< Scalar >::magnitudeType Magnitude
static Teuchos::RCP< Vector > GetInverse(Teuchos::RCP< const Vector > v, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps() *100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero())
Return vector containing inverse of input vector.
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps() *100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool doLumped=false)
Extract Matrix Diagonal.
static RCP< Xpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > GetThresholdedGraph(const RCP< Matrix > &A, const Magnitude threshold)
Threshold a graph.
static Teuchos::ArrayRCP< Scalar > GetMatrixDiagonal_arcp(const Matrix &A)
Extract Matrix Diagonal.
static RCP< Matrix > GetThresholdedMatrix(const RCP< Matrix > &Ain, const Magnitude threshold, const bool keepDiagonal=true)
Threshold a matrix.
static RCP< Matrix > Crs2Op(RCP< CrsMatrix > Op)
static RCP< Vector > GetMatrixDiagonal(const Matrix &A)
Extract Matrix Diagonal.
Teuchos::FancyOStream & GetOStream(MsgType type, int thisProcRankOnly=0) const
Get an output stream for outputting the input message type.
bool IsPrint(MsgType type, int thisProcRankOnly=-1) const
Find out whether we need to print out information for a specific message type.
Namespace for MueLu classes and methods.
Kokkos::View< bool *, memory_space > DetectDirichletRows_kokkos(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
@ Statistics2
Print even more statistics.
@ Warnings1
Additional warnings.
void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, memory_space > &dirichletRows)
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > removeSmallEntries(Teuchos::RCP< Xpetra::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType threshold, const bool keepDiagonal)
KOKKOS_FORCEINLINE_FUNCTION bool isDirichletRow(typename CrsMatrix::ordinal_type rowId, KokkosSparse::SparseRowViewConst< CrsMatrix > &row, const typename KokkosKernels::ArithTraits< typename CrsMatrix::value_type >::magnitudeType &tol, const bool count_twos_as_dirichlet)