MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_UtilitiesBase_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_UTILITIESBASE_DEF_HPP
11#define MUELU_UTILITIESBASE_DEF_HPP
12
13#include "KokkosKernels_ArithTraits.hpp"
14#include "MueLu_ConfigDefs.hpp"
15
17
18#include "MueLu_PerfUtils.hpp"
19#include "MueLu_Monitor.hpp"
20#include <Xpetra_MatrixMatrix.hpp>
21#include <Xpetra_MatrixFactory.hpp>
22#include <Xpetra_MatrixUtils.hpp>
23#include <Xpetra_TripleMatrixMultiply.hpp>
24
25#include <Kokkos_Core.hpp>
26#include <KokkosSparse_CrsMatrix.hpp>
27#include <KokkosSparse_getDiagCopy.hpp>
28
29#include <Xpetra_BlockedVector.hpp>
30#include <Xpetra_BlockedMap.hpp>
31#include <Xpetra_BlockedMultiVector.hpp>
32#include <Xpetra_ExportFactory.hpp>
33
34#include <Xpetra_Import.hpp>
35#include <Xpetra_ImportFactory.hpp>
36#include <Xpetra_MatrixMatrix.hpp>
37#include <Xpetra_CrsGraph.hpp>
38#include <Xpetra_CrsGraphFactory.hpp>
39#include <Xpetra_CrsMatrixWrap.hpp>
40#include <Xpetra_MatrixFactory.hpp>
41#include <Xpetra_StridedMap.hpp>
42
43#include "MueLu_Exceptions.hpp"
44#include "MueLu_Behavior.hpp"
45#include "Xpetra_CrsMatrixFactory.hpp"
46
47#include <KokkosKernels_Handle.hpp>
48#include <KokkosGraph_RCM.hpp>
49#include "MueLu_Behavior.hpp"
50#include <MueLu_Level.hpp>
51#include <MueLu_FactoryManager.hpp>
52#include <MueLu_InverseApproximationFactory.hpp>
53
54namespace MueLu {
55
56template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
57RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
59 Crs2Op(RCP<Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op) {
60 if (Op.is_null())
61 return Teuchos::null;
62 return rcp(new CrsMatrixWrap(Op));
63}
64
65template <class Scalar,
66 class LocalOrdinal,
67 class GlobalOrdinal,
68 class Node>
69Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
70removeSmallEntries(Teuchos::RCP<Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
71 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType threshold,
72 const bool keepDiagonal) {
73 using ATS = KokkosKernels::ArithTraits<Scalar>;
74 using impl_SC = typename ATS::val_type;
75 using impl_ATS = KokkosKernels::ArithTraits<impl_SC>;
76
77 RCP<Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> crsA;
78 if (keepDiagonal) {
79 crsA = applyFilter_GID(
80 A, KOKKOS_LAMBDA(GlobalOrdinal rgid,
81 GlobalOrdinal cgid,
82 impl_SC val) {
83 return ((impl_ATS::magnitude(val) > threshold) || (rgid == cgid));
84 });
86 } else {
87 crsA = applyFilter_vals(
88 A, KOKKOS_LAMBDA(impl_SC val) {
89 return (impl_ATS::magnitude(val) > threshold);
90 });
91 }
92 return rcp(new Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>(crsA));
94
95template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
96RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
98 GetThresholdedMatrix(const RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& Ain, const typename Teuchos::ScalarTraits<Scalar>::magnitudeType threshold, const bool keepDiagonal) {
99 RCP<Matrix> Aout;
100 {
101 using matrix_type = Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
102 using local_matrix_type = typename matrix_type::local_matrix_type;
103 using local_graph_type = typename matrix_type::local_graph_type;
104 using execution_space = typename Node::execution_space;
105 using rowmap_type = typename local_graph_type::row_map_type::non_const_type;
106 using entries_type = typename local_graph_type::entries_type::non_const_type;
107 using values_type = typename local_matrix_type::values_type::non_const_type;
108 using range_type = Kokkos::RangePolicy<execution_space, LocalOrdinal>;
109 using implATS = KokkosKernels::ArithTraits<typename matrix_type::impl_scalar_type>;
110 auto lclA = Ain->getLocalMatrixDevice();
111 auto lclRowmap = Ain->getRowMap()->getLocalMap();
112 auto lclColmap = Ain->getColMap()->getLocalMap();
113
114 local_matrix_type thresholdedLocalMatrix;
115 if (keepDiagonal) {
116 rowmap_type rowptr("MueLu::GetThresholdedMatrix::rowptr", lclA.numRows() + 1);
117 LocalOrdinal nnz = 0;
118 Kokkos::parallel_scan(
119 range_type(0, lclA.numRows()), KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& my_nnz, const bool is_final) {
120 auto row = lclA.rowConst(rlid);
121 auto rclid = lclColmap.getLocalElement(lclRowmap.getGlobalElement(rlid));
122
123 for (LocalOrdinal offset = 0; offset < row.length; ++offset) {
124 auto clid = row.colidx(offset);
125 auto val = row.value(offset);
126 if ((rclid == clid) || implATS::magnitude(val) > threshold) {
127 ++my_nnz;
128 if (is_final && (rlid + 1 < lclA.numRows())) {
129 rowptr(rlid + 2) = my_nnz;
130 }
131 }
132 }
133 },
134 nnz);
135
136 entries_type entries(Kokkos::ViewAllocateWithoutInitializing("MueLu::GetThresholdedGraph::indices"), nnz);
137 values_type values(Kokkos::ViewAllocateWithoutInitializing("MueLu::GetThresholdedGraph::values"), nnz);
138 Kokkos::parallel_for(
139 range_type(0, lclA.numRows()), KOKKOS_LAMBDA(const LocalOrdinal rlid) {
140 auto row = lclA.rowConst(rlid);
141 auto rclid = lclColmap.getLocalElement(lclRowmap.getGlobalElement(rlid));
142
143 for (LocalOrdinal offset = 0; offset < row.length; ++offset) {
144 auto clid = row.colidx(offset);
145 auto val = row.value(offset);
146 if ((rclid == clid) || implATS::magnitude(val) > threshold) {
147 entries(rowptr(rlid + 1)) = clid;
148 values(rowptr(rlid + 1)) = val;
149 ++rowptr(rlid + 1);
150 }
151 }
152 });
153
154 thresholdedLocalMatrix = local_matrix_type("thresholdedLocalMatrix", lclA.numRows(), lclA.numCols(), nnz, values, rowptr, entries);
155 } else {
156 rowmap_type rowptr("MueLu::GetThresholdedMatrix::rowptr", lclA.numRows() + 1);
157 LocalOrdinal nnz = 0;
158 Kokkos::parallel_scan(
159 range_type(0, lclA.numRows()), KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& my_nnz, const bool is_final) {
160 auto row = lclA.rowConst(rlid);
161
162 for (LocalOrdinal offset = 0; offset < row.length; ++offset) {
163 auto val = row.value(offset);
164 if (implATS::magnitude(val) > threshold) {
165 ++my_nnz;
166 if (is_final && (rlid + 1 < lclA.numRows())) {
167 rowptr(rlid + 2) = my_nnz;
169 }
170 }
171 },
172 nnz);
173
174 entries_type entries(Kokkos::ViewAllocateWithoutInitializing("MueLu::GetThresholdedGraph::indices"), nnz);
175 values_type values(Kokkos::ViewAllocateWithoutInitializing("MueLu::GetThresholdedGraph::values"), nnz);
176 Kokkos::parallel_for(
177 range_type(0, lclA.numRows()), KOKKOS_LAMBDA(const LocalOrdinal rlid) {
178 auto row = lclA.rowConst(rlid);
179
180 for (LocalOrdinal offset = 0; offset < row.length; ++offset) {
181 auto clid = row.colidx(offset);
182 auto val = row.value(offset);
183 if (implATS::magnitude(val) > threshold) {
184 entries(rowptr(rlid + 1)) = clid;
185 values(rowptr(rlid + 1)) = val;
186 ++rowptr(rlid + 1);
187 }
189 });
191 thresholdedLocalMatrix = local_matrix_type("thresholdedLocalMatrix", lclA.numRows(), lclA.numCols(), nnz, values, rowptr, entries);
193
194 Aout = MatrixFactory::Build(thresholdedLocalMatrix, Ain->getRowMap(), Ain->getColMap(), Ain->getDomainMap(), Ain->getRangeMap());
195 }
196
197 return Aout;
198}
199
200template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
201RCP<Xpetra::CrsGraph<LocalOrdinal, GlobalOrdinal, Node>>
203 GetThresholdedGraph(const RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A, const Magnitude threshold) {
204 RCP<CrsGraph> sparsityPattern;
205 {
206 using matrix_type = Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
207 using graph_type = Xpetra::CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
208 using local_graph_type = typename graph_type::local_graph_device_type;
209 using execution_space = typename Node::execution_space;
210 using rowmap_type = typename local_graph_type::row_map_type::non_const_type;
211 using entries_type = typename local_graph_type::entries_type::non_const_type;
212 using range_type = Kokkos::RangePolicy<execution_space, LocalOrdinal>;
213 using implATS = KokkosKernels::ArithTraits<typename matrix_type::impl_scalar_type>;
214 using magATS = KokkosKernels::ArithTraits<typename implATS::magnitudeType>;
215 auto lclA = A->getLocalMatrixDevice();
216 auto lclRowmap = A->getRowMap()->getLocalMap();
217 auto lclColmap = A->getColMap()->getLocalMap();
218
219 rowmap_type rowptr("MueLu::GetThresholdedGraph::rowptr", lclA.numRows() + 1);
220
221 LocalOrdinal nnz = 0;
222 Kokkos::parallel_scan(
223 range_type(0, lclA.numRows()), KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& my_nnz, const bool is_final) {
224 auto row = lclA.rowConst(rlid);
225 auto rclid = lclColmap.getLocalElement(lclRowmap.getGlobalElement(rlid));
226
227 typename implATS::magnitudeType d = magATS::one();
228 for (LocalOrdinal offset = 0; offset < row.length; ++offset) {
229 auto clid = row.colidx(offset);
230 if (rclid == clid) {
231 auto val = implATS::magnitude(row.value(offset));
232 if (val > implATS::epsilon())
233 d = val;
234 }
236
237 for (LocalOrdinal offset = 0; offset < row.length; ++offset) {
238 auto clid = row.colidx(offset);
239 auto val = row.value(offset);
240 if ((rclid == clid) || implATS::magnitude(val) > d * threshold) {
241 ++my_nnz;
242 if (is_final && (rlid + 1 < lclA.numRows())) {
243 rowptr(rlid + 2) = my_nnz;
244 }
245 }
246 }
247 },
248 nnz);
250 entries_type entries(Kokkos::ViewAllocateWithoutInitializing("MueLu::GetThresholdedGraph::indices"), nnz);
251 Kokkos::parallel_for(
252 range_type(0, lclA.numRows()), KOKKOS_LAMBDA(const LocalOrdinal rlid) {
253 auto row = lclA.rowConst(rlid);
254 auto rclid = lclColmap.getLocalElement(lclRowmap.getGlobalElement(rlid));
255
256 typename implATS::magnitudeType d = magATS::one();
257 for (LocalOrdinal offset = 0; offset < row.length; ++offset) {
258 auto clid = row.colidx(offset);
259 if (rclid == clid) {
260 auto val = implATS::magnitude(row.value(offset));
261 if (val > implATS::epsilon())
262 d = val;
263 }
264 }
265
266 for (LocalOrdinal offset = 0; offset < row.length; ++offset) {
267 auto clid = row.colidx(offset);
268 auto val = row.value(offset);
269 if ((rclid == clid) || implATS::magnitude(val) > d * threshold) {
270 entries(rowptr(rlid + 1)) = clid;
271 ++rowptr(rlid + 1);
272 }
274 });
275
276 sparsityPattern = CrsGraphFactory::Build(A->getRowMap(), A->getColMap(), rowptr, entries);
277 sparsityPattern->fillComplete(A->getDomainMap(), A->getRangeMap());
278 }
279
280 return sparsityPattern;
281}
282
283template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
284Teuchos::ArrayRCP<Scalar>
286 GetMatrixDiagonal_arcp(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A) {
287 size_t numRows = A.getRowMap()->getLocalNumElements();
288 Teuchos::ArrayRCP<Scalar> diag(numRows);
289 Teuchos::ArrayView<const LocalOrdinal> cols;
290 Teuchos::ArrayView<const Scalar> vals;
291 for (size_t i = 0; i < numRows; ++i) {
292 A.getLocalRowView(i, cols, vals);
293 LocalOrdinal j = 0;
294 for (; j < cols.size(); ++j) {
295 if (Teuchos::as<size_t>(cols[j]) == i) {
296 diag[i] = vals[j];
297 break;
299 }
300 if (j == cols.size()) {
301 // Diagonal entry is absent
302 diag[i] = Teuchos::ScalarTraits<Scalar>::zero();
303 }
304 }
305 return diag;
306}
307
308template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
309Teuchos::RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
311 GetMatrixDiagonal(const Matrix& A) {
312 const auto rowMap = A.getRowMap();
313 auto diag = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(rowMap, false);
314
315 const CrsMatrixWrap* crsOp = dynamic_cast<const CrsMatrixWrap*>(&A);
316 if ((crsOp != NULL) && (rowMap->lib() == Xpetra::UseTpetra)) {
317 using device_type = typename CrsGraph::device_type;
318 Kokkos::View<size_t*, device_type> offsets("offsets", rowMap->getLocalNumElements());
319 crsOp->getCrsGraph()->getLocalDiagOffsets(offsets);
320 crsOp->getCrsMatrix()->getLocalDiagCopy(*diag, offsets);
321 } else {
322 A.getLocalDiagCopy(*diag);
323 }
324
325 return diag;
326}
327
328// template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
329// RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
330// UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
331// GetMatrixDiagonalInverse(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> & A, Magnitude tol, Scalar valReplacement) {
332// Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase::GetMatrixDiagonalInverse");
333
334// RCP<const Map> rowMap = A.getRowMap();
335// RCP<Vector> diag = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap,true);
336
337// A.getLocalDiagCopy(*diag);
338
339// RCP<Vector> inv = MueLu::UtilitiesBase<Scalar,LocalOrdinal,GlobalOrdinal,Node>::GetInverse(diag, tol, valReplacement);
340
341// return inv;
342// }
343
344template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
345Teuchos::RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
347 GetMatrixDiagonalInverse(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
348 typename Teuchos::ScalarTraits<Scalar>::magnitudeType tol,
349 Scalar valReplacement,
350 const bool doLumped) {
351 Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("Utilities::GetMatrixDiagonalInverse");
352
353 RCP<const BlockedCrsMatrix> bA = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpFromRef(A));
354 if (!bA.is_null()) {
355 RCP<const Map> rowMap = A.getRowMap();
356 RCP<Vector> diag = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(rowMap, false);
357 A.getLocalDiagCopy(*diag);
358 RCP<Vector> inv = MueLu::UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetInverse(diag, tol, valReplacement);
359 return inv;
360 }
361
362 // Some useful type definitions
363 using local_matrix_type = typename Matrix::local_matrix_device_type;
364 // using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
365 using value_type = typename local_matrix_type::value_type;
366 using values_type = typename local_matrix_type::values_type;
367 using scalar_type = typename values_type::non_const_value_type;
368 using ordinal_type = typename local_matrix_type::ordinal_type;
369 using execution_space = typename local_matrix_type::execution_space;
370 // using memory_space = typename local_matrix_type::memory_space;
371 // Be careful with this one, if using KokkosKernels::ArithTraits<Scalar>
372 // you are likely to run into errors when handling std::complex<>
373 // a good way to work around that is to use the following:
374 // using KAT = KokkosKernels::ArithTraits<KokkosKernels::ArithTraits<Scalar>::val_type> >
375 // here we have: value_type = KokkosKernels::ArithTraits<Scalar>::val_type
376 using KAT = KokkosKernels::ArithTraits<value_type>;
377
378 // Get/Create distributed objects
379 RCP<const Map> rowMap = A.getRowMap();
380 RCP<Vector> diag = VectorFactory::Build(rowMap, false);
381
382 // Now generate local objects
383 local_matrix_type localMatrix = A.getLocalMatrixDevice();
384 auto diagVals = diag->getLocalViewDevice(Tpetra::Access::ReadWrite);
385
386 ordinal_type numRows = localMatrix.graph.numRows();
387
388 scalar_type valReplacement_dev = valReplacement;
389
390 // Note: 2019-11-21, LBV
391 // This could be implemented with a TeamPolicy over the rows
392 // and a TeamVectorRange over the entries in a row if performance
393 // becomes more important here.
394 if (!doLumped)
395 Kokkos::parallel_for(
396 "Utilities::GetMatrixDiagonalInverse",
397 Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
398 KOKKOS_LAMBDA(const ordinal_type rowIdx) {
399 bool foundDiagEntry = false;
400 auto myRow = localMatrix.rowConst(rowIdx);
401 for (ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
402 if (myRow.colidx(entryIdx) == rowIdx) {
403 foundDiagEntry = true;
404 if (KAT::magnitude(myRow.value(entryIdx)) > KAT::magnitude(tol)) {
405 diagVals(rowIdx, 0) = KAT::one() / myRow.value(entryIdx);
406 } else {
407 diagVals(rowIdx, 0) = valReplacement_dev;
408 }
409 break;
410 }
411 }
412
413 if (!foundDiagEntry) {
414 diagVals(rowIdx, 0) = KAT::zero();
415 }
416 });
417 else
418 Kokkos::parallel_for(
419 "Utilities::GetMatrixDiagonalInverse",
420 Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
421 KOKKOS_LAMBDA(const ordinal_type rowIdx) {
422 auto myRow = localMatrix.rowConst(rowIdx);
423 for (ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
424 diagVals(rowIdx, 0) += KAT::magnitude(myRow.value(entryIdx));
425 }
426 if (KAT::magnitude(diagVals(rowIdx, 0)) > KAT::magnitude(tol))
427 diagVals(rowIdx, 0) = KAT::one() / diagVals(rowIdx, 0);
428 else
429 diagVals(rowIdx, 0) = valReplacement_dev;
430 });
431
432 return diag;
434
435template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
436Teuchos::RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
438 GetLumpedMatrixDiagonal(Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> const& A, const bool doReciprocal,
439 Magnitude tol,
440 Scalar valReplacement,
441 const bool replaceSingleEntryRowWithZero,
442 const bool useAverageAbsDiagVal) {
443 typedef Teuchos::ScalarTraits<Scalar> TST;
444
445 RCP<Vector> diag = Teuchos::null;
446 const Scalar zero = TST::zero();
447 const Scalar one = TST::one();
448 const Scalar two = one + one;
449
450 Teuchos::RCP<const Matrix> rcpA = Teuchos::rcpFromRef(A);
451
452 RCP<const Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> bA =
453 Teuchos::rcp_dynamic_cast<const Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(rcpA);
454 if (bA == Teuchos::null) {
455 RCP<const Map> rowMap = rcpA->getRowMap();
456 diag = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(rowMap, false);
457
458 if (rowMap->lib() == Xpetra::UnderlyingLib::UseTpetra) {
459 Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase::GetLumpedMatrixDiagonal (Kokkos implementation)");
460 // Implement using Kokkos
461 using local_vector_type = typename Vector::dual_view_type::t_dev_um;
462 using local_matrix_type = typename Matrix::local_matrix_device_type;
463 using execution_space = typename local_vector_type::execution_space;
464 // using rowmap_type = typename local_matrix_type::row_map_type;
465 // using entries_type = typename local_matrix_type::index_type;
466 using values_type = typename local_matrix_type::values_type;
467 using scalar_type = typename values_type::non_const_value_type;
468 using mag_type = typename KokkosKernels::ArithTraits<scalar_type>::mag_type;
469 using KAT_S = typename KokkosKernels::ArithTraits<scalar_type>;
470 using KAT_M = typename KokkosKernels::ArithTraits<mag_type>;
471 using size_type = typename local_matrix_type::non_const_size_type;
472
473 local_vector_type diag_dev = diag->getLocalViewDevice(Tpetra::Access::OverwriteAll);
474 local_matrix_type local_mat_dev = rcpA->getLocalMatrixDevice();
475 Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
476 scalar_type valReplacement_dev = valReplacement;
477
478 if (doReciprocal) {
479 Kokkos::View<int*, execution_space> nnzPerRow("nnz per rows", diag_dev.extent(0));
480 Kokkos::View<scalar_type*, execution_space> regSum("regSum", diag_dev.extent(0));
481 Kokkos::View<mag_type, execution_space> avgAbsDiagVal_dev("avgAbsDiagVal");
482 Kokkos::View<int, execution_space> numDiagsEqualToOne_dev("numDiagsEqualToOne");
483
484 {
485 Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: parallel_for (doReciprocal)");
486 Kokkos::parallel_for(
487 "GetLumpedMatrixDiagonal", my_policy,
488 KOKKOS_LAMBDA(const int rowIdx) {
489 diag_dev(rowIdx, 0) = KAT_S::zero();
490 for (size_type entryIdx = local_mat_dev.graph.row_map(rowIdx);
491 entryIdx < local_mat_dev.graph.row_map(rowIdx + 1);
492 ++entryIdx) {
493 regSum(rowIdx) += local_mat_dev.values(entryIdx);
494 if (KAT_M::zero() < KAT_S::abs(local_mat_dev.values(entryIdx))) {
495 ++nnzPerRow(rowIdx);
496 }
497 diag_dev(rowIdx, 0) += KAT_S::abs(local_mat_dev.values(entryIdx));
498 if (rowIdx == local_mat_dev.graph.entries(entryIdx)) {
499 Kokkos::atomic_add(&avgAbsDiagVal_dev(), KAT_S::abs(local_mat_dev.values(entryIdx)));
500 }
501 }
502
503 if (nnzPerRow(rowIdx) == 1 && KAT_S::magnitude(diag_dev(rowIdx, 0)) == KAT_M::one()) {
504 Kokkos::atomic_add(&numDiagsEqualToOne_dev(), 1);
505 }
506 });
507 }
508 if (useAverageAbsDiagVal) {
509 Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: useAverageAbsDiagVal");
510 typename Kokkos::View<mag_type, execution_space>::host_mirror_type avgAbsDiagVal = Kokkos::create_mirror_view(avgAbsDiagVal_dev);
511 Kokkos::deep_copy(avgAbsDiagVal, avgAbsDiagVal_dev);
512 int numDiagsEqualToOne;
513 Kokkos::deep_copy(numDiagsEqualToOne, numDiagsEqualToOne_dev);
514
515 tol = TST::magnitude(100 * Teuchos::ScalarTraits<Scalar>::eps()) * (avgAbsDiagVal() - numDiagsEqualToOne) / (rowMap->getLocalNumElements() - numDiagsEqualToOne);
516 }
517
518 {
519 Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("ComputeLumpedDiagonalInverse: parallel_for (doReciprocal)");
520 Kokkos::parallel_for(
521 "ComputeLumpedDiagonalInverse", my_policy,
522 KOKKOS_LAMBDA(const int rowIdx) {
523 if (replaceSingleEntryRowWithZero && nnzPerRow(rowIdx) <= 1) {
524 diag_dev(rowIdx, 0) = KAT_S::zero();
525 } else if ((diag_dev(rowIdx, 0) != KAT_S::zero()) && (KAT_S::magnitude(diag_dev(rowIdx, 0)) < KAT_S::magnitude(2 * regSum(rowIdx)))) {
526 diag_dev(rowIdx, 0) = KAT_S::one() / KAT_S::magnitude(2 * regSum(rowIdx));
527 } else {
528 if (KAT_S::magnitude(diag_dev(rowIdx, 0)) > tol) {
529 diag_dev(rowIdx, 0) = KAT_S::one() / diag_dev(rowIdx, 0);
530 } else {
531 diag_dev(rowIdx, 0) = valReplacement_dev;
532 }
533 }
534 });
535 }
536
537 } else {
538 Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: parallel_for");
539 Kokkos::parallel_for(
540 "GetLumpedMatrixDiagonal", my_policy,
541 KOKKOS_LAMBDA(const int rowIdx) {
542 diag_dev(rowIdx, 0) = KAT_S::zero();
543 for (size_type entryIdx = local_mat_dev.graph.row_map(rowIdx);
544 entryIdx < local_mat_dev.graph.row_map(rowIdx + 1);
545 ++entryIdx) {
546 diag_dev(rowIdx, 0) += KAT_S::magnitude(local_mat_dev.values(entryIdx));
547 }
548 });
549 }
550 } else {
551 // Implement using Teuchos
552 Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase: GetLumpedMatrixDiagonal: (Teuchos implementation)");
553 ArrayRCP<Scalar> diagVals = diag->getDataNonConst(0);
554 Teuchos::Array<Scalar> regSum(diag->getLocalLength());
555 Teuchos::ArrayView<const LocalOrdinal> cols;
556 Teuchos::ArrayView<const Scalar> vals;
557
558 std::vector<int> nnzPerRow(rowMap->getLocalNumElements());
559
560 // FIXME 2021-10-22 JHU If this is called with doReciprocal=false, what should the correct behavior be? Currently,
561 // FIXME 2021-10-22 JHU the diagonal entry is set to be the sum of the absolute values of the row entries.
562
563 const Magnitude zeroMagn = TST::magnitude(zero);
564 Magnitude avgAbsDiagVal = TST::magnitude(zero);
565 int numDiagsEqualToOne = 0;
566 for (size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
567 nnzPerRow[i] = 0;
568 rcpA->getLocalRowView(i, cols, vals);
569 diagVals[i] = zero;
570 for (LocalOrdinal j = 0; j < cols.size(); ++j) {
571 regSum[i] += vals[j];
572 const Magnitude rowEntryMagn = TST::magnitude(vals[j]);
573 if (rowEntryMagn > zeroMagn)
574 nnzPerRow[i]++;
575 diagVals[i] += rowEntryMagn;
576 if (static_cast<size_t>(cols[j]) == i)
577 avgAbsDiagVal += rowEntryMagn;
578 }
579 if (nnzPerRow[i] == 1 && TST::magnitude(diagVals[i]) == 1.)
580 numDiagsEqualToOne++;
581 }
582 if (useAverageAbsDiagVal)
583 tol = TST::magnitude(100 * Teuchos::ScalarTraits<Scalar>::eps()) * (avgAbsDiagVal - numDiagsEqualToOne) / (rowMap->getLocalNumElements() - numDiagsEqualToOne);
584 if (doReciprocal) {
585 for (size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
586 if (replaceSingleEntryRowWithZero && nnzPerRow[i] <= static_cast<int>(1))
587 diagVals[i] = zero;
588 else if ((diagVals[i] != zero) && (TST::magnitude(diagVals[i]) < TST::magnitude(two * regSum[i])))
589 diagVals[i] = one / TST::magnitude((two * regSum[i]));
590 else {
591 if (TST::magnitude(diagVals[i]) > tol)
592 diagVals[i] = one / diagVals[i];
593 else {
594 diagVals[i] = valReplacement;
595 }
596 }
597 }
598 }
599 }
600 } else {
601 TEUCHOS_TEST_FOR_EXCEPTION(doReciprocal, Xpetra::Exceptions::RuntimeError,
602 "UtilitiesBase::GetLumpedMatrixDiagonal(): extracting reciprocal of diagonal of a blocked matrix is not supported");
603 diag = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(bA->getRangeMapExtractor()->getFullMap(), true);
604
605 for (size_t row = 0; row < bA->Rows(); ++row) {
606 for (size_t col = 0; col < bA->Cols(); ++col) {
607 if (!bA->getMatrix(row, col).is_null()) {
608 // if we are in Thyra mode, but the block (row,row) is again a blocked operator, we have to use (pseudo) Xpetra-style GIDs with offset!
609 bool bThyraMode = bA->getRangeMapExtractor()->getThyraMode() && (Teuchos::rcp_dynamic_cast<Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(bA->getMatrix(row, col)) == Teuchos::null);
610 RCP<Vector> ddtemp = bA->getRangeMapExtractor()->ExtractVector(diag, row, bThyraMode);
611 RCP<const Vector> dd = GetLumpedMatrixDiagonal(*(bA->getMatrix(row, col)));
612 ddtemp->update(Teuchos::as<Scalar>(1.0), *dd, Teuchos::as<Scalar>(1.0));
613 bA->getRangeMapExtractor()->InsertVector(ddtemp, row, diag, bThyraMode);
614 }
615 }
616 }
617 }
618
619 return diag;
620}
621
622template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
623Teuchos::RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
625 GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A) {
626 // Get/Create distributed objects
627 RCP<const Map> rowMap = A.getRowMap();
628 auto diag = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(rowMap, false);
629
630 // Implement using Kokkos
631 using local_vector_type = typename Vector::dual_view_type::t_dev_um;
632 using local_matrix_type = typename Matrix::local_matrix_device_type;
633 using execution_space = typename local_vector_type::execution_space;
634 using values_type = typename local_matrix_type::values_type;
635 using scalar_type = typename values_type::non_const_value_type;
636 using KAT_S = typename KokkosKernels::ArithTraits<scalar_type>;
637
638 auto diag_dev = diag->getLocalViewDevice(Tpetra::Access::OverwriteAll);
639 auto local_mat_dev = A.getLocalMatrixDevice();
640 Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
641
642 Kokkos::parallel_for(
643 "GetMatrixMaxMinusOffDiagonal", my_policy,
644 KOKKOS_LAMBDA(const LocalOrdinal rowIdx) {
645 auto mymax = KAT_S::zero();
646 auto row = local_mat_dev.rowConst(rowIdx);
647 for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
648 if (rowIdx != row.colidx(entryIdx)) {
649 if (KAT_S::real(mymax) < -KAT_S::real(row.value(entryIdx)))
650 mymax = -KAT_S::real(row.value(entryIdx));
651 }
652 }
653 diag_dev(rowIdx, 0) = mymax;
654 });
655
656 return diag;
657}
658
659template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
660Teuchos::RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
662 GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, const Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>& BlockNumber) {
663 TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "GetMatrixMaxMinusOffDiagonal: BlockNumber must match's A's column map.");
664
665 // Get/Create distributed objects
666 RCP<const Map> rowMap = A.getRowMap();
667 auto diag = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(rowMap, false);
668
669 // Implement using Kokkos
670 using local_vector_type = typename Vector::dual_view_type::t_dev_um;
671 using local_matrix_type = typename Matrix::local_matrix_device_type;
672 using execution_space = typename local_vector_type::execution_space;
673 using values_type = typename local_matrix_type::values_type;
674 using scalar_type = typename values_type::non_const_value_type;
675 using KAT_S = typename KokkosKernels::ArithTraits<scalar_type>;
676
677 auto diag_dev = diag->getLocalViewDevice(Tpetra::Access::OverwriteAll);
678 auto local_mat_dev = A.getLocalMatrixDevice();
679 auto local_block_dev = BlockNumber.getLocalViewDevice(Tpetra::Access::ReadOnly);
680 Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
681
682 Kokkos::parallel_for(
683 "GetMatrixMaxMinusOffDiagonal", my_policy,
684 KOKKOS_LAMBDA(const LocalOrdinal rowIdx) {
685 auto mymax = KAT_S::zero();
686 auto row = local_mat_dev.row(rowIdx);
687 for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
688 if ((rowIdx != row.colidx(entryIdx)) && (local_block_dev(rowIdx, 0) == local_block_dev(row.colidx(entryIdx), 0))) {
689 if (KAT_S::real(mymax) < -KAT_S::real(row.value(entryIdx)))
690 mymax = -KAT_S::real(row.value(entryIdx));
691 }
692 }
693 diag_dev(rowIdx, 0) = mymax;
694 });
695
696 return diag;
697}
698
699template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
700Teuchos::RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
702 GetInverse(Teuchos::RCP<const Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> v, typename Teuchos::ScalarTraits<Scalar>::magnitudeType tol, Scalar valReplacement) {
703 RCP<Vector> ret = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(v->getMap(), true);
704
705 // check whether input vector "v" is a BlockedVector
706 RCP<const BlockedVector> bv = Teuchos::rcp_dynamic_cast<const BlockedVector>(v);
707 if (bv.is_null() == false) {
708 RCP<BlockedVector> bret = Teuchos::rcp_dynamic_cast<BlockedVector>(ret);
709 TEUCHOS_TEST_FOR_EXCEPTION(bret.is_null() == true, MueLu::Exceptions::RuntimeError, "MueLu::UtilitiesBase::GetInverse: return vector should be of type BlockedVector");
710 RCP<const BlockedMap> bmap = bv->getBlockedMap();
711 for (size_t r = 0; r < bmap->getNumMaps(); ++r) {
712 RCP<const MultiVector> submvec = bv->getMultiVector(r, bmap->getThyraMode());
713 RCP<const Vector> subvec = submvec->getVector(0);
714 RCP<Vector> subvecinf = MueLu::UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetInverse(subvec, tol, valReplacement);
715 bret->setMultiVector(r, subvecinf, bmap->getThyraMode());
716 }
717 return ret;
718 }
719
720 // v is an {Epetra,Tpetra}Vector: work with the underlying raw data
721 ArrayRCP<Scalar> retVals = ret->getDataNonConst(0);
722 ArrayRCP<const Scalar> inputVals = v->getData(0);
723 for (size_t i = 0; i < v->getMap()->getLocalNumElements(); ++i) {
724 if (Teuchos::ScalarTraits<Scalar>::magnitude(inputVals[i]) > tol)
725 retVals[i] = Teuchos::ScalarTraits<Scalar>::one() / inputVals[i];
726 else
727 retVals[i] = valReplacement;
728 }
729 return ret;
730}
731
732// template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
733// RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
734// UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
735// GetMatrixOverlappedDiagonal(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> & A) {
736// RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
737
738// // Undo block map (if we have one)
739// RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
740// if(!browMap.is_null()) rowMap = browMap->getMap();
741
742// RCP<Vector> localDiag = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap);
743// try {
744// const CrsMatrixWrap* crsOp = dynamic_cast<const CrsMatrixWrap*>(&A);
745// if (crsOp == NULL) {
746// throw Exceptions::RuntimeError("cast to CrsMatrixWrap failed");
747// }
748// Teuchos::ArrayRCP<size_t> offsets;
749// crsOp->getLocalDiagOffsets(offsets);
750// crsOp->getLocalDiagCopy(*localDiag,offsets());
751// }
752// catch (...) {
753// ArrayRCP<Scalar> localDiagVals = localDiag->getDataNonConst(0);
754// Teuchos::ArrayRCP<Scalar> diagVals = GetMatrixDiagonal(A);
755// for (LocalOrdinal i = 0; i < localDiagVals.size(); i++)
756// localDiagVals[i] = diagVals[i];
757// localDiagVals = diagVals = null;
758// }
759
760// RCP<Vector> diagonal = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(colMap);
761// RCP< const Xpetra::Import<LocalOrdinal,GlobalOrdinal,Node> > importer;
762// importer = A.getCrsGraph()->getImporter();
763// if (importer == Teuchos::null) {
764// importer = Xpetra::ImportFactory<LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap, colMap);
765// }
766// diagonal->doImport(*localDiag, *(importer), Xpetra::INSERT);
767// return diagonal;
768// }
769
770template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
771RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
773 GetMatrixOverlappedDiagonal(const Matrix& A) {
774 RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
775 RCP<Vector> localDiag = GetMatrixDiagonal(A);
776 RCP<const Import> importer = A.getCrsGraph()->getImporter();
777 if (importer.is_null() && !rowMap->isSameAs(*colMap)) {
778 importer = ImportFactory::Build(rowMap, colMap);
779 }
780 if (!importer.is_null()) {
781 RCP<Vector> diagonal = VectorFactory::Build(colMap, false);
782 diagonal->doImport(*localDiag, *(importer), Xpetra::INSERT);
783 return diagonal;
784 } else
785 return localDiag;
786}
787
788template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
789RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
791 GetMatrixOverlappedDeletedRowsum(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A) {
792 using STS = typename Teuchos::ScalarTraits<SC>;
793
794 // Undo block map (if we have one)
795 RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
796 RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
797 if (!browMap.is_null()) rowMap = browMap->getMap();
798
799 RCP<Vector> local = Xpetra::VectorFactory<SC, LO, GO, Node>::Build(rowMap);
800 RCP<Vector> ghosted = Xpetra::VectorFactory<SC, LO, GO, Node>::Build(colMap, true);
801 ArrayRCP<SC> localVals = local->getDataNonConst(0);
802
803 for (LO row = 0; row < static_cast<LO>(A.getRowMap()->getLocalNumElements()); ++row) {
804 size_t nnz = A.getNumEntriesInLocalRow(row);
805 ArrayView<const LO> indices;
806 ArrayView<const SC> vals;
807 A.getLocalRowView(row, indices, vals);
808
809 SC si = STS::zero();
810
811 for (LO colID = 0; colID < static_cast<LO>(nnz); colID++) {
812 if (indices[colID] != row) {
813 si += vals[colID];
814 }
815 }
816 localVals[row] = si;
817 }
818
819 RCP<const Xpetra::Import<LO, GO, Node>> importer;
820 importer = A.getCrsGraph()->getImporter();
821 if (importer == Teuchos::null) {
822 importer = Xpetra::ImportFactory<LO, GO, Node>::Build(rowMap, colMap);
823 }
824 ghosted->doImport(*local, *(importer), Xpetra::INSERT);
825 return ghosted;
826}
827
828template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
829RCP<Xpetra::Vector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LocalOrdinal, GlobalOrdinal, Node>>
831 GetMatrixOverlappedAbsDeletedRowsum(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A) {
832 RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
833 using STS = typename Teuchos::ScalarTraits<Scalar>;
834 using MTS = typename Teuchos::ScalarTraits<Magnitude>;
835 using MT = Magnitude;
836 using RealValuedVector = Xpetra::Vector<MT, LO, GO, Node>;
837
838 // Undo block map (if we have one)
839 RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
840 if (!browMap.is_null()) rowMap = browMap->getMap();
841
842 RCP<RealValuedVector> local = Xpetra::VectorFactory<MT, LO, GO, Node>::Build(rowMap);
843 RCP<RealValuedVector> ghosted = Xpetra::VectorFactory<MT, LO, GO, Node>::Build(colMap, true);
844 ArrayRCP<MT> localVals = local->getDataNonConst(0);
845
846 for (LO rowIdx = 0; rowIdx < static_cast<LO>(A.getRowMap()->getLocalNumElements()); ++rowIdx) {
847 size_t nnz = A.getNumEntriesInLocalRow(rowIdx);
848 ArrayView<const LO> indices;
849 ArrayView<const SC> vals;
850 A.getLocalRowView(rowIdx, indices, vals);
851
852 MT si = MTS::zero();
853
854 for (LO colID = 0; colID < static_cast<LO>(nnz); ++colID) {
855 if (indices[colID] != rowIdx) {
856 si += STS::magnitude(vals[colID]);
857 }
858 }
859 localVals[rowIdx] = si;
860 }
861
862 RCP<const Xpetra::Import<LO, GO, Node>> importer;
863 importer = A.getCrsGraph()->getImporter();
864 if (importer == Teuchos::null) {
865 importer = Xpetra::ImportFactory<LO, GO, Node>::Build(rowMap, colMap);
866 }
867 ghosted->doImport(*local, *(importer), Xpetra::INSERT);
868 return ghosted;
869}
870
871template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
874 CountNegativeDiagonalEntries(const Matrix& A) {
875 using local_matrix_type = typename Matrix::local_matrix_device_type;
876 using execution_space = typename local_matrix_type::execution_space;
877 using KAT_S = typename KokkosKernels::ArithTraits<typename local_matrix_type::value_type>;
878
879 auto local_mat_dev = A.getLocalMatrixDevice();
880 Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(local_mat_dev.numRows()));
881 GlobalOrdinal count_l = 0, count_g = 0;
882
883 Kokkos::parallel_reduce(
884 "CountNegativeDiagonalEntries", my_policy,
885 KOKKOS_LAMBDA(const LocalOrdinal rowIdx, GlobalOrdinal& sum) {
886 auto row = local_mat_dev.row(rowIdx);
887 for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
888 if (rowIdx == row.colidx(entryIdx) && KAT_S::real(row.value(entryIdx)) < 0)
889 sum++;
890 }
891 },
892 count_l);
893
894 MueLu_sumAll(A.getRowMap()->getComm(), count_l, count_g);
895 return count_g;
896}
897
898template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
899Teuchos::Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>
901 ResidualNorm(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS) {
902 TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
903 const size_t numVecs = X.getNumVectors();
904 RCP<MultiVector> RES = Residual(Op, X, RHS);
905 Teuchos::Array<Magnitude> norms(numVecs);
906 RES->norm2(norms);
907 return norms;
908}
909
910template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
911Teuchos::Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>
913 ResidualNorm(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS, MultiVector& Resid) {
914 TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
915 const size_t numVecs = X.getNumVectors();
916 Residual(Op, X, RHS, Resid);
917 Teuchos::Array<Magnitude> norms(numVecs);
918 Resid.norm2(norms);
919 return norms;
920}
921
922template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
923RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
925 Residual(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS) {
926 TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
927 const size_t numVecs = X.getNumVectors();
928 // TODO Op.getRangeMap should return a BlockedMap if it is a BlockedCrsOperator
929 RCP<MultiVector> RES = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(RHS.getMap(), numVecs, false); // no need to initialize to zero
930 Op.residual(X, RHS, *RES);
931 return RES;
932}
933
934template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
936 Residual(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS, MultiVector& Resid) {
937 TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides");
938 TEUCHOS_TEST_FOR_EXCEPTION(Resid.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of residual vectors != number of right-hand sides");
939 Op.residual(X, RHS, Resid);
940}
941
942template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
943Scalar
945 PowerMethod(const Matrix& A, bool scaleByDiag,
946 LocalOrdinal niters, typename Teuchos::ScalarTraits<Scalar>::magnitudeType tolerance, typename Teuchos::ScalarTraits<Scalar>::magnitudeType diagonalReplacementTolerance, bool verbose, unsigned int seed) {
947 TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRangeMap()->isSameAs(*(A.getDomainMap()))), Exceptions::Incompatible,
948 "Utils::PowerMethod: operator must have domain and range maps that are equivalent.");
949
950 // power iteration
951 RCP<Vector> diagInvVec;
952 if (scaleByDiag) {
953 diagInvVec = GetMatrixDiagonalInverse(A, diagonalReplacementTolerance);
954 }
955
956 Scalar lambda = PowerMethod(A, diagInvVec, niters, tolerance, verbose, seed);
957 return lambda;
958}
959
960template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
961Scalar
962UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
963 PowerMethod(const Matrix& A, const RCP<Vector>& diagInvVec,
964 LocalOrdinal niters, typename Teuchos::ScalarTraits<Scalar>::magnitudeType tolerance, bool verbose, unsigned int seed) {
965 TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRangeMap()->isSameAs(*(A.getDomainMap()))), Exceptions::Incompatible,
966 "Utils::PowerMethod: operator must have domain and range maps that are equivalent.");
967
968 // Create three vectors, fill z with random numbers
969 RCP<Vector> q = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(A.getDomainMap(), true);
970 RCP<Vector> r = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(A.getRangeMap(), true);
971 RCP<Vector> z = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(A.getRangeMap(), false);
972
973 z->setSeed(seed); // seed random number generator
974 z->randomize(true); // use Xpetra implementation: -> same results for Epetra and Tpetra
975
976 Teuchos::Array<Magnitude> norms(1);
977
978 typedef Teuchos::ScalarTraits<Scalar> STS;
979
980 const Scalar zero = STS::zero(), one = STS::one();
981
982 Scalar lambda = zero;
983 Magnitude residual = STS::magnitude(zero);
984
985 // power iteration
986 for (int iter = 0; iter < niters; ++iter) {
987 z->norm2(norms); // Compute 2-norm of z
988 q->update(one / norms[0], *z, zero); // Set q = z / normz
989 A.apply(*q, *z); // Compute z = A*q
990 if (diagInvVec != Teuchos::null)
991 z->elementWiseMultiply(one, *diagInvVec, *z, zero);
992 lambda = q->dot(*z); // Approximate maximum eigenvalue: lamba = dot(q,z)
993
994 if (iter % 100 == 0 || iter + 1 == niters) {
995 r->update(1.0, *z, -lambda, *q, zero); // Compute A*q - lambda*q
996 r->norm2(norms);
997 residual = STS::magnitude(norms[0] / lambda);
998 if (verbose) {
999 std::cout << "Iter = " << iter
1000 << " Lambda = " << lambda
1001 << " Residual of A*q - lambda*q = " << residual
1002 << std::endl;
1003 }
1004 }
1005 if (residual < tolerance)
1006 break;
1007 }
1008 return lambda;
1009}
1010
1011template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1012RCP<Teuchos::FancyOStream>
1014 MakeFancy(std::ostream& os) {
1015 RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(os));
1016 return fancy;
1017}
1018
1019template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1020typename Teuchos::ScalarTraits<Scalar>::magnitudeType
1022 Distance2(const Teuchos::Array<Teuchos::ArrayRCP<const Scalar>>& v, LocalOrdinal i0, LocalOrdinal i1) {
1023 const size_t numVectors = v.size();
1024
1025 Scalar d = Teuchos::ScalarTraits<Scalar>::zero();
1026 for (size_t j = 0; j < numVectors; j++) {
1027 d += (v[j][i0] - v[j][i1]) * (v[j][i0] - v[j][i1]);
1028 }
1029 return Teuchos::ScalarTraits<Scalar>::magnitude(d);
1030}
1031
1032template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1033typename Teuchos::ScalarTraits<Scalar>::magnitudeType
1035 Distance2(const Teuchos::ArrayView<double>& weight, const Teuchos::Array<Teuchos::ArrayRCP<const Scalar>>& v, LocalOrdinal i0, LocalOrdinal i1) {
1036 const size_t numVectors = v.size();
1037 using MT = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
1038
1039 Scalar d = Teuchos::ScalarTraits<Scalar>::zero();
1040 for (size_t j = 0; j < numVectors; j++) {
1041 d += Teuchos::as<MT>(weight[j]) * (v[j][i0] - v[j][i1]) * (v[j][i0] - v[j][i1]);
1042 }
1043 return Teuchos::ScalarTraits<Scalar>::magnitude(d);
1044}
1045
1046template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1047Teuchos::ArrayRCP<const bool>
1049 DetectDirichletRows(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, const typename Teuchos::ScalarTraits<Scalar>::magnitudeType& tol, bool count_twos_as_dirichlet) {
1050 LocalOrdinal numRows = A.getLocalNumRows();
1051 typedef Teuchos::ScalarTraits<Scalar> STS;
1052 ArrayRCP<bool> boundaryNodes(numRows, true);
1053 if (count_twos_as_dirichlet) {
1054 for (LocalOrdinal row = 0; row < numRows; row++) {
1055 ArrayView<const LocalOrdinal> indices;
1056 ArrayView<const Scalar> vals;
1057 A.getLocalRowView(row, indices, vals);
1058 size_t nnz = A.getNumEntriesInLocalRow(row);
1059 if (nnz > 2) {
1060 size_t col;
1061 for (col = 0; col < nnz; col++)
1062 if ((indices[col] != row) && STS::magnitude(vals[col]) > tol) {
1063 if (!boundaryNodes[row])
1064 break;
1065 boundaryNodes[row] = false;
1066 }
1067 if (col == nnz)
1068 boundaryNodes[row] = true;
1069 }
1070 }
1071 } else {
1072 for (LocalOrdinal row = 0; row < numRows; row++) {
1073 ArrayView<const LocalOrdinal> indices;
1074 ArrayView<const Scalar> vals;
1075 A.getLocalRowView(row, indices, vals);
1076 size_t nnz = A.getNumEntriesInLocalRow(row);
1077 if (nnz > 1)
1078 for (size_t col = 0; col < nnz; col++)
1079 if ((indices[col] != row) && STS::magnitude(vals[col]) > tol) {
1080 boundaryNodes[row] = false;
1081 break;
1082 }
1083 }
1084 }
1085 return boundaryNodes;
1086}
1087
1088template <class CrsMatrix>
1089KOKKOS_FORCEINLINE_FUNCTION bool isDirichletRow(typename CrsMatrix::ordinal_type rowId,
1090 KokkosSparse::SparseRowViewConst<CrsMatrix>& row,
1091 const typename KokkosKernels::ArithTraits<typename CrsMatrix::value_type>::magnitudeType& tol,
1092 const bool count_twos_as_dirichlet) {
1093 using ATS = KokkosKernels::ArithTraits<typename CrsMatrix::value_type>;
1094
1095 auto length = row.length;
1096 bool boundaryNode = true;
1097
1098 if (count_twos_as_dirichlet) {
1099 if (length > 2) {
1100 decltype(length) colID = 0;
1101 for (; colID < length; colID++)
1102 if ((row.colidx(colID) != rowId) &&
1103 (ATS::magnitude(row.value(colID)) > tol)) {
1104 if (!boundaryNode)
1105 break;
1106 boundaryNode = false;
1107 }
1108 if (colID == length)
1109 boundaryNode = true;
1110 }
1111 } else {
1112 for (decltype(length) colID = 0; colID < length; colID++)
1113 if ((row.colidx(colID) != rowId) &&
1114 (ATS::magnitude(row.value(colID)) > tol)) {
1115 boundaryNode = false;
1116 break;
1117 }
1118 }
1119 return boundaryNode;
1120}
1121
1122template <class SC, class LO, class GO, class NO, class memory_space>
1123Kokkos::View<bool*, memory_space>
1124DetectDirichletRows_kokkos(const Xpetra::Matrix<SC, LO, GO, NO>& A,
1125 const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1126 const bool count_twos_as_dirichlet) {
1127 using impl_scalar_type = typename KokkosKernels::ArithTraits<SC>::val_type;
1128 using ATS = KokkosKernels::ArithTraits<impl_scalar_type>;
1129 using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1130 using helpers = Xpetra::Helpers<SC, LO, GO, NO>;
1131
1132 Kokkos::View<bool*, typename NO::device_type::memory_space> boundaryNodes;
1133
1134 if (helpers::isTpetraBlockCrs(A)) {
1135 const Tpetra::BlockCrsMatrix<SC, LO, GO, NO>& Am = toTpetraBlock(A);
1136 auto b_graph = Am.getCrsGraph().getLocalGraphDevice();
1137 auto b_rowptr = Am.getCrsGraph().getLocalRowPtrsDevice();
1138 auto values = Am.getValuesDevice();
1139 LO numBlockRows = Am.getLocalNumRows();
1140 const LO stride = Am.getBlockSize() * Am.getBlockSize();
1141
1142 boundaryNodes = Kokkos::View<bool*, typename NO::device_type::memory_space>(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numBlockRows);
1143
1144 if (count_twos_as_dirichlet)
1145 throw Exceptions::RuntimeError("BlockCrs does not support counting twos as Dirichlet");
1146
1147 Kokkos::parallel_for(
1148 "MueLu:Utils::DetectDirichletRowsBlockCrs", range_type(0, numBlockRows),
1149 KOKKOS_LAMBDA(const LO row) {
1150 auto rowView = b_graph.rowConst(row);
1151 auto length = rowView.length;
1152 LO valstart = b_rowptr[row] * stride;
1153
1154 boundaryNodes(row) = true;
1155 decltype(length) colID = 0;
1156 for (; colID < length; colID++) {
1157 if (rowView.colidx(colID) != row) {
1158 LO current = valstart + colID * stride;
1159 for (LO k = 0; k < stride; k++) {
1160 if (ATS::magnitude(values[current + k]) > tol) {
1161 boundaryNodes(row) = false;
1162 break;
1163 }
1164 }
1165 }
1166 if (boundaryNodes(row) == false)
1167 break;
1168 }
1169 });
1170 } else {
1171 auto localMatrix = A.getLocalMatrixDevice();
1172 LO numRows = A.getLocalNumRows();
1173 boundaryNodes = Kokkos::View<bool*, typename NO::device_type::memory_space>(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numRows);
1174
1175 Kokkos::parallel_for(
1176 "MueLu:Utils::DetectDirichletRows", range_type(0, numRows),
1177 KOKKOS_LAMBDA(const LO row) {
1178 auto rowView = localMatrix.rowConst(row);
1179 boundaryNodes(row) = isDirichletRow(row, rowView, tol, count_twos_as_dirichlet);
1180 });
1181 }
1182 if constexpr (std::is_same<memory_space, typename NO::device_type::memory_space>::value)
1183 return boundaryNodes;
1184 else {
1185 Kokkos::View<bool*, memory_space> boundaryNodes2(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), boundaryNodes.extent(0));
1186 Kokkos::deep_copy(boundaryNodes2, boundaryNodes);
1187 return boundaryNodes2;
1188 }
1189 // CAG: No idea why this is needed to avoid "warning: missing return statement at end of non-void function"
1190 Kokkos::View<bool*, memory_space> dummy("dummy", 0);
1191 return dummy;
1192}
1193
1194template <class SC, class LO, class GO, class NO>
1195Kokkos::View<bool*, typename NO::device_type::memory_space>
1197 DetectDirichletRows_kokkos(const Xpetra::Matrix<SC, LO, GO, NO>& A,
1198 const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1199 const bool count_twos_as_dirichlet) {
1200 return MueLu::DetectDirichletRows_kokkos<SC, LO, GO, NO, typename NO::device_type::memory_space>(A, tol, count_twos_as_dirichlet);
1201}
1202
1203template <class SC, class LO, class GO, class NO>
1204Kokkos::View<bool*, typename Kokkos::HostSpace>
1206 DetectDirichletRows_kokkos_host(const Xpetra::Matrix<SC, LO, GO, NO>& A,
1207 const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1208 const bool count_twos_as_dirichlet) {
1209 return MueLu::DetectDirichletRows_kokkos<SC, LO, GO, NO, typename Kokkos::HostSpace>(A, tol, count_twos_as_dirichlet);
1210}
1211
1212template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1213Teuchos::ArrayRCP<const bool>
1215 DetectDirichletRowsExt(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, bool& bHasZeroDiagonal, const typename Teuchos::ScalarTraits<Scalar>::magnitudeType& tol) {
1216 // assume that there is no zero diagonal in matrix
1217 bHasZeroDiagonal = false;
1218
1219 Teuchos::RCP<Vector> diagVec = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(A.getRowMap());
1220 A.getLocalDiagCopy(*diagVec);
1221 Teuchos::ArrayRCP<const Scalar> diagVecData = diagVec->getData(0);
1222
1223 LocalOrdinal numRows = A.getLocalNumRows();
1224 typedef Teuchos::ScalarTraits<Scalar> STS;
1225 ArrayRCP<bool> boundaryNodes(numRows, false);
1226 for (LocalOrdinal row = 0; row < numRows; row++) {
1227 ArrayView<const LocalOrdinal> indices;
1228 ArrayView<const Scalar> vals;
1229 A.getLocalRowView(row, indices, vals);
1230 size_t nnz = 0; // collect nonzeros in row (excluding the diagonal)
1231 bool bHasDiag = false;
1232 for (decltype(indices.size()) col = 0; col < indices.size(); col++) {
1233 if (indices[col] != row) {
1234 if (STS::magnitude(vals[col] / STS::magnitude(sqrt(STS::magnitude(diagVecData[row]) * STS::magnitude(diagVecData[col])))) > tol) {
1235 nnz++;
1236 }
1237 } else
1238 bHasDiag = true; // found a diagonal entry
1239 }
1240 if (bHasDiag == false)
1241 bHasZeroDiagonal = true; // we found at least one row without a diagonal
1242 else if (nnz == 0)
1243 boundaryNodes[row] = true;
1244 }
1245 return boundaryNodes;
1246}
1247
1248template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1250 EnforceInitialCondition(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1251 const Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& RHS,
1252 Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& InitialGuess,
1253 const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1254 const bool count_twos_as_dirichlet) {
1255 using range_type = Kokkos::RangePolicy<LO, typename Node::execution_space>;
1256
1257 LocalOrdinal numRows = A.getLocalNumRows();
1258 LocalOrdinal numVectors = RHS.getNumVectors();
1259 TEUCHOS_ASSERT_EQUALITY(numVectors, Teuchos::as<LocalOrdinal>(InitialGuess.getNumVectors()));
1260 if (Behavior::debug())
1261 TEUCHOS_ASSERT(RHS.getMap()->isCompatible(*InitialGuess.getMap()));
1262
1263 auto lclRHS = RHS.getLocalViewDevice(Tpetra::Access::ReadOnly);
1264 auto lclInitialGuess = InitialGuess.getLocalViewDevice(Tpetra::Access::ReadWrite);
1265 auto lclA = A.getLocalMatrixDevice();
1266
1267 Kokkos::parallel_for(
1268 "MueLu:Utils::EnforceInitialCondition", range_type(0, numRows),
1269 KOKKOS_LAMBDA(const LO i) {
1270 auto row = lclA.rowConst(i);
1271 if (isDirichletRow(i, row, tol, count_twos_as_dirichlet)) {
1272 for (LocalOrdinal j = 0; j < numVectors; ++j)
1273 lclInitialGuess(i, j) = lclRHS(i, j);
1274 }
1275 });
1276}
1277
1278template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1280 FindNonZeros(const Teuchos::ArrayRCP<const Scalar> vals,
1281 Teuchos::ArrayRCP<bool> nonzeros) {
1282 TEUCHOS_ASSERT(vals.size() == nonzeros.size());
1283 typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitudeType;
1284 const magnitudeType eps = 2.0 * Teuchos::ScalarTraits<magnitudeType>::eps();
1285 for (size_t i = 0; i < static_cast<size_t>(vals.size()); i++) {
1286 nonzeros[i] = (Teuchos::ScalarTraits<Scalar>::magnitude(vals[i]) > eps);
1287 }
1288}
1289
1290// Find Nonzeros in a device view
1291template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1293 FindNonZeros(const typename Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::dual_view_type::t_dev_const_um vals,
1294 Kokkos::View<bool*, typename Node::device_type> nonzeros) {
1295 using ATS = KokkosKernels::ArithTraits<Scalar>;
1296 using impl_ATS = KokkosKernels::ArithTraits<typename ATS::val_type>;
1297 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1298 TEUCHOS_ASSERT(vals.extent(0) == nonzeros.extent(0));
1299 const typename ATS::magnitudeType eps = 2.0 * impl_ATS::eps();
1300
1301 Kokkos::parallel_for(
1302 "MueLu:Maxwell1::FindNonZeros", range_type(0, vals.extent(0)),
1303 KOKKOS_LAMBDA(const size_t i) {
1304 nonzeros(i) = (impl_ATS::magnitude(vals(i, 0)) > eps);
1305 });
1306}
1307
1308template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1310 DetectDirichletColsAndDomains(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1311 const Teuchos::ArrayRCP<bool>& dirichletRows,
1312 Teuchos::ArrayRCP<bool> dirichletCols,
1313 Teuchos::ArrayRCP<bool> dirichletDomain) {
1314 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
1315 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> domMap = A.getDomainMap();
1316 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowMap = A.getRowMap();
1317 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> colMap = A.getColMap();
1318 TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == rowMap->getLocalNumElements());
1319 TEUCHOS_ASSERT(static_cast<size_t>(dirichletCols.size()) == colMap->getLocalNumElements());
1320 TEUCHOS_ASSERT(static_cast<size_t>(dirichletDomain.size()) == domMap->getLocalNumElements());
1321 RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> myColsToZero = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(colMap, 1, /*zeroOut=*/true);
1322 // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1323 for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1324 if (dirichletRows[i]) {
1325 ArrayView<const LocalOrdinal> indices;
1326 ArrayView<const Scalar> values;
1327 A.getLocalRowView(i, indices, values);
1328 for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1329 myColsToZero->replaceLocalValue(indices[j], 0, one);
1330 }
1331 }
1332
1333 RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> globalColsToZero;
1334 RCP<const Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node>> importer = A.getCrsGraph()->getImporter();
1335 if (!importer.is_null()) {
1336 globalColsToZero = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(domMap, 1, /*zeroOut=*/true);
1337 // export to domain map
1338 globalColsToZero->doExport(*myColsToZero, *importer, Xpetra::ADD);
1339 // import to column map
1340 myColsToZero->doImport(*globalColsToZero, *importer, Xpetra::INSERT);
1341 } else
1342 globalColsToZero = myColsToZero;
1343
1344 FindNonZeros(globalColsToZero->getData(0), dirichletDomain);
1345 FindNonZeros(myColsToZero->getData(0), dirichletCols);
1346}
1347
1348template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1350 DetectDirichletColsAndDomains(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1351 const Kokkos::View<bool*, typename Node::device_type>& dirichletRows,
1352 Kokkos::View<bool*, typename Node::device_type> dirichletCols,
1353 Kokkos::View<bool*, typename Node::device_type> dirichletDomain) {
1354 using ATS = KokkosKernels::ArithTraits<Scalar>;
1355 using impl_ATS = KokkosKernels::ArithTraits<typename ATS::val_type>;
1356 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1357 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> domMap = A.getDomainMap();
1358 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowMap = A.getRowMap();
1359 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> colMap = A.getColMap();
1360 TEUCHOS_ASSERT(dirichletRows.extent(0) == rowMap->getLocalNumElements());
1361 TEUCHOS_ASSERT(dirichletCols.extent(0) == colMap->getLocalNumElements());
1362 TEUCHOS_ASSERT(dirichletDomain.extent(0) == domMap->getLocalNumElements());
1363 RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> myColsToZero = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(colMap, /*zeroOut=*/true);
1364 // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1365 auto myColsToZeroView = myColsToZero->getLocalViewDevice(Tpetra::Access::ReadWrite);
1366 auto localMatrix = A.getLocalMatrixDevice();
1367 Kokkos::parallel_for(
1368 "MueLu:Maxwell1::DetectDirichletCols", range_type(0, rowMap->getLocalNumElements()),
1369 KOKKOS_LAMBDA(const LocalOrdinal row) {
1370 if (dirichletRows(row)) {
1371 auto rowView = localMatrix.row(row);
1372 auto length = rowView.length;
1373
1374 for (decltype(length) colID = 0; colID < length; colID++)
1375 myColsToZeroView(rowView.colidx(colID), 0) = impl_ATS::one();
1376 }
1377 });
1378
1379 RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> globalColsToZero;
1380 RCP<const Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node>> importer = A.getCrsGraph()->getImporter();
1381 if (!importer.is_null()) {
1382 globalColsToZero = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(domMap, /*zeroOut=*/true);
1383 // export to domain map
1384 globalColsToZero->doExport(*myColsToZero, *importer, Xpetra::ADD);
1385 // import to column map
1386 myColsToZero->doImport(*globalColsToZero, *importer, Xpetra::INSERT);
1387 } else
1388 globalColsToZero = myColsToZero;
1389 UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::FindNonZeros(globalColsToZero->getLocalViewDevice(Tpetra::Access::ReadOnly), dirichletDomain);
1390 UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::FindNonZeros(myColsToZero->getLocalViewDevice(Tpetra::Access::ReadOnly), dirichletCols);
1391}
1392
1393template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1395 ApplyRowSumCriterion(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol, Teuchos::ArrayRCP<bool>& dirichletRows) {
1396 typedef Teuchos::ScalarTraits<Scalar> STS;
1397 typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
1398 typedef Teuchos::ScalarTraits<MT> MTS;
1399 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowmap = A.getRowMap();
1400 for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1401 size_t nnz = A.getNumEntriesInLocalRow(row);
1402 ArrayView<const LocalOrdinal> indices;
1403 ArrayView<const Scalar> vals;
1404 A.getLocalRowView(row, indices, vals);
1405
1406 Scalar rowsum = STS::zero();
1407 Scalar diagval = STS::zero();
1408
1409 for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1410 LocalOrdinal col = indices[colID];
1411 if (row == col)
1412 diagval = vals[colID];
1413 rowsum += vals[colID];
1414 }
1415 // printf("A(%d,:) row_sum(point) = %6.4e\n",row,rowsum);
1416 if (rowSumTol < MTS::one() && STS::magnitude(rowsum) > STS::magnitude(diagval) * rowSumTol) {
1417 // printf("Row %d triggers rowsum\n",(int)row);
1418 dirichletRows[row] = true;
1419 }
1420 }
1421}
1422
1423template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1424void UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1425 ApplyRowSumCriterion(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, const Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>& BlockNumber, const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol, Teuchos::ArrayRCP<bool>& dirichletRows) {
1426 typedef Teuchos::ScalarTraits<Scalar> STS;
1427 typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
1428 typedef Teuchos::ScalarTraits<MT> MTS;
1429 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowmap = A.getRowMap();
1430
1431 TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "ApplyRowSumCriterion: BlockNumber must match's A's column map.");
1432
1433 Teuchos::ArrayRCP<const LocalOrdinal> block_id = BlockNumber.getData(0);
1434 for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1435 size_t nnz = A.getNumEntriesInLocalRow(row);
1436 ArrayView<const LocalOrdinal> indices;
1437 ArrayView<const Scalar> vals;
1438 A.getLocalRowView(row, indices, vals);
1439
1440 Scalar rowsum = STS::zero();
1441 Scalar diagval = STS::zero();
1442 for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1443 LocalOrdinal col = indices[colID];
1444 if (row == col)
1445 diagval = vals[colID];
1446 if (block_id[row] == block_id[col])
1447 rowsum += vals[colID];
1448 }
1449
1450 // printf("A(%d,:) row_sum(block) = %6.4e\n",row,rowsum);
1451 if (rowSumTol < MTS::one() && STS::magnitude(rowsum) > STS::magnitude(diagval) * rowSumTol) {
1452 // printf("Row %d triggers rowsum\n",(int)row);
1453 dirichletRows[row] = true;
1454 }
1455 }
1456}
1457
1458// Applies rowsum criterion
1459template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class memory_space>
1460void ApplyRowSumCriterion(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1461 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1462 Kokkos::View<bool*, memory_space>& dirichletRows) {
1463 typedef Teuchos::ScalarTraits<Scalar> STS;
1464 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowmap = A.getRowMap();
1465
1466 auto dirichletRowsHost = Kokkos::create_mirror_view(dirichletRows);
1467 Kokkos::deep_copy(dirichletRowsHost, dirichletRows);
1468
1469 for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1470 size_t nnz = A.getNumEntriesInLocalRow(row);
1471 ArrayView<const LocalOrdinal> indices;
1472 ArrayView<const Scalar> vals;
1473 A.getLocalRowView(row, indices, vals);
1474
1475 Scalar rowsum = STS::zero();
1476 Scalar diagval = STS::zero();
1477 for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1478 LocalOrdinal col = indices[colID];
1479 if (row == col)
1480 diagval = vals[colID];
1481 rowsum += vals[colID];
1482 }
1483 if (STS::real(rowsum) > STS::magnitude(diagval) * rowSumTol)
1484 dirichletRowsHost(row) = true;
1485 }
1486
1487 Kokkos::deep_copy(dirichletRows, dirichletRowsHost);
1488}
1489
1490template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1491void UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1492 ApplyRowSumCriterion(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1493 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1494 Kokkos::View<bool*, typename Node::device_type::memory_space>& dirichletRows) {
1495 MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, typename Node::device_type::memory_space>(A, rowSumTol, dirichletRows);
1496}
1497
1498template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1499void UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1500 ApplyRowSumCriterionHost(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1501 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1502 Kokkos::View<bool*, Kokkos::HostSpace>& dirichletRows) {
1503 MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, Kokkos::HostSpace>(A, rowSumTol, dirichletRows);
1504}
1505
1506// Applies rowsum criterion
1507template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class memory_space>
1508void ApplyRowSumCriterion(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1509 const Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>& BlockNumber,
1510 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1511 Kokkos::View<bool*, memory_space>& dirichletRows) {
1512 typedef Teuchos::ScalarTraits<Scalar> STS;
1513 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowmap = A.getRowMap();
1514
1515 TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "ApplyRowSumCriterion: BlockNumber must match's A's column map.");
1516
1517 auto dirichletRowsHost = Kokkos::create_mirror_view(dirichletRows);
1518 Kokkos::deep_copy(dirichletRowsHost, dirichletRows);
1519
1520 Teuchos::ArrayRCP<const LocalOrdinal> block_id = BlockNumber.getData(0);
1521 for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1522 size_t nnz = A.getNumEntriesInLocalRow(row);
1523 ArrayView<const LocalOrdinal> indices;
1524 ArrayView<const Scalar> vals;
1525 A.getLocalRowView(row, indices, vals);
1526
1527 Scalar rowsum = STS::zero();
1528 Scalar diagval = STS::zero();
1529 for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1530 LocalOrdinal col = indices[colID];
1531 if (row == col)
1532 diagval = vals[colID];
1533 if (block_id[row] == block_id[col])
1534 rowsum += vals[colID];
1535 }
1536 if (STS::real(rowsum) > STS::magnitude(diagval) * rowSumTol)
1537 dirichletRowsHost(row) = true;
1538 }
1539
1540 Kokkos::deep_copy(dirichletRows, dirichletRowsHost);
1541}
1542
1543template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1544void UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1545 ApplyRowSumCriterion(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1546 const Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>& BlockNumber,
1547 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1548 Kokkos::View<bool*, typename Node::device_type::memory_space>& dirichletRows) {
1549 MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, typename Node::device_type::memory_space>(A, BlockNumber, rowSumTol, dirichletRows);
1550}
1551
1552template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1553void UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1554 ApplyRowSumCriterionHost(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1555 const Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>& BlockNumber,
1556 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1557 Kokkos::View<bool*, Kokkos::HostSpace>& dirichletRows) {
1558 MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, Kokkos::HostSpace>(A, BlockNumber, rowSumTol, dirichletRows);
1559}
1560
1561template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1562Teuchos::ArrayRCP<const bool>
1564 DetectDirichletCols(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1565 const Teuchos::ArrayRCP<const bool>& dirichletRows) {
1566 Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
1567 Scalar one = Teuchos::ScalarTraits<Scalar>::one();
1568 Teuchos::RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> domMap = A.getDomainMap();
1569 Teuchos::RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> colMap = A.getColMap();
1570 Teuchos::RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> myColsToZero = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(colMap, 1);
1571 myColsToZero->putScalar(zero);
1572 // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1573 for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1574 if (dirichletRows[i]) {
1575 Teuchos::ArrayView<const LocalOrdinal> indices;
1576 Teuchos::ArrayView<const Scalar> values;
1577 A.getLocalRowView(i, indices, values);
1578 for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1579 myColsToZero->replaceLocalValue(indices[j], 0, one);
1580 }
1581 }
1582
1583 Teuchos::RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> globalColsToZero = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(domMap, 1);
1584 globalColsToZero->putScalar(zero);
1585 Teuchos::RCP<Xpetra::Export<LocalOrdinal, GlobalOrdinal, Node>> exporter = Xpetra::ExportFactory<LocalOrdinal, GlobalOrdinal, Node>::Build(colMap, domMap);
1586 // export to domain map
1587 globalColsToZero->doExport(*myColsToZero, *exporter, Xpetra::ADD);
1588 // import to column map
1589 myColsToZero->doImport(*globalColsToZero, *exporter, Xpetra::INSERT);
1590 Teuchos::ArrayRCP<const Scalar> myCols = myColsToZero->getData(0);
1591 Teuchos::ArrayRCP<bool> dirichletCols(colMap->getLocalNumElements(), true);
1592 Magnitude eps = Teuchos::ScalarTraits<Magnitude>::eps();
1593 for (size_t i = 0; i < colMap->getLocalNumElements(); i++) {
1594 dirichletCols[i] = Teuchos::ScalarTraits<Scalar>::magnitude(myCols[i]) > 2.0 * eps;
1595 }
1596 return dirichletCols;
1597}
1598
1599template <class SC, class LO, class GO, class NO>
1600Kokkos::View<bool*, typename NO::device_type>
1602 DetectDirichletCols(const Xpetra::Matrix<SC, LO, GO, NO>& A,
1603 const Kokkos::View<const bool*, typename NO::device_type>& dirichletRows) {
1604 using ATS = KokkosKernels::ArithTraits<SC>;
1605 using impl_ATS = KokkosKernels::ArithTraits<typename ATS::val_type>;
1606 using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1607
1608 SC zero = ATS::zero();
1609
1610 auto localMatrix = A.getLocalMatrixDevice();
1611 LO numRows = A.getLocalNumRows();
1612
1613 Teuchos::RCP<const Xpetra::Map<LO, GO, NO>> domMap = A.getDomainMap();
1614 Teuchos::RCP<const Xpetra::Map<LO, GO, NO>> colMap = A.getColMap();
1615 Teuchos::RCP<Xpetra::MultiVector<SC, LO, GO, NO>> myColsToZero = Xpetra::MultiVectorFactory<SC, LO, GO, NO>::Build(colMap, 1);
1616 myColsToZero->putScalar(zero);
1617 auto myColsToZeroView = myColsToZero->getLocalViewDevice(Tpetra::Access::ReadWrite);
1618 // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1619 Kokkos::parallel_for(
1620 "MueLu:Utils::DetectDirichletCols1", range_type(0, numRows),
1621 KOKKOS_LAMBDA(const LO row) {
1622 if (dirichletRows(row)) {
1623 auto rowView = localMatrix.row(row);
1624 auto length = rowView.length;
1625
1626 for (decltype(length) colID = 0; colID < length; colID++)
1627 myColsToZeroView(rowView.colidx(colID), 0) = impl_ATS::one();
1628 }
1629 });
1630
1631 Teuchos::RCP<Xpetra::MultiVector<SC, LO, GO, NO>> globalColsToZero = Xpetra::MultiVectorFactory<SC, LO, GO, NO>::Build(domMap, 1);
1632 globalColsToZero->putScalar(zero);
1633 Teuchos::RCP<Xpetra::Export<LO, GO, NO>> exporter = Xpetra::ExportFactory<LO, GO, NO>::Build(colMap, domMap);
1634 // export to domain map
1635 globalColsToZero->doExport(*myColsToZero, *exporter, Xpetra::ADD);
1636 // import to column map
1637 myColsToZero->doImport(*globalColsToZero, *exporter, Xpetra::INSERT);
1638
1639 auto myCols = myColsToZero->getLocalViewDevice(Tpetra::Access::ReadOnly);
1640 size_t numColEntries = colMap->getLocalNumElements();
1641 Kokkos::View<bool*, typename NO::device_type> dirichletCols(Kokkos::ViewAllocateWithoutInitializing("dirichletCols"), numColEntries);
1642 const typename ATS::magnitudeType eps = 2.0 * ATS::eps();
1643
1644 Kokkos::parallel_for(
1645 "MueLu:Utils::DetectDirichletCols2", range_type(0, numColEntries),
1646 KOKKOS_LAMBDA(const size_t i) {
1647 dirichletCols(i) = impl_ATS::magnitude(myCols(i, 0)) > eps;
1648 });
1649 return dirichletCols;
1650}
1651
1652template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1653Scalar
1655 Frobenius(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B) {
1656 // We check only row maps. Column may be different. One would hope that they are the same, as we typically
1657 // calculate frobenius norm of the specified sparsity pattern with an updated matrix from the previous step,
1658 // but matrix addition, even when one is submatrix of the other, changes column map (though change may be as
1659 // simple as couple of elements swapped)
1660 TEUCHOS_TEST_FOR_EXCEPTION(!A.getRowMap()->isSameAs(*B.getRowMap()), Exceptions::Incompatible, "MueLu::CGSolver::Frobenius: row maps are incompatible");
1661 TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete() || !B.isFillComplete(), Exceptions::RuntimeError, "Matrices must be fill completed");
1662
1663 const Map& AColMap = *A.getColMap();
1664 const Map& BColMap = *B.getColMap();
1665
1666 Teuchos::ArrayView<const LocalOrdinal> indA, indB;
1667 Teuchos::ArrayView<const Scalar> valA, valB;
1668 size_t nnzA = 0, nnzB = 0;
1669
1670 // We use a simple algorithm
1671 // for each row we fill valBAll array with the values in the corresponding row of B
1672 // as such, it serves as both sorted array and as storage, so we don't need to do a
1673 // tricky problem: "find a value in the row of B corresponding to the specific GID"
1674 // Once we do that, we translate LID of entries of row of A to LID of B, and multiply
1675 // corresponding entries.
1676 // The algorithm should be reasonably cheap, as it does not sort anything, provided
1677 // that getLocalElement and getGlobalElement functions are reasonably effective. It
1678 // *is* possible that the costs are hidden in those functions, but if maps are close
1679 // to linear maps, we should be fine
1680 Teuchos::Array<Scalar> valBAll(BColMap.getLocalNumElements());
1681
1682 LocalOrdinal invalid = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
1683 Scalar zero = Teuchos::ScalarTraits<Scalar>::zero(), f = zero, gf;
1684 size_t numRows = A.getLocalNumRows();
1685 for (size_t i = 0; i < numRows; i++) {
1686 A.getLocalRowView(i, indA, valA);
1687 B.getLocalRowView(i, indB, valB);
1688 nnzA = indA.size();
1689 nnzB = indB.size();
1690
1691 // Set up array values
1692 for (size_t j = 0; j < nnzB; j++)
1693 valBAll[indB[j]] = valB[j];
1694
1695 for (size_t j = 0; j < nnzA; j++) {
1696 // The cost of the whole Frobenius dot product function depends on the
1697 // cost of the getLocalElement and getGlobalElement functions here.
1698 LocalOrdinal ind = BColMap.getLocalElement(AColMap.getGlobalElement(indA[j]));
1699 if (ind != invalid)
1700 f += valBAll[ind] * valA[j];
1701 }
1702
1703 // Clean up array values
1704 for (size_t j = 0; j < nnzB; j++)
1705 valBAll[indB[j]] = zero;
1706 }
1707
1708 MueLu_sumAll(AColMap.getComm(), f, gf);
1709
1710 return gf;
1711}
1712
1713template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1716 // Distribute the seeds evenly in [1,maxint-1]. This guarantees nothing
1717 // about where in random number stream we are, but avoids overflow situations
1718 // in parallel when multiplying by a PID. It would be better to use
1719 // a good parallel random number generator.
1720 double one = 1.0;
1721 int maxint = INT_MAX; //= 2^31-1 = 2147483647 for 32-bit integers
1722 int mySeed = Teuchos::as<int>((maxint - 1) * (one - (comm.getRank() + 1) / (comm.getSize() + one)));
1723 if (mySeed < 1 || mySeed == maxint) {
1724 std::ostringstream errStr;
1725 errStr << "Error detected with random seed = " << mySeed << ". It should be in the interval [1,2^31-2].";
1726 throw Exceptions::RuntimeError(errStr.str());
1727 }
1728 std::srand(mySeed);
1729 // For Tpetra, we could use Kokkos' random number generator here.
1730 Teuchos::ScalarTraits<Scalar>::seedrandom(mySeed);
1731}
1732
1733template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1735 FindDirichletRows(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1736 std::vector<LocalOrdinal>& dirichletRows, bool count_twos_as_dirichlet) {
1737 typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
1738 dirichletRows.resize(0);
1739 for (size_t i = 0; i < A->getLocalNumRows(); i++) {
1740 Teuchos::ArrayView<const LocalOrdinal> indices;
1741 Teuchos::ArrayView<const Scalar> values;
1742 A->getLocalRowView(i, indices, values);
1743 int nnz = 0;
1744 for (size_t j = 0; j < (size_t)indices.size(); j++) {
1745 if (Teuchos::ScalarTraits<Scalar>::magnitude(values[j]) > Teuchos::ScalarTraits<MT>::eps()) {
1746 nnz++;
1747 }
1748 }
1749 if (nnz == 1 || (count_twos_as_dirichlet && nnz == 2)) {
1750 dirichletRows.push_back(i);
1751 }
1752 }
1753}
1754
1755template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1757 ApplyOAZToMatrixRows(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1758 const std::vector<LocalOrdinal>& dirichletRows) {
1759 RCP<const Map> Rmap = A->getRowMap();
1760 RCP<const Map> Cmap = A->getColMap();
1761 Scalar one = Teuchos::ScalarTraits<Scalar>::one();
1762 Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
1763
1764 for (size_t i = 0; i < dirichletRows.size(); i++) {
1765 GlobalOrdinal row_gid = Rmap->getGlobalElement(dirichletRows[i]);
1766
1767 Teuchos::ArrayView<const LocalOrdinal> indices;
1768 Teuchos::ArrayView<const Scalar> values;
1769 A->getLocalRowView(dirichletRows[i], indices, values);
1770 // NOTE: This won't work with fancy node types.
1771 Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1772 for (size_t j = 0; j < (size_t)indices.size(); j++) {
1773 if (Cmap->getGlobalElement(indices[j]) == row_gid)
1774 valuesNC[j] = one;
1775 else
1776 valuesNC[j] = zero;
1777 }
1778 }
1779}
1780
1781template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1783 ApplyOAZToMatrixRows(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1784 const Teuchos::ArrayRCP<const bool>& dirichletRows) {
1785 TEUCHOS_ASSERT(A->isFillComplete());
1786 RCP<const Map> domMap = A->getDomainMap();
1787 RCP<const Map> ranMap = A->getRangeMap();
1788 RCP<const Map> Rmap = A->getRowMap();
1789 RCP<const Map> Cmap = A->getColMap();
1790 TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == Rmap->getLocalNumElements());
1791 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
1792 const Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
1793 A->resumeFill();
1794 for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1795 if (dirichletRows[i]) {
1796 GlobalOrdinal row_gid = Rmap->getGlobalElement(i);
1797
1798 Teuchos::ArrayView<const LocalOrdinal> indices;
1799 Teuchos::ArrayView<const Scalar> values;
1800 A->getLocalRowView(i, indices, values);
1801
1802 Teuchos::ArrayRCP<Scalar> valuesNC(values.size());
1803 for (size_t j = 0; j < (size_t)indices.size(); j++) {
1804 if (Cmap->getGlobalElement(indices[j]) == row_gid)
1805 valuesNC[j] = one;
1806 else
1807 valuesNC[j] = zero;
1808 }
1809 A->replaceLocalValues(i, indices, valuesNC());
1810 }
1811 }
1812 A->fillComplete(domMap, ranMap);
1813}
1814
1815template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1817 ApplyOAZToMatrixRows(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1818 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows) {
1819 TEUCHOS_ASSERT(A->isFillComplete());
1820 using ATS = KokkosKernels::ArithTraits<Scalar>;
1821 using impl_ATS = KokkosKernels::ArithTraits<typename ATS::val_type>;
1822 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1823
1824 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> domMap = A->getDomainMap();
1825 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> ranMap = A->getRangeMap();
1826 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> Rmap = A->getRowMap();
1827 RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> Cmap = A->getColMap();
1828
1829 TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == Rmap->getLocalNumElements());
1830
1831 auto localMatrix = A->getLocalMatrixDevice();
1832 auto localRmap = Rmap->getLocalMap();
1833 auto localCmap = Cmap->getLocalMap();
1834
1835 Kokkos::parallel_for(
1836 "MueLu::Utils::ApplyOAZ", range_type(0, dirichletRows.extent(0)),
1837 KOKKOS_LAMBDA(const LocalOrdinal row) {
1838 if (dirichletRows(row)) {
1839 auto rowView = localMatrix.row(row);
1840 auto length = rowView.length;
1841 auto row_gid = localRmap.getGlobalElement(row);
1842 auto row_lid = localCmap.getLocalElement(row_gid);
1843
1844 for (decltype(length) colID = 0; colID < length; colID++)
1845 if (rowView.colidx(colID) == row_lid)
1846 rowView.value(colID) = impl_ATS::one();
1847 else
1848 rowView.value(colID) = impl_ATS::zero();
1849 }
1850 });
1851}
1852
1853template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1855 ZeroDirichletRows(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1856 const std::vector<LocalOrdinal>& dirichletRows,
1857 Scalar replaceWith) {
1858 for (size_t i = 0; i < dirichletRows.size(); i++) {
1859 Teuchos::ArrayView<const LocalOrdinal> indices;
1860 Teuchos::ArrayView<const Scalar> values;
1861 A->getLocalRowView(dirichletRows[i], indices, values);
1862 // NOTE: This won't work with fancy node types.
1863 Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1864 for (size_t j = 0; j < (size_t)indices.size(); j++)
1865 valuesNC[j] = replaceWith;
1866 }
1867}
1868
1869template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1871 ZeroDirichletRows(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1872 const Teuchos::ArrayRCP<const bool>& dirichletRows,
1873 Scalar replaceWith) {
1874 TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == A->getRowMap()->getLocalNumElements());
1875 for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1876 if (dirichletRows[i]) {
1877 Teuchos::ArrayView<const LocalOrdinal> indices;
1878 Teuchos::ArrayView<const Scalar> values;
1879 A->getLocalRowView(i, indices, values);
1880 // NOTE: This won't work with fancy node types.
1881 Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1882 for (size_t j = 0; j < (size_t)indices.size(); j++)
1883 valuesNC[j] = replaceWith;
1884 }
1885 }
1886}
1887
1888template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1890 ZeroDirichletRows(Teuchos::RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& X,
1891 const Teuchos::ArrayRCP<const bool>& dirichletRows,
1892 Scalar replaceWith) {
1893 TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == X->getMap()->getLocalNumElements());
1894 for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1895 if (dirichletRows[i]) {
1896 for (size_t j = 0; j < X->getNumVectors(); j++)
1897 X->replaceLocalValue(i, j, replaceWith);
1898 }
1899 }
1900}
1901
1902template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1904 ZeroDirichletRows(RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1905 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
1906 Scalar replaceWith) {
1907 using ATS = KokkosKernels::ArithTraits<Scalar>;
1908 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1909
1910 typename ATS::val_type impl_replaceWith = replaceWith;
1911
1912 auto localMatrix = A->getLocalMatrixDevice();
1913 LocalOrdinal numRows = A->getLocalNumRows();
1914
1915 Kokkos::parallel_for(
1916 "MueLu:Utils::ZeroDirichletRows", range_type(0, numRows),
1917 KOKKOS_LAMBDA(const LocalOrdinal row) {
1918 if (dirichletRows(row)) {
1919 auto rowView = localMatrix.row(row);
1920 auto length = rowView.length;
1921 for (decltype(length) colID = 0; colID < length; colID++)
1922 rowView.value(colID) = impl_replaceWith;
1923 }
1924 });
1925}
1926
1927template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1928void UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1929 ZeroDirichletRows(RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& X,
1930 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
1931 Scalar replaceWith) {
1932 using ATS = KokkosKernels::ArithTraits<Scalar>;
1933 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1934
1935 typename ATS::val_type impl_replaceWith = replaceWith;
1936
1937 auto myCols = X->getLocalViewDevice(Tpetra::Access::ReadWrite);
1938 size_t numVecs = X->getNumVectors();
1939 Kokkos::parallel_for(
1940 "MueLu:Utils::ZeroDirichletRows_MV", range_type(0, dirichletRows.size()),
1941 KOKKOS_LAMBDA(const size_t i) {
1942 if (dirichletRows(i)) {
1943 for (size_t j = 0; j < numVecs; j++)
1944 myCols(i, j) = impl_replaceWith;
1945 }
1946 });
1947}
1948
1949template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1951 ZeroDirichletCols(Teuchos::RCP<Matrix>& A,
1952 const Teuchos::ArrayRCP<const bool>& dirichletCols,
1953 Scalar replaceWith) {
1954 TEUCHOS_ASSERT(static_cast<size_t>(dirichletCols.size()) == A->getColMap()->getLocalNumElements());
1955 for (size_t i = 0; i < A->getLocalNumRows(); i++) {
1956 Teuchos::ArrayView<const LocalOrdinal> indices;
1957 Teuchos::ArrayView<const Scalar> values;
1958 A->getLocalRowView(i, indices, values);
1959 // NOTE: This won't work with fancy node types.
1960 Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1961 for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1962 if (dirichletCols[indices[j]])
1963 valuesNC[j] = replaceWith;
1964 }
1965}
1966
1967template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1969 ZeroDirichletCols(RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1970 const Kokkos::View<const bool*, typename Node::device_type>& dirichletCols,
1971 Scalar replaceWith) {
1972 using ATS = KokkosKernels::ArithTraits<Scalar>;
1973 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1974
1975 typename ATS::val_type impl_replaceWith = replaceWith;
1976
1977 auto localMatrix = A->getLocalMatrixDevice();
1978 LocalOrdinal numRows = A->getLocalNumRows();
1979
1980 Kokkos::parallel_for(
1981 "MueLu:Utils::ZeroDirichletCols", range_type(0, numRows),
1982 KOKKOS_LAMBDA(const LocalOrdinal row) {
1983 auto rowView = localMatrix.row(row);
1984 auto length = rowView.length;
1985 for (decltype(length) colID = 0; colID < length; colID++)
1986 if (dirichletCols(rowView.colidx(colID))) {
1987 rowView.value(colID) = impl_replaceWith;
1988 }
1989 });
1990}
1991
1992template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1994 FindDirichletRowsAndPropagateToCols(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
1995 Teuchos::RCP<Xpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node>>& isDirichletRow,
1996 Teuchos::RCP<Xpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node>>& isDirichletCol) {
1997 // Make sure A's RowMap == DomainMap
1998 if (!A->getRowMap()->isSameAs(*A->getDomainMap())) {
1999 throw std::runtime_error("UtilitiesBase::FindDirichletRowsAndPropagateToCols row and domain maps must match.");
2000 }
2001 RCP<const Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node>> importer = A->getCrsGraph()->getImporter();
2002 bool has_import = !importer.is_null();
2003
2004 // Find the Dirichlet rows
2005 std::vector<LocalOrdinal> dirichletRows;
2006 FindDirichletRows(A, dirichletRows);
2007
2008#if 0
2009 printf("[%d] DirichletRow Ids = ",A->getRowMap()->getComm()->getRank());
2010 for(size_t i=0; i<(size_t) dirichletRows.size(); i++)
2011 printf("%d ",dirichletRows[i]);
2012 printf("\n");
2013 fflush(stdout);
2014#endif
2015 // Allocate all as non-Dirichlet
2016 isDirichletRow = Xpetra::VectorFactory<int, LocalOrdinal, GlobalOrdinal, Node>::Build(A->getRowMap(), true);
2017 isDirichletCol = Xpetra::VectorFactory<int, LocalOrdinal, GlobalOrdinal, Node>::Build(A->getColMap(), true);
2018
2019 {
2020 Teuchos::ArrayRCP<int> dr_rcp = isDirichletRow->getDataNonConst(0);
2021 Teuchos::ArrayView<int> dr = dr_rcp();
2022 Teuchos::ArrayRCP<int> dc_rcp = isDirichletCol->getDataNonConst(0);
2023 Teuchos::ArrayView<int> dc = dc_rcp();
2024 for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
2025 dr[dirichletRows[i]] = 1;
2026 if (!has_import) dc[dirichletRows[i]] = 1;
2027 }
2028 }
2029
2030 if (has_import)
2031 isDirichletCol->doImport(*isDirichletRow, *importer, Xpetra::CombineMode::ADD);
2032}
2033
2034template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2035RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
2037 ReplaceNonZerosWithOnes(const RCP<Matrix>& original) {
2038 using ISC = typename KokkosKernels::ArithTraits<Scalar>::val_type;
2039 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
2040 using local_matrix_type = typename CrsMatrix::local_matrix_device_type;
2041 using values_type = typename local_matrix_type::values_type;
2042
2043 const ISC ONE = KokkosKernels::ArithTraits<ISC>::one();
2044 const ISC ZERO = KokkosKernels::ArithTraits<ISC>::zero();
2045
2046 // Copy the values array of the old matrix to a new array, replacing all the non-zeros with one
2047 auto localMatrix = original->getLocalMatrixDevice();
2048 TEUCHOS_TEST_FOR_EXCEPTION(!original->hasCrsGraph(), Exceptions::RuntimeError, "ReplaceNonZerosWithOnes: Cannot get CrsGraph");
2049 values_type new_values("values", localMatrix.nnz());
2050
2051 Kokkos::parallel_for(
2052 "ReplaceNonZerosWithOnes", range_type(0, localMatrix.nnz()), KOKKOS_LAMBDA(const size_t i) {
2053 if (localMatrix.values(i) != ZERO)
2054 new_values(i) = ONE;
2055 else
2056 new_values(i) = ZERO;
2057 });
2058
2059 // Build the new matrix
2060 RCP<Matrix> NewMatrix = Xpetra::MatrixFactory<SC, LO, GO, NO>::Build(original->getCrsGraph(), new_values);
2061 TEUCHOS_TEST_FOR_EXCEPTION(NewMatrix.is_null(), Exceptions::RuntimeError, "ReplaceNonZerosWithOnes: MatrixFactory::Build() did not return matrix");
2062 NewMatrix->fillComplete(original->getDomainMap(), original->getRangeMap());
2063 return NewMatrix;
2064}
2065
2066template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2067RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
2069 // Create Minv via sparse apprximate inverse
2070
2071 Level miniLevel;
2072 Teuchos::RCP<MueLu::FactoryManager<SC, LO, GO, NO>> factoryHandler = Teuchos::rcp(new MueLu::FactoryManager<SC, LO, GO, NO>());
2073 miniLevel.SetFactoryManager(factoryHandler);
2074 miniLevel.SetLevelID(0);
2075#ifdef HAVE_MUELU_TIMER_SYNCHRONIZATION
2076 miniLevel.SetComm(M->getRowMap()->getComm());
2077#endif
2078 miniLevel.Set("A", M);
2079
2080 auto invapproxFact = rcp(new InverseApproximationFactory());
2081 invapproxFact->SetFactory("A", MueLu::NoFactory::getRCP());
2082 invapproxFact->SetParameter("inverse: approximation type", Teuchos::ParameterEntry(std::string("sparseapproxinverse")));
2083 miniLevel.Request("Ainv", invapproxFact.get());
2084 invapproxFact->Build(miniLevel);
2085 RCP<Matrix> NewMatrix = miniLevel.Get<RCP<Matrix>>("Ainv", invapproxFact.get());
2086 TEUCHOS_TEST_FOR_EXCEPTION(NewMatrix.is_null(), Exceptions::RuntimeError, "SPAI: MatrixFactory::Build() did not return matrix");
2087 return NewMatrix;
2088}
2089
2090template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2091RCP<const Xpetra::BlockedMap<LocalOrdinal, GlobalOrdinal, Node>>
2093 GeneratedBlockedTargetMap(const Xpetra::BlockedMap<LocalOrdinal, GlobalOrdinal, Node>& sourceBlockedMap,
2094 const Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node>& Importer) {
2095 typedef Xpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node> IntVector;
2096 Xpetra::UnderlyingLib lib = sourceBlockedMap.lib();
2097
2098 // De-stride the map if we have to (might regret this later)
2099 RCP<const Map> fullMap = sourceBlockedMap.getMap();
2100 RCP<const Map> stridedMap = Teuchos::rcp_dynamic_cast<const Xpetra::StridedMap<LocalOrdinal, GlobalOrdinal, Node>>(fullMap);
2101 if (!stridedMap.is_null()) fullMap = stridedMap->getMap();
2102
2103 // Initial sanity checking for map compatibil
2104 const size_t numSubMaps = sourceBlockedMap.getNumMaps();
2105 if (!Importer.getSourceMap()->isCompatible(*fullMap))
2106 throw std::runtime_error("GenerateBlockedTargetMap(): Map compatibility error");
2107
2108 // Build an indicator vector
2109 RCP<IntVector> block_ids = Xpetra::VectorFactory<int, LocalOrdinal, GlobalOrdinal, Node>::Build(fullMap);
2110
2111 for (size_t i = 0; i < numSubMaps; i++) {
2112 RCP<const Map> map = sourceBlockedMap.getMap(i);
2113
2114 for (size_t j = 0; j < map->getLocalNumElements(); j++) {
2115 LocalOrdinal jj = fullMap->getLocalElement(map->getGlobalElement(j));
2116 block_ids->replaceLocalValue(jj, (int)i);
2117 }
2118 }
2119
2120 // Get the block ids for the new map
2121 RCP<const Map> targetMap = Importer.getTargetMap();
2122 RCP<IntVector> new_block_ids = Xpetra::VectorFactory<int, LocalOrdinal, GlobalOrdinal, Node>::Build(targetMap);
2123 new_block_ids->doImport(*block_ids, Importer, Xpetra::CombineMode::ADD);
2124 Teuchos::ArrayRCP<const int> dataRCP = new_block_ids->getData(0);
2125 Teuchos::ArrayView<const int> data = dataRCP();
2126
2127 // Get the GIDs for each subblock
2128 Teuchos::Array<Teuchos::Array<GlobalOrdinal>> elementsInSubMap(numSubMaps);
2129 for (size_t i = 0; i < targetMap->getLocalNumElements(); i++) {
2130 elementsInSubMap[data[i]].push_back(targetMap->getGlobalElement(i));
2131 }
2132
2133 // Generate the new submaps
2134 std::vector<RCP<const Map>> subMaps(numSubMaps);
2135 for (size_t i = 0; i < numSubMaps; i++) {
2136 subMaps[i] = Xpetra::MapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build(lib, Teuchos::OrdinalTraits<GlobalOrdinal>::invalid(), elementsInSubMap[i](), targetMap->getIndexBase(), targetMap->getComm());
2137 }
2138
2139 // Build the BlockedMap
2140 return rcp(new BlockedMap(targetMap, subMaps));
2141}
2142
2143template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2145 MapsAreNested(const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>& rowMap, const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>& colMap) {
2146 if ((rowMap.lib() == Xpetra::UseTpetra) && (colMap.lib() == Xpetra::UseTpetra)) {
2147 auto tpRowMap = toTpetra(rowMap);
2148 auto tpColMap = toTpetra(colMap);
2149 return tpColMap.isLocallyFitted(tpRowMap);
2150 }
2151
2152 ArrayView<const GlobalOrdinal> rowElements = rowMap.getLocalElementList();
2153 ArrayView<const GlobalOrdinal> colElements = colMap.getLocalElementList();
2154
2155 const size_t numElements = rowElements.size();
2156
2157 if (size_t(colElements.size()) < numElements)
2158 return false;
2159
2160 bool goodMap = true;
2161 for (size_t i = 0; i < numElements; i++)
2162 if (rowElements[i] != colElements[i]) {
2163 goodMap = false;
2164 break;
2165 }
2166
2167 return goodMap;
2168}
2169
2170template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2171Teuchos::RCP<Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>>
2173 ReverseCuthillMcKee(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op) {
2174 using local_matrix_type = typename Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type;
2175 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
2176 using lno_nnz_view_t = typename local_graph_type::entries_type::non_const_type;
2177 using device = typename local_graph_type::device_type;
2178 using execution_space = typename local_matrix_type::execution_space;
2179 using ordinal_type = typename local_matrix_type::ordinal_type;
2180
2181 local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
2182
2183 lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm<device, typename local_graph_type::row_map_type, typename local_graph_type::entries_type, lno_nnz_view_t>(localGraph.row_map, localGraph.entries);
2184
2185 RCP<Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>> retval =
2186 Xpetra::VectorFactory<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(Op.getRowMap());
2187
2188 // Copy out and reorder data
2189 auto view1D = Kokkos::subview(retval->getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
2190 Kokkos::parallel_for(
2191 "Utilities::ReverseCuthillMcKee",
2192 Kokkos::RangePolicy<ordinal_type, execution_space>(0, localGraph.numRows()),
2193 KOKKOS_LAMBDA(const ordinal_type rowIdx) {
2194 view1D(rcmOrder(rowIdx)) = rowIdx;
2195 });
2196 return retval;
2197}
2198
2199template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2200Teuchos::RCP<Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>>
2202 CuthillMcKee(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op) {
2203 using local_matrix_type = typename Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type;
2204 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
2205 using lno_nnz_view_t = typename local_graph_type::entries_type::non_const_type;
2206 using device = typename local_graph_type::device_type;
2207 using execution_space = typename local_matrix_type::execution_space;
2208 using ordinal_type = typename local_matrix_type::ordinal_type;
2209
2210 local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
2211 LocalOrdinal numRows = localGraph.numRows();
2212
2213 lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm<device, typename local_graph_type::row_map_type, typename local_graph_type::entries_type, lno_nnz_view_t>(localGraph.row_map, localGraph.entries);
2214
2215 RCP<Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>> retval =
2216 Xpetra::VectorFactory<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(Op.getRowMap());
2217
2218 // Copy out data
2219 auto view1D = Kokkos::subview(retval->getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
2220 // Since KokkosKernels produced RCM, also reverse the order of the view to get CM
2221 Kokkos::parallel_for(
2222 "Utilities::ReverseCuthillMcKee",
2223 Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
2224 KOKKOS_LAMBDA(const ordinal_type rowIdx) {
2225 view1D(rcmOrder(numRows - 1 - rowIdx)) = rowIdx;
2226 });
2227 return retval;
2228}
2229
2230template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2232 TripleMatrixProduct(const Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& R,
2233 const Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
2234 const Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& P,
2235 Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& Ac,
2236 const Teuchos::ParameterList& pL,
2237 const MueLu::BaseClass& verbObj,
2238 Teuchos::RCP<Teuchos::ParameterList>& APparams,
2239 Teuchos::RCP<Teuchos::ParameterList>& RAPparams,
2240 Level* coarseLevel) {
2241 using Matrix = Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
2242 using MatrixMatrix = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
2243 using MatrixFactory = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
2245
2246 const bool doTranspose = true;
2247 const bool doFillComplete = true;
2248 const bool doOptimizeStorage = true;
2249
2250 const bool useImplicit = pL.get<bool>("transpose: use implicit");
2251
2252 const std::string matrixName = (pL.isType<std::string>("Matrix name")) ? pL.get<std::string>("Matrix name") : "A";
2253 const std::string prolongatorName = (pL.isType<std::string>("Prolongator name")) ? pL.get<std::string>("Prolongator name") : "P";
2254 const std::string restrictorName = (pL.isType<std::string>("Restrictor name")) ? pL.get<std::string>("Restrictor name") : "R";
2255 std::string coarseMatrixName;
2256 if (pL.isType<std::string>("coarseMatrixName"))
2257 coarseMatrixName = pL.get<std::string>("coarseMatrixName");
2258 else {
2259 if (matrixName.size() == 1)
2260 coarseMatrixName = matrixName + "c";
2261 else
2262 coarseMatrixName = matrixName + "_coarse";
2263 }
2264
2265 std::string levelstr, labelstr;
2266 if (coarseLevel != nullptr) {
2267 std::ostringstream levelss;
2268 levelss << coarseLevel->GetLevelID();
2269 levelstr = levelss.str();
2270 labelstr = FormattingHelper::getColonLabel(coarseLevel->getObjectLabel());
2271 }
2272
2273 bool isGPU = Node::is_gpu;
2274
2275 // Reuse coarse matrix memory if available (multiple solve)
2276 if (RAPparams.is_null())
2277 RAPparams = rcp(new ParameterList);
2278 if (pL.isSublist("matrixmatrix: kernel params"))
2279 RAPparams->setParameters(pL.sublist("matrixmatrix: kernel params"));
2280
2281 if (RAPparams->isParameter("graph")) {
2282 Ac = RAPparams->get<RCP<Matrix>>("graph");
2283
2284 // Some eigenvalue may have been cached with the matrix in the previous run.
2285 // As the matrix values will be updated, we need to reset the eigenvalue.
2286 Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<Scalar>::one());
2287 }
2288
2289 // We *always* need global constants for the RAP, but not for the temps
2290 RAPparams->set("compute global constants: temporaries", RAPparams->get("compute global constants: temporaries", false));
2291 RAPparams->set("compute global constants", true);
2292
2293 if (pL.get<bool>("rap: triple product") == false || isGPU) {
2294 if (pL.get<bool>("rap: triple product") && isGPU)
2295 verbObj.GetOStream(Warnings1) << "Switching from triple product to R x (A x P) since triple product has not been implemented for "
2296 << Node::execution_space::name() << std::endl;
2297
2298 RCP<Matrix> AP;
2299
2300 // Reuse pattern if available (multiple solve)
2301 if (APparams.is_null())
2302 APparams = rcp(new ParameterList);
2303 if (pL.isSublist("matrixmatrix: kernel params"))
2304 APparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
2305
2306 // By default, we don't need global constants for A*P
2307 APparams->set("compute global constants: temporaries", APparams->get("compute global constants: temporaries", false));
2308 APparams->set("compute global constants", APparams->get("compute global constants", false));
2309
2310 if (APparams->isParameter("graph"))
2311 AP = APparams->get<RCP<Matrix>>("graph");
2312
2313 std::string monitorstrAP = "MxM: " + matrixName + " x " + prolongatorName;
2314 std::string timerstrAP = "MueLu::" + matrixName + "*" + prolongatorName;
2315 if (!labelstr.empty())
2316 timerstrAP = labelstr + timerstrAP;
2317 if (!levelstr.empty())
2318 timerstrAP = timerstrAP + "-" + levelstr;
2319
2320 {
2321 SubFactoryMonitor subM(verbObj, monitorstrAP, *coarseLevel);
2322
2323 AP = MatrixMatrix::Multiply(*A, !doTranspose, *P, !doTranspose, AP, verbObj.GetOStream(Statistics2),
2324 doFillComplete, doOptimizeStorage, timerstrAP, APparams);
2325 }
2326
2327 // Allow optimization of storage.
2328 // This is necessary for new faster Epetra MM kernels.
2329 // Seems to work with matrix modifications to repair diagonal entries.
2330
2331 std::string timerstrRAP, monitorstrRAP;
2332 if (useImplicit) {
2333 monitorstrRAP = "MxM: " + prolongatorName + "' x (" + matrixName + prolongatorName + ") (implicit)";
2334 timerstrRAP = "MueLu::" + restrictorName + "*(" + matrixName + "*" + prolongatorName + ")-implicit";
2335 } else {
2336 monitorstrRAP = "MxM: " + restrictorName + " x (" + matrixName + prolongatorName + ") (explicit)";
2337 timerstrRAP = "MueLu::" + restrictorName + "*(" + matrixName + "*" + prolongatorName + ")-explicit";
2338 }
2339 if (!labelstr.empty())
2340 timerstrRAP = labelstr + timerstrRAP;
2341 if (!levelstr.empty())
2342 timerstrRAP = timerstrRAP + "-" + levelstr;
2343
2344 if (useImplicit) {
2345 SubFactoryMonitor m2(verbObj, monitorstrRAP, *coarseLevel);
2346
2347 Ac = MatrixMatrix::Multiply(*P, doTranspose, *AP, !doTranspose, Ac, verbObj.GetOStream(Statistics2),
2348 doFillComplete, doOptimizeStorage, timerstrRAP, RAPparams);
2349
2350 } else {
2351 SubFactoryMonitor m2(verbObj, monitorstrRAP, *coarseLevel);
2352
2353 Ac = MatrixMatrix::Multiply(*R, !doTranspose, *AP, !doTranspose, Ac, verbObj.GetOStream(Statistics2),
2354 doFillComplete, doOptimizeStorage, timerstrRAP, RAPparams);
2355 }
2356
2357 if (!isGPU) {
2358 APparams->set("graph", AP);
2359 }
2360
2361 } else {
2362 std::string monitorstrRAP;
2363 std::string timerstrRAP;
2364 if (useImplicit) {
2365 monitorstrRAP = "MxMxM: " + restrictorName + " x " + matrixName + " x " + prolongatorName + " (implicit)";
2366 timerstrRAP = "MueLu::" + restrictorName + "*" + matrixName + "*" + prolongatorName + "-implicit";
2367 } else {
2368 monitorstrRAP = "MxMxM: " + restrictorName + " x " + matrixName + " x " + prolongatorName + " (explicit)";
2369 timerstrRAP = "MueLu::" + restrictorName + "*" + matrixName + "*" + prolongatorName + "-explicit";
2370 }
2371 if (!labelstr.empty())
2372 timerstrRAP = labelstr + timerstrRAP;
2373 if (!levelstr.empty())
2374 timerstrRAP = timerstrRAP + "-" + levelstr;
2375
2376 if (useImplicit) {
2377 Ac = MatrixFactory::Build(P->getDomainMap(), Teuchos::as<LocalOrdinal>(0));
2378
2379 SubFactoryMonitor m2(verbObj, monitorstrRAP, *coarseLevel);
2380
2381 Xpetra::TripleMatrixMultiply<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2382 MultiplyRAP(*P, doTranspose, *A, !doTranspose, *P, !doTranspose, *Ac, doFillComplete,
2383 doOptimizeStorage, timerstrRAP,
2384 RAPparams);
2385 } else {
2386 Ac = MatrixFactory::Build(R->getRowMap(), Teuchos::as<LocalOrdinal>(0));
2387
2388 SubFactoryMonitor m2(verbObj, monitorstrRAP, *coarseLevel);
2389
2390 Xpetra::TripleMatrixMultiply<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2391 MultiplyRAP(*R, !doTranspose, *A, !doTranspose, *P, !doTranspose, *Ac, doFillComplete,
2392 doOptimizeStorage, timerstrRAP,
2393 RAPparams);
2394 }
2395 }
2396
2397 Teuchos::ArrayView<const double> relativeFloor = pL.get<Teuchos::Array<double>>("rap: relative diagonal floor")();
2398 if (relativeFloor.size() > 0) {
2399 Xpetra::MatrixUtils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::RelativeDiagonalBoost(Ac, relativeFloor, verbObj.GetOStream(Statistics2));
2400 }
2401
2402 bool repairZeroDiagonals = pL.get<bool>("RepairMainDiagonal") || pL.get<bool>("rap: fix zero diagonals");
2403 bool checkAc = pL.get<bool>("CheckMainDiagonal") || pL.get<bool>("rap: fix zero diagonals");
2404
2405 if (checkAc || repairZeroDiagonals) {
2406 using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
2407 magnitudeType threshold;
2408 if (pL.isType<magnitudeType>("rap: fix zero diagonals threshold"))
2409 threshold = pL.get<magnitudeType>("rap: fix zero diagonals threshold");
2410 else
2411 threshold = Teuchos::as<magnitudeType>(pL.get<double>("rap: fix zero diagonals threshold"));
2412 Scalar replacement = Teuchos::as<Scalar>(pL.get<double>("rap: fix zero diagonals replacement"));
2413 Xpetra::MatrixUtils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::CheckRepairMainDiagonal(Ac, repairZeroDiagonals, verbObj.GetOStream(Warnings1), threshold, replacement);
2414 }
2415
2416 if (verbObj.IsPrint(Statistics2)) {
2417 RCP<ParameterList> params = rcp(new ParameterList());
2418 params->set("printLoadBalancingInfo", true);
2419 params->set("printCommInfo", true);
2420 verbObj.GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ac, coarseMatrixName, params);
2421 }
2422
2423 if (!isGPU) {
2424 RAPparams->set("graph", Ac);
2425 }
2426
2427 if (Behavior::debug())
2428 MatrixUtils::checkLocalRowMapMatchesColMap(*Ac);
2429}
2430
2431} // namespace MueLu
2432
2433#define MUELU_UTILITIESBASE_SHORT
2434#endif // MUELU_UTILITIESBASE_DEF_HPP
2435
2436// LocalWords: LocalOrdinal
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
MueLu::DefaultNode Node
Base class for MueLu classes.
Exception throws to report incompatible objects (like maps).
Exception throws to report errors in the internal logical of the program.
This class specifies the default factory that should generate some data on a Level if the data does n...
Factory for building the approximate inverse of a matrix.
Class that holds all level-specific information.
void SetComm(RCP< const Teuchos::Comm< int > > const &comm)
void SetLevelID(int levelID)
Set level number.
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
void Request(const FactoryBase &factory)
Increment the storage counter for all the inputs of a factory.
void SetFactoryManager(const RCP< const FactoryManagerBase > &factoryManager)
Set default factories (used internally by Hierarchy::SetLevel()).
static const RCP< const NoFactory > getRCP()
Static Get() functions.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
Teuchos::ScalarTraits< Scalar >::magnitudeType Magnitude
static Teuchos::RCP< Vector > GetInverse(Teuchos::RCP< const Vector > v, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps() *100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero())
Return vector containing inverse of input vector.
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps() *100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool doLumped=false)
Extract Matrix Diagonal.
static RCP< Xpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > GetThresholdedGraph(const RCP< Matrix > &A, const Magnitude threshold)
Threshold a graph.
static Teuchos::ArrayRCP< Scalar > GetMatrixDiagonal_arcp(const Matrix &A)
Extract Matrix Diagonal.
static RCP< Matrix > GetThresholdedMatrix(const RCP< Matrix > &Ain, const Magnitude threshold, const bool keepDiagonal=true)
Threshold a matrix.
static RCP< Matrix > Crs2Op(RCP< CrsMatrix > Op)
static RCP< Vector > GetMatrixDiagonal(const Matrix &A)
Extract Matrix Diagonal.
Teuchos::FancyOStream & GetOStream(MsgType type, int thisProcRankOnly=0) const
Get an output stream for outputting the input message type.
bool IsPrint(MsgType type, int thisProcRankOnly=-1) const
Find out whether we need to print out information for a specific message type.
Namespace for MueLu classes and methods.
Kokkos::View< bool *, memory_space > DetectDirichletRows_kokkos(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
@ Statistics2
Print even more statistics.
@ Warnings1
Additional warnings.
void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, memory_space > &dirichletRows)
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > removeSmallEntries(Teuchos::RCP< Xpetra::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType threshold, const bool keepDiagonal)
KOKKOS_FORCEINLINE_FUNCTION bool isDirichletRow(typename CrsMatrix::ordinal_type rowId, KokkosSparse::SparseRowViewConst< CrsMatrix > &row, const typename KokkosKernels::ArithTraits< typename CrsMatrix::value_type >::magnitudeType &tol, const bool count_twos_as_dirichlet)