Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_computeRowAndColumnOneNorms_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
11#define TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
12
19
20#if KOKKOS_VERSION >= 40799
21#include "KokkosKernels_ArithTraits.hpp"
22#else
23#include "Kokkos_ArithTraits.hpp"
24#endif
25#include "Kokkos_Macros.hpp"
28#include "Tpetra_CrsMatrix.hpp"
29#include "Tpetra_Export.hpp"
30#include "Tpetra_Map.hpp"
31#include "Tpetra_MultiVector.hpp"
32#include "Tpetra_RowMatrix.hpp"
33#include "Kokkos_Core.hpp"
34#include "Teuchos_CommHelpers.hpp"
35#include <memory>
36
37namespace Tpetra {
38namespace Details {
39
40template <class SC, class LO, class GO, class NT>
41std::size_t
42lclMaxNumEntriesRowMatrix(const Tpetra::RowMatrix<SC, LO, GO, NT>& A) {
43 const auto& rowMap = *(A.getRowMap());
44 const LO lclNumRows = static_cast<LO>(rowMap.getLocalNumElements());
45
46 std::size_t maxNumEnt{0};
47 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
48 const std::size_t numEnt = A.getNumEntriesInLocalRow(lclRow);
49 maxNumEnt = numEnt > maxNumEnt ? numEnt : maxNumEnt;
50 }
51 return maxNumEnt;
52}
53
54template <class SC, class LO, class GO, class NT>
55void forEachLocalRowMatrixRow(
57 const LO lclNumRows,
58 const std::size_t maxNumEnt,
59 std::function<void(
60 const LO lclRow,
61 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& /*ind*/,
62 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& /*val*/,
63 std::size_t /*numEnt*/)>
64 doForEachRow) {
65 using lids_type = typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type;
66 using vals_type = typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type;
67 lids_type indBuf("indices", maxNumEnt);
68 vals_type valBuf("values", maxNumEnt);
69
70 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
71 std::size_t numEnt = A.getNumEntriesInLocalRow(lclRow);
72 lids_type ind = Kokkos::subview(indBuf, std::make_pair((size_t)0, numEnt));
73 vals_type val = Kokkos::subview(valBuf, std::make_pair((size_t)0, numEnt));
74 A.getLocalRowCopy(lclRow, ind, val, numEnt);
75 doForEachRow(lclRow, ind, val, numEnt);
76 }
77}
78
79template <class SC, class LO, class GO, class NT>
80void forEachLocalRowMatrixRow(
82 std::function<void(
83 const LO lclRow,
84 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& /*ind*/,
85 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& /*val*/,
86 std::size_t /*numEnt*/)>
87 doForEachRow) {
88 const auto& rowMap = *(A.getRowMap());
89 const LO lclNumRows = static_cast<LO>(rowMap.getLocalNumElements());
90 const std::size_t maxNumEnt = lclMaxNumEntriesRowMatrix(A);
91
92 forEachLocalRowMatrixRow<SC, LO, GO, NT>(A, lclNumRows, maxNumEnt, doForEachRow);
93}
94
98template <class SC, class LO, class GO, class NT>
99#if KOKKOS_VERSION >= 40799
100void computeLocalRowScaledColumnNorms_RowMatrix(EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
101#else
102void computeLocalRowScaledColumnNorms_RowMatrix(EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
103#endif
104 typename NT::device_type>& result,
106#if KOKKOS_VERSION >= 40799
107 using KAT = KokkosKernels::ArithTraits<SC>;
108#else
109 using KAT = Kokkos::ArithTraits<SC>;
110#endif
111 using mag_type = typename KAT::mag_type;
112#if KOKKOS_VERSION >= 40799
113 using KAV = KokkosKernels::ArithTraits<typename KAT::val_type>;
114#else
115 using KAV = Kokkos::ArithTraits<typename KAT::val_type>;
116#endif
117
118 auto rowNorms_h = Kokkos::create_mirror_view(result.rowNorms);
119
120 // DEEP_COPY REVIEW - NOT TESTED
121 Kokkos::deep_copy(rowNorms_h, result.rowNorms);
122 auto rowScaledColNorms_h = Kokkos::create_mirror_view(result.rowScaledColNorms);
123
125 [&](const LO lclRow,
126 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
127 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
128 std::size_t numEnt) {
129 const mag_type rowNorm = rowNorms_h[lclRow];
130 for (std::size_t k = 0; k < numEnt; ++k) {
131 const mag_type matrixAbsVal = KAV::abs(val[k]);
132 const LO lclCol = ind[k];
133
135 }
136 });
137
138 // DEEP_COPY REVIEW - NOT TESTED
139 Kokkos::deep_copy(result.rowScaledColNorms, rowScaledColNorms_h);
140}
141
144template <class SC, class LO, class GO, class NT>
145#if KOKKOS_VERSION >= 40799
147#else
149#endif
151#if KOKKOS_VERSION >= 40799
152 using KAT = KokkosKernels::ArithTraits<SC>;
153#else
154 using KAT = Kokkos::ArithTraits<SC>;
155#endif
156 using val_type = typename KAT::val_type;
157#if KOKKOS_VERSION >= 40799
158 using KAV = KokkosKernels::ArithTraits<val_type>;
159#else
160 using KAV = Kokkos::ArithTraits<val_type>;
161#endif
162 using mag_type = typename KAT::mag_type;
163#if KOKKOS_VERSION >= 40799
164 using KAM = KokkosKernels::ArithTraits<mag_type>;
165#else
166 using KAM = Kokkos::ArithTraits<mag_type>;
167#endif
168 using device_type = typename NT::device_type;
169 using equib_info_type = EquilibrationInfo<val_type, device_type>;
170
171 const auto& rowMap = *(A.getRowMap());
172 const auto& colMap = *(A.getColMap());
173 const LO lclNumRows = static_cast<LO>(rowMap.getLocalNumElements());
174 const LO lclNumCols = 0; // don't allocate column-related Views
175 constexpr bool assumeSymmetric = false; // doesn't matter here
176 equib_info_type result(lclNumRows, lclNumCols, assumeSymmetric);
177 auto result_h = result.createMirrorView();
178
179 const auto val_zero = KAV::zero();
180 const auto mag_zero = KAM::zero();
181
183 [&](const LO lclRow,
184 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
185 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
186 std::size_t numEnt) {
187 mag_type rowNorm = mag_zero;
188 val_type diagVal = val_zero;
189 const GO gblRow = rowMap.getGlobalElement(lclRow);
190 // OK if invalid(); then we simply won't find the diagonal entry.
191 const GO lclDiagColInd = colMap.getLocalElement(gblRow);
192
193 for (std::size_t k = 0; k < numEnt; ++k) {
194 const val_type matrixVal = val[k];
195 if (KAV::isInf(matrixVal)) {
196 result_h.foundInf = true;
197 }
198 if (KAV::isNan(matrixVal)) {
199 result_h.foundNan = true;
200 }
201 const mag_type matrixAbsVal = KAV::abs(matrixVal);
203 const LO lclCol = ind[k];
204 if (lclCol == lclDiagColInd) {
205 diagVal += val[k]; // repeats count additively
206 }
207 } // for each entry in row
208
209 // This is a local result. If the matrix has an overlapping
210 // row Map, then the global result might differ.
211 if (diagVal == KAV::zero()) {
212 result_h.foundZeroDiag = true;
213 }
214 if (rowNorm == KAM::zero()) {
215 result_h.foundZeroRowNorm = true;
216 }
217 // NOTE (mfh 24 May 2018) We could actually compute local
218 // rowScaledColNorms in situ at this point, if ! assumeSymmetric
219 // and row Map is the same as range Map (so that the local row
220 // norms are the same as the global row norms).
221 result_h.rowDiagonalEntries[lclRow] += diagVal;
222 result_h.rowNorms[lclRow] = rowNorm;
223 });
224
225 result.assign(result_h);
226 return result;
227}
228
231template <class SC, class LO, class GO, class NT>
232#if KOKKOS_VERSION >= 40799
234#else
236#endif
238 const bool assumeSymmetric) {
239#if KOKKOS_VERSION >= 40799
240 using KAT = KokkosKernels::ArithTraits<SC>;
241#else
242 using KAT = Kokkos::ArithTraits<SC>;
243#endif
244 using val_type = typename KAT::val_type;
245#if KOKKOS_VERSION >= 40799
246 using KAV = KokkosKernels::ArithTraits<val_type>;
247#else
248 using KAV = Kokkos::ArithTraits<val_type>;
249#endif
250 using mag_type = typename KAT::mag_type;
251#if KOKKOS_VERSION >= 40799
252 using KAM = KokkosKernels::ArithTraits<mag_type>;
253#else
254 using KAM = Kokkos::ArithTraits<mag_type>;
255#endif
256 using device_type = typename NT::device_type;
257
258 const auto& rowMap = *(A.getRowMap());
259 const auto& colMap = *(A.getColMap());
260 const LO lclNumRows = static_cast<LO>(rowMap.getLocalNumElements());
261 const LO lclNumCols = static_cast<LO>(colMap.getLocalNumElements());
262
264 auto result_h = result.createMirrorView();
265
266 const auto val_zero = KAV::zero();
267 const auto mag_zero = KAM::zero();
268
270 [&](const LO lclRow,
271 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
272 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
273 std::size_t numEnt) {
274 mag_type rowNorm = mag_zero;
275 val_type diagVal = val_zero;
276 const GO gblRow = rowMap.getGlobalElement(lclRow);
277 // OK if invalid(); then we simply won't find the diagonal entry.
278 const GO lclDiagColInd = colMap.getLocalElement(gblRow);
279
280 for (std::size_t k = 0; k < numEnt; ++k) {
281 const val_type matrixVal = val[k];
282 if (KAV::isInf(matrixVal)) {
283 result_h.foundInf = true;
284 }
285 if (KAV::isNan(matrixVal)) {
286 result_h.foundNan = true;
287 }
288 const mag_type matrixAbsVal = KAV::abs(matrixVal);
290 const LO lclCol = ind[k];
291 if (lclCol == lclDiagColInd) {
292 diagVal += val[k]; // repeats count additively
293 }
294 if (!assumeSymmetric) {
295 result_h.colNorms[lclCol] += matrixAbsVal;
296 }
297 } // for each entry in row
298
299 // This is a local result. If the matrix has an overlapping
300 // row Map, then the global result might differ.
301 if (diagVal == KAV::zero()) {
302 result_h.foundZeroDiag = true;
303 }
304 if (rowNorm == KAM::zero()) {
305 result_h.foundZeroRowNorm = true;
306 }
307 // NOTE (mfh 24 May 2018) We could actually compute local
308 // rowScaledColNorms in situ at this point, if ! assumeSymmetric
309 // and row Map is the same as range Map (so that the local row
310 // norms are the same as the global row norms).
311 result_h.rowDiagonalEntries[lclRow] += diagVal;
312 result_h.rowNorms[lclRow] = rowNorm;
313 if (!assumeSymmetric &&
314 lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid()) {
315 result_h.colDiagonalEntries[lclDiagColInd] += diagVal;
316 }
317 });
318
319 result.assign(result_h);
320 return result;
321}
322
323template <class SC, class LO, class GO, class NT>
324class ComputeLocalRowScaledColumnNorms {
325 public:
326 using crs_matrix_type = ::Tpetra::CrsMatrix<SC, LO, GO, NT>;
327#if KOKKOS_VERSION >= 40799
328 using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
329#else
330 using val_type = typename Kokkos::ArithTraits<SC>::val_type;
331#endif
332#if KOKKOS_VERSION >= 40799
333 using mag_type = typename KokkosKernels::ArithTraits<val_type>::mag_type;
334#else
335 using mag_type = typename Kokkos::ArithTraits<val_type>::mag_type;
336#endif
337 using device_type = typename crs_matrix_type::device_type;
338 using policy_type = Kokkos::TeamPolicy<typename device_type::execution_space, LO>;
339
340 ComputeLocalRowScaledColumnNorms(const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
341 const Kokkos::View<const mag_type*, device_type>& rowNorms,
342 const crs_matrix_type& A)
343 : rowScaledColNorms_(rowScaledColNorms)
344 , rowNorms_(rowNorms)
345 , A_lcl_(A.getLocalMatrixDevice()) {}
346
347 KOKKOS_INLINE_FUNCTION void operator()(const typename policy_type::member_type& team) const {
348#if KOKKOS_VERSION >= 40799
349 using KAT = KokkosKernels::ArithTraits<val_type>;
350#else
351 using KAT = Kokkos::ArithTraits<val_type>;
352#endif
353
354 const LO lclRow = team.league_rank();
355 const auto curRow = A_lcl_.rowConst(lclRow);
356 const mag_type rowNorm = rowNorms_[lclRow];
357 const LO numEnt = curRow.length;
358 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, numEnt), [&](const LO k) {
359 const mag_type matrixAbsVal = KAT::abs(curRow.value(k));
360 const LO lclCol = curRow.colidx(k);
361
362 Kokkos::atomic_add(&rowScaledColNorms_[lclCol], matrixAbsVal / rowNorm);
363 });
364 }
365
366 static void
367 run(const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
368 const Kokkos::View<const mag_type*, device_type>& rowNorms,
369 const crs_matrix_type& A) {
370 using functor_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
371
372 functor_type functor(rowScaledColNorms, rowNorms, A);
373 const LO lclNumRows =
374 static_cast<LO>(A.getRowMap()->getLocalNumElements());
375 Kokkos::parallel_for("computeLocalRowScaledColumnNorms",
376 policy_type(lclNumRows, Kokkos::AUTO), functor);
377 }
378
379 private:
380 Kokkos::View<mag_type*, device_type> rowScaledColNorms_;
381 Kokkos::View<const mag_type*, device_type> rowNorms_;
382
383 using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
384 local_matrix_device_type A_lcl_;
385};
386
387template <class SC, class LO, class GO, class NT>
388#if KOKKOS_VERSION >= 40799
389void computeLocalRowScaledColumnNorms_CrsMatrix(EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
390#else
391void computeLocalRowScaledColumnNorms_CrsMatrix(EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
392#endif
393 typename NT::device_type>& result,
395 using impl_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
396 impl_type::run(result.rowScaledColNorms, result.rowNorms, A);
397}
398
399template <class SC, class LO, class GO, class NT>
400#if KOKKOS_VERSION >= 40799
401void computeLocalRowScaledColumnNorms(EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
402#else
403void computeLocalRowScaledColumnNorms(EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
404#endif
405 typename NT::device_type>& result,
407 using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NT>;
408#if KOKKOS_VERSION >= 40799
409 using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
410#else
411 using val_type = typename Kokkos::ArithTraits<SC>::val_type;
412#endif
413#if KOKKOS_VERSION >= 40799
414 using mag_type = typename KokkosKernels::ArithTraits<val_type>::mag_type;
415#else
416 using mag_type = typename Kokkos::ArithTraits<val_type>::mag_type;
417#endif
418 using device_type = typename NT::device_type;
419
420 auto colMapPtr = A.getColMap();
421 TEUCHOS_TEST_FOR_EXCEPTION(colMapPtr.get() == nullptr, std::invalid_argument,
422 "computeLocalRowScaledColumnNorms: "
423 "Input matrix A must have a nonnull column Map.");
424 const LO lclNumCols = static_cast<LO>(colMapPtr->getLocalNumElements());
425 if (static_cast<std::size_t>(result.rowScaledColNorms.extent(0)) !=
426 static_cast<std::size_t>(lclNumCols)) {
427 result.rowScaledColNorms =
428 Kokkos::View<mag_type*, device_type>("rowScaledColNorms", lclNumCols);
429 }
430
431 const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*>(&A);
432 if (A_crs == nullptr) {
434 } else {
435 computeLocalRowScaledColumnNorms_CrsMatrix(result, *A_crs);
436 }
437}
438
439// Kokkos::parallel_reduce functor that is part of the implementation
440// of computeLocalRowOneNorms_CrsMatrix.
441template <class SC, class LO, class GO, class NT>
442class ComputeLocalRowOneNorms {
443 public:
444#if KOKKOS_VERSION >= 40799
445 using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
446#else
447 using val_type = typename Kokkos::ArithTraits<SC>::val_type;
448#endif
449 using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
450 using local_matrix_device_type =
451 typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
452 using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
453 using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
454
455 ComputeLocalRowOneNorms(const equib_info_type& equib, // in/out
456 const local_matrix_device_type& A_lcl, // in
457 const local_map_type& rowMap, // in
458 const local_map_type& colMap)
459 : // in
460 equib_(equib)
461 , A_lcl_(A_lcl)
462 , rowMap_(rowMap)
463 , colMap_(colMap) {}
464
465 // (result & 1) != 0 means "found Inf."
466 // (result & 2) != 0 means "found NaN."
467 // (result & 4) != 0 means "found zero diag."
468 // (result & 8) != 0 means "found zero row norm."
469 // Pack into a single int so the reduction is cheaper,
470 // esp. on GPU.
471 using value_type = int;
472
473 KOKKOS_INLINE_FUNCTION void init(value_type& dst) const {
474 dst = 0;
475 }
476
477 KOKKOS_INLINE_FUNCTION void
478 join(value_type& dst,
479 const value_type& src) const {
480 dst |= src;
481 }
482
483 KOKKOS_INLINE_FUNCTION void
484 operator()(const typename policy_type::member_type& team, value_type& dst) const {
485#if KOKKOS_VERSION >= 40799
486 using KAT = KokkosKernels::ArithTraits<val_type>;
487#else
488 using KAT = Kokkos::ArithTraits<val_type>;
489#endif
490 using mag_type = typename KAT::mag_type;
491#if KOKKOS_VERSION >= 40799
492 using KAM = KokkosKernels::ArithTraits<mag_type>;
493#else
494 using KAM = Kokkos::ArithTraits<mag_type>;
495#endif
496
497 const LO lclRow = team.league_rank();
498 const GO gblRow = rowMap_.getGlobalElement(lclRow);
499 // OK if invalid(); then we simply won't find the diagonal entry.
500 const GO lclDiagColInd = colMap_.getLocalElement(gblRow);
501
502 const auto curRow = A_lcl_.rowConst(lclRow);
503 const LO numEnt = curRow.length;
504
505 const auto val_zero = KAT::zero();
506 const auto mag_zero = KAM::zero();
507
508 mag_type rowNorm = mag_zero;
509 val_type diagVal = val_zero;
510 value_type dstThread{0};
511
512 Kokkos::parallel_reduce(
513 Kokkos::TeamThreadRange(team, numEnt), [&](const LO k, mag_type& normContrib, val_type& diagContrib, value_type& dstContrib) {
514 const val_type matrixVal = curRow.value(k);
515 if (KAT::isInf(matrixVal)) {
516 dstContrib |= 1;
517 }
518 if (KAT::isNan(matrixVal)) {
519 dstContrib |= 2;
520 }
521 const mag_type matrixAbsVal = KAT::abs(matrixVal);
522 normContrib += matrixAbsVal;
523 const LO lclCol = curRow.colidx(k);
524 if (lclCol == lclDiagColInd) {
525 diagContrib = curRow.value(k); // assume no repeats
526 }
527 },
528 Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread)); // for each entry in row
529
530 // This is a local result. If the matrix has an overlapping
531 // row Map, then the global result might differ.
532 Kokkos::single(Kokkos::PerTeam(team), [&]() {
533 dst |= dstThread;
534 if (diagVal == KAT::zero()) {
535 dst |= 4;
536 }
537 if (rowNorm == KAM::zero()) {
538 dst |= 8;
539 }
540 equib_.rowDiagonalEntries[lclRow] = diagVal;
541 equib_.rowNorms[lclRow] = rowNorm;
542 });
543 }
544
545 private:
546 equib_info_type equib_;
547 local_matrix_device_type A_lcl_;
548 local_map_type rowMap_;
549 local_map_type colMap_;
550};
551
552// Kokkos::parallel_reduce functor that is part of the implementation
553// of computeLocalRowAndColumnOneNorms_CrsMatrix.
554template <class SC, class LO, class GO, class NT>
555class ComputeLocalRowAndColumnOneNorms {
556 public:
557#if KOKKOS_VERSION >= 40799
558 using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
559#else
560 using val_type = typename Kokkos::ArithTraits<SC>::val_type;
561#endif
562 using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
563 using local_matrix_device_type = typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
564 using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
565 using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
566
567 public:
568 ComputeLocalRowAndColumnOneNorms(const equib_info_type& equib, // in/out
569 const local_matrix_device_type& A_lcl, // in
570 const local_map_type& rowMap, // in
571 const local_map_type& colMap)
572 : // in
573 equib_(equib)
574 , A_lcl_(A_lcl)
575 , rowMap_(rowMap)
576 , colMap_(colMap) {}
577
578 // (result & 1) != 0 means "found Inf."
579 // (result & 2) != 0 means "found NaN."
580 // (result & 4) != 0 means "found zero diag."
581 // (result & 8) != 0 means "found zero row norm."
582 // Pack into a single int so the reduction is cheaper,
583 // esp. on GPU.
584 using value_type = int;
585
586 KOKKOS_INLINE_FUNCTION void init(value_type& dst) const {
587 dst = 0;
588 }
589
590 KOKKOS_INLINE_FUNCTION void
591 join(value_type& dst,
592 const value_type& src) const {
593 dst |= src;
594 }
595
596 KOKKOS_INLINE_FUNCTION void
597 operator()(const typename policy_type::member_type& team, value_type& dst) const {
598#if KOKKOS_VERSION >= 40799
599 using KAT = KokkosKernels::ArithTraits<val_type>;
600#else
601 using KAT = Kokkos::ArithTraits<val_type>;
602#endif
603 using mag_type = typename KAT::mag_type;
604#if KOKKOS_VERSION >= 40799
605 using KAM = KokkosKernels::ArithTraits<mag_type>;
606#else
607 using KAM = Kokkos::ArithTraits<mag_type>;
608#endif
609
610 const LO lclRow = team.league_rank();
611 const GO gblRow = rowMap_.getGlobalElement(lclRow);
612 // OK if invalid(); then we simply won't find the diagonal entry.
613 const GO lclDiagColInd = colMap_.getLocalElement(gblRow);
614
615 const auto curRow = A_lcl_.rowConst(lclRow);
616 const LO numEnt = curRow.length;
617
618 const auto val_zero = KAT::zero();
619 const auto mag_zero = KAM::zero();
620
621 mag_type rowNorm = mag_zero;
622 val_type diagVal = val_zero;
623 value_type dstThread{0};
624
625 Kokkos::parallel_reduce(
626 Kokkos::TeamThreadRange(team, numEnt), [&](const LO k, mag_type& normContrib, val_type& diagContrib, value_type& dstContrib) {
627 const val_type matrixVal = curRow.value(k);
628 if (KAT::isInf(matrixVal)) {
629 dstContrib |= 1;
630 }
631 if (KAT::isNan(matrixVal)) {
632 dstContrib |= 2;
633 }
634 const mag_type matrixAbsVal = KAT::abs(matrixVal);
635 normContrib += matrixAbsVal;
636 const LO lclCol = curRow.colidx(k);
637 if (lclCol == lclDiagColInd) {
638 diagContrib = curRow.value(k); // assume no repeats
639 }
640 if (!equib_.assumeSymmetric) {
641 Kokkos::atomic_add(&(equib_.colNorms[lclCol]), matrixAbsVal);
642 }
643 },
644 Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread)); // for each entry in row
645
646 // This is a local result. If the matrix has an overlapping
647 // row Map, then the global result might differ.
648 Kokkos::single(Kokkos::PerTeam(team), [&]() {
649 dst |= dstThread;
650 if (diagVal == KAT::zero()) {
651 dst |= 4;
652 }
653 if (rowNorm == KAM::zero()) {
654 dst |= 8;
655 }
656 // NOTE (mfh 24 May 2018) We could actually compute local
657 // rowScaledColNorms in situ at this point, if ! assumeSymmetric
658 // and row Map is the same as range Map (so that the local row
659 // norms are the same as the global row norms).
660 equib_.rowDiagonalEntries[lclRow] = diagVal;
661 equib_.rowNorms[lclRow] = rowNorm;
662 if (!equib_.assumeSymmetric &&
663 lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid()) {
664 // Don't need an atomic update here, since this lclDiagColInd is
665 // a one-to-one function of lclRow.
666 equib_.colDiagonalEntries[lclDiagColInd] += diagVal;
667 }
668 });
669 }
670
671 private:
672 equib_info_type equib_;
673 local_matrix_device_type A_lcl_;
674 local_map_type rowMap_;
675 local_map_type colMap_;
676};
677
680template <class SC, class LO, class GO, class NT>
681#if KOKKOS_VERSION >= 40799
682EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type, typename NT::device_type>
683#else
684EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
685#endif
687 using execution_space = typename NT::device_type::execution_space;
688 using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
690#if KOKKOS_VERSION >= 40799
691 using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
692#else
693 using val_type = typename Kokkos::ArithTraits<SC>::val_type;
694#endif
695 using device_type = typename NT::device_type;
696 using equib_info_type = EquilibrationInfo<val_type, device_type>;
697
698 const LO lclNumRows = static_cast<LO>(A.getRowMap()->getLocalNumElements());
699 const LO lclNumCols = 0; // don't allocate column-related Views
700 constexpr bool assumeSymmetric = false; // doesn't matter here
701 equib_info_type equib(lclNumRows, lclNumCols, assumeSymmetric);
702
703 functor_type functor(equib, A.getLocalMatrixDevice(),
704 A.getRowMap()->getLocalMap(),
705 A.getColMap()->getLocalMap());
706 int result = 0;
707 Kokkos::parallel_reduce("computeLocalRowOneNorms",
708 policy_type(lclNumRows, Kokkos::AUTO), functor,
709 result);
710 equib.foundInf = (result & 1) != 0;
711 equib.foundNan = (result & 2) != 0;
712 equib.foundZeroDiag = (result & 4) != 0;
713 equib.foundZeroRowNorm = (result & 8) != 0;
714 return equib;
715}
716
719template <class SC, class LO, class GO, class NT>
720#if KOKKOS_VERSION >= 40799
722#else
724#endif
726 const bool assumeSymmetric) {
727 using execution_space = typename NT::device_type::execution_space;
728 using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
730#if KOKKOS_VERSION >= 40799
731 using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
732#else
733 using val_type = typename Kokkos::ArithTraits<SC>::val_type;
734#endif
735 using device_type = typename NT::device_type;
736 using equib_info_type = EquilibrationInfo<val_type, device_type>;
737
738 const LO lclNumRows = static_cast<LO>(A.getRowMap()->getLocalNumElements());
739 const LO lclNumCols = static_cast<LO>(A.getColMap()->getLocalNumElements());
740 equib_info_type equib(lclNumRows, lclNumCols, assumeSymmetric);
741
742 functor_type functor(equib, A.getLocalMatrixDevice(),
743 A.getRowMap()->getLocalMap(),
744 A.getColMap()->getLocalMap());
745 int result = 0;
746 Kokkos::parallel_reduce("computeLocalRowAndColumnOneNorms",
747 policy_type(lclNumRows, Kokkos::AUTO), functor,
748 result);
749 equib.foundInf = (result & 1) != 0;
750 equib.foundNan = (result & 2) != 0;
751 equib.foundZeroDiag = (result & 4) != 0;
752 equib.foundZeroRowNorm = (result & 8) != 0;
753 return equib;
754}
755
760template <class SC, class LO, class GO, class NT>
761#if KOKKOS_VERSION >= 40799
763#else
765#endif
766 typename NT::device_type>
768 using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NT>;
769 const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*>(&A);
770
771 if (A_crs == nullptr) {
773 } else {
775 }
776}
777
799template <class SC, class LO, class GO, class NT>
800#if KOKKOS_VERSION >= 40799
802#else
804#endif
806 const bool assumeSymmetric) {
807 using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NT>;
808 const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*>(&A);
809
810 if (A_crs == nullptr) {
811 return computeLocalRowAndColumnOneNorms_RowMatrix(A, assumeSymmetric);
812 } else {
813 return computeLocalRowAndColumnOneNorms_CrsMatrix(*A_crs, assumeSymmetric);
814 }
815}
816
817template <class SC, class LO, class GO, class NT>
818auto getLocalView_1d_readOnly(
820 const LO whichColumn)
821 -> decltype(Kokkos::subview(X.getLocalViewDevice(Access::ReadOnly),
822 Kokkos::ALL(), whichColumn)) {
823 if (X.isConstantStride()) {
824 return Kokkos::subview(X.getLocalViewDevice(Access::ReadOnly),
825 Kokkos::ALL(), whichColumn);
826 } else {
827 auto X_whichColumn = X.getVector(whichColumn);
828 return Kokkos::subview(X_whichColumn->getLocalViewDevice(Access::ReadOnly),
829 Kokkos::ALL(), 0);
830 }
831}
832
833template <class SC, class LO, class GO, class NT>
834auto getLocalView_1d_writeOnly(
836 const LO whichColumn)
837 -> decltype(Kokkos::subview(X.getLocalViewDevice(Access::ReadWrite),
838 Kokkos::ALL(), whichColumn)) {
839 if (X.isConstantStride()) {
840 return Kokkos::subview(X.getLocalViewDevice(Access::ReadWrite),
841 Kokkos::ALL(), whichColumn);
842 } else {
843 auto X_whichColumn = X.getVectorNonConst(whichColumn);
844 return Kokkos::subview(X_whichColumn->getLocalViewDevice(Access::ReadWrite),
845 Kokkos::ALL(), 0);
846 }
847}
848
849template <class SC, class LO, class GO, class NT, class ViewValueType>
850void copy1DViewIntoMultiVectorColumn(
852 const LO whichColumn,
853 const Kokkos::View<ViewValueType*, typename NT::device_type>& view) {
854 auto X_lcl = getLocalView_1d_writeOnly(X, whichColumn);
855 Tpetra::Details::copyConvert(X_lcl, view);
856}
857
858template <class SC, class LO, class GO, class NT, class ViewValueType>
859void copy1DViewIntoMultiVectorColumn_mag(
861 const LO whichColumn,
862 const Kokkos::View<ViewValueType*, typename NT::device_type>& view) {
863#if KOKKOS_VERSION >= 40799
864 using KAT = KokkosKernels::ArithTraits<ViewValueType>;
865#else
866 using KAT = Kokkos::ArithTraits<ViewValueType>;
867#endif
868 using execution_space = typename NT::execution_space;
869 using range_type = Kokkos::RangePolicy<execution_space, LO>;
870 auto X_lcl = getLocalView_1d_writeOnly(X, whichColumn);
871 Kokkos::parallel_for(
872 "",
873 range_type(0, X_lcl.extent(0)),
874 KOKKOS_LAMBDA(const LO i) {
875 X_lcl(i) = KAT::magnitude(view(i));
876 });
877}
878
879template <class SC, class LO, class GO, class NT, class ViewValueType>
880void copyMultiVectorColumnInto1DView(
881 const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
883 const LO whichColumn) {
884 auto X_lcl = getLocalView_1d_readOnly(X, whichColumn);
885 Tpetra::Details::copyConvert(view, X_lcl);
886}
887
888template <class SC, class LO, class GO, class NT, class ViewValueType>
889void copyMultiVectorColumnInto1DView_mag(
890 const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
892 const LO whichColumn) {
893#if KOKKOS_VERSION >= 40799
894 using implScalar = typename KokkosKernels::ArithTraits<SC>::val_type;
895#else
896 using implScalar = typename Kokkos::ArithTraits<SC>::val_type;
897#endif
898#if KOKKOS_VERSION >= 40799
899 using KAT = KokkosKernels::ArithTraits<implScalar>;
900#else
901 using KAT = Kokkos::ArithTraits<implScalar>;
902#endif
903 using range_type = Kokkos::RangePolicy<typename NT::execution_space, LO>;
904 auto X_lcl = getLocalView_1d_readOnly(X, whichColumn);
905 Kokkos::parallel_for(
906 "",
907 range_type(0, X_lcl.extent(0)),
908 KOKKOS_LAMBDA(LO i) {
909 view(i) = KAT::magnitude(X_lcl(i));
910 });
911}
912
913template <class OneDViewType, class IndexType>
914class FindZero {
915 public:
916 static_assert(OneDViewType::rank == 1,
917 "OneDViewType must be a rank-1 Kokkos::View.");
918 static_assert(std::is_integral<IndexType>::value,
919 "IndexType must be a built-in integer type.");
920 FindZero(const OneDViewType& x)
921 : x_(x) {}
922 // Kokkos historically didn't like bool reduction results on CUDA,
923 // so we use int as the reduction result type.
924 KOKKOS_INLINE_FUNCTION void
925 operator()(const IndexType i, int& result) const {
926 using val_type = typename OneDViewType::non_const_value_type;
927#if KOKKOS_VERSION >= 40799
928 result = (x_(i) == KokkosKernels::ArithTraits<val_type>::zero()) ? 1 : result;
929#else
930 result = (x_(i) == Kokkos::ArithTraits<val_type>::zero()) ? 1 : result;
931#endif
932 }
933
934 private:
935 OneDViewType x_;
936};
937
938template <class OneDViewType>
939bool findZero(const OneDViewType& x) {
940 using view_type = typename OneDViewType::const_type;
941 using execution_space = typename view_type::execution_space;
942 using size_type = typename view_type::size_type;
943 using functor_type = FindZero<view_type, size_type>;
944
945 Kokkos::RangePolicy<execution_space, size_type> range(0, x.extent(0));
946 range.set_chunk_size(500); // adjust as needed
947
948 int foundZero = 0;
949 Kokkos::parallel_reduce("findZero", range, functor_type(x), foundZero);
950 return foundZero == 1;
951}
952
953template <class SC, class LO, class GO, class NT>
954#if KOKKOS_VERSION >= 40799
955void globalizeRowOneNorms(EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
956#else
957void globalizeRowOneNorms(EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
958#endif
959 typename NT::device_type>& equib,
962
963 auto G = A.getGraph();
964 TEUCHOS_TEST_FOR_EXCEPTION(G.get() == nullptr, std::invalid_argument,
965 "globalizeRowOneNorms: Input RowMatrix A must have a nonnull graph "
966 "(that is, getGraph() must return nonnull).");
967 TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::invalid_argument,
968 "globalizeRowOneNorms: Input CrsGraph G must be fillComplete.");
969
970 auto exp = G->getExporter();
971 if (!exp.is_null()) {
972 // If the matrix has an overlapping row Map, first Export the
973 // local row norms with ADD CombineMode to a range Map Vector to
974 // get the global row norms, then reverse them back with REPLACE
975 // CombineMode to the row Map Vector. Ditto for the local row
976 // diagonal entries. Use SC instead of mag_type, so we can
977 // communicate both row norms and row diagonal entries at once.
978
979 // FIXME (mfh 16 May 2018) Clever DualView tricks could possibly
980 // avoid the local copy here.
981 mv_type rowMapMV(G->getRowMap(), 2, false);
982
983 copy1DViewIntoMultiVectorColumn(rowMapMV, 0, equib.rowNorms);
984 copy1DViewIntoMultiVectorColumn(rowMapMV, 1, equib.rowDiagonalEntries);
985 {
986 mv_type rangeMapMV(G->getRangeMap(), 2, true);
987 rangeMapMV.doExport(rowMapMV, *exp, Tpetra::ADD); // forward mode
988 rowMapMV.doImport(rangeMapMV, *exp, Tpetra::REPLACE); // reverse mode
989 }
990 copyMultiVectorColumnInto1DView_mag(equib.rowNorms, rowMapMV, 0);
991 copyMultiVectorColumnInto1DView(equib.rowDiagonalEntries, rowMapMV, 1);
992
993 // It's not common for users to solve linear systems with a
994 // nontrival Export, so it's OK for this to cost an additional
995 // pass over rowDiagonalEntries.
996 equib.foundZeroDiag = findZero(equib.rowDiagonalEntries);
997 equib.foundZeroRowNorm = findZero(equib.rowNorms);
998 }
999
1000 constexpr int allReduceCount = 4;
1001 int lclNaughtyMatrix[allReduceCount];
1002 lclNaughtyMatrix[0] = equib.foundInf ? 1 : 0;
1003 lclNaughtyMatrix[1] = equib.foundNan ? 1 : 0;
1004 lclNaughtyMatrix[2] = equib.foundZeroDiag ? 1 : 0;
1005 lclNaughtyMatrix[3] = equib.foundZeroRowNorm ? 1 : 0;
1006
1007 using Teuchos::outArg;
1008 using Teuchos::REDUCE_MAX;
1009 using Teuchos::reduceAll;
1010 auto comm = G->getComm();
1011 int gblNaughtyMatrix[allReduceCount];
1012 reduceAll<int, int>(*comm, REDUCE_MAX, allReduceCount,
1013 lclNaughtyMatrix, gblNaughtyMatrix);
1014
1015 equib.foundInf = gblNaughtyMatrix[0] == 1;
1016 equib.foundNan = gblNaughtyMatrix[1] == 1;
1017 equib.foundZeroDiag = gblNaughtyMatrix[2] == 1;
1018 equib.foundZeroRowNorm = gblNaughtyMatrix[3] == 1;
1019}
1020
1021template <class SC, class LO, class GO, class NT>
1022#if KOKKOS_VERSION >= 40799
1023void globalizeColumnOneNorms(EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
1024#else
1025void globalizeColumnOneNorms(EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
1026#endif
1027 typename NT::device_type>& equib,
1029 const bool assumeSymmetric) // if so, use row norms
1030{
1031#if KOKKOS_VERSION >= 40799
1032 using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
1033#else
1034 using val_type = typename Kokkos::ArithTraits<SC>::val_type;
1035#endif
1036#if KOKKOS_VERSION >= 40799
1037 using mag_type = typename KokkosKernels::ArithTraits<val_type>::mag_type;
1038#else
1039 using mag_type = typename Kokkos::ArithTraits<val_type>::mag_type;
1040#endif
1042 using device_type = typename NT::device_type;
1043
1044 auto G = A.getGraph();
1045 TEUCHOS_TEST_FOR_EXCEPTION(G.get() == nullptr, std::invalid_argument,
1046 "globalizeColumnOneNorms: Input RowMatrix A must have a nonnull graph "
1047 "(that is, getGraph() must return nonnull).");
1048 TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::invalid_argument,
1049 "globalizeColumnOneNorms: Input CrsGraph G must be fillComplete.");
1050
1051 auto imp = G->getImporter();
1052 if (assumeSymmetric) {
1053 const LO numCols = 2;
1054 // Redistribute local row info to global column info.
1055
1056 // Get the data into a MultiVector on the domain Map.
1057 mv_type rowNorms_domMap(G->getDomainMap(), numCols, false);
1058 const bool rowMapSameAsDomainMap = G->getRowMap()->isSameAs(*(G->getDomainMap()));
1059 if (rowMapSameAsDomainMap) {
1060 copy1DViewIntoMultiVectorColumn(rowNorms_domMap, 0, equib.rowNorms);
1061 copy1DViewIntoMultiVectorColumn_mag(rowNorms_domMap, 1, equib.rowDiagonalEntries);
1062 } else {
1063 // This is not a common case; it would normally arise when the
1064 // matrix has an overlapping row Map.
1065 Tpetra::Export<LO, GO, NT> rowToDom(G->getRowMap(), G->getDomainMap());
1066 mv_type rowNorms_rowMap(G->getRowMap(), numCols, true);
1067 copy1DViewIntoMultiVectorColumn(rowNorms_rowMap, 0, equib.rowNorms);
1068 copy1DViewIntoMultiVectorColumn_mag(rowNorms_rowMap, 1, equib.rowDiagonalEntries);
1069 rowNorms_domMap.doExport(rowNorms_rowMap, rowToDom, Tpetra::REPLACE);
1070 }
1071
1072 // Use the existing Import to redistribute the row norms from the
1073 // domain Map to the column Map.
1074 std::unique_ptr<mv_type> rowNorms_colMap;
1075 if (imp.is_null()) {
1076 // Shallow copy of rowNorms_domMap.
1077 rowNorms_colMap =
1078 std::unique_ptr<mv_type>(new mv_type(rowNorms_domMap, *(G->getColMap())));
1079 } else {
1080 rowNorms_colMap =
1081 std::unique_ptr<mv_type>(new mv_type(G->getColMap(), numCols, true));
1082 rowNorms_colMap->doImport(rowNorms_domMap, *imp, Tpetra::REPLACE);
1083 }
1084
1085 // Make sure the result has allocations of the right size.
1086 const LO lclNumCols =
1087 static_cast<LO>(G->getColMap()->getLocalNumElements());
1088 if (static_cast<LO>(equib.colNorms.extent(0)) != lclNumCols) {
1089 equib.colNorms =
1090 Kokkos::View<mag_type*, device_type>("colNorms", lclNumCols);
1091 }
1092 if (static_cast<LO>(equib.colDiagonalEntries.extent(0)) != lclNumCols) {
1093 equib.colDiagonalEntries =
1094 Kokkos::View<val_type*, device_type>("colDiagonalEntries", lclNumCols);
1095 }
1096
1097 // Copy row norms and diagonal entries, appropriately
1098 // redistributed, into column norms resp. diagonal entries.
1099 copyMultiVectorColumnInto1DView(equib.colNorms, *rowNorms_colMap, 0);
1100 copyMultiVectorColumnInto1DView(equib.colDiagonalEntries, *rowNorms_colMap, 1);
1101 } else {
1102 if (!imp.is_null()) {
1103 const LO numCols = 3;
1104 // If the matrix has an overlapping column Map (this is usually
1105 // the case), first Export (reverse-mode Import) the local info
1106 // to a domain Map Vector to get the global info, then Import
1107 // them back with REPLACE CombineMode to the column Map Vector.
1108 // Ditto for the row-scaled column norms.
1109
1110 // FIXME (mfh 16 May 2018) Clever DualView tricks could possibly
1111 // avoid the local copy here.
1112 mv_type colMapMV(G->getColMap(), numCols, false);
1113
1114 copy1DViewIntoMultiVectorColumn(colMapMV, 0, equib.colNorms);
1115 copy1DViewIntoMultiVectorColumn_mag(colMapMV, 1, equib.colDiagonalEntries);
1116 copy1DViewIntoMultiVectorColumn(colMapMV, 2, equib.rowScaledColNorms);
1117 {
1118 mv_type domainMapMV(G->getDomainMap(), numCols, true);
1119 domainMapMV.doExport(colMapMV, *imp, Tpetra::ADD); // reverse mode
1120 colMapMV.doImport(domainMapMV, *imp, Tpetra::REPLACE); // forward mode
1121 }
1122 copyMultiVectorColumnInto1DView(equib.colNorms, colMapMV, 0);
1123 copyMultiVectorColumnInto1DView(equib.colDiagonalEntries, colMapMV, 1);
1124 copyMultiVectorColumnInto1DView(equib.rowScaledColNorms, colMapMV, 2);
1125 }
1126 }
1127}
1128
1129} // namespace Details
1130
1131template <class SC, class LO, class GO, class NT>
1132#if KOKKOS_VERSION >= 40799
1133Details::EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
1134#else
1135Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
1136#endif
1137 typename NT::device_type>
1139 TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::invalid_argument,
1140 "computeRowOneNorms: Input matrix A must be fillComplete.");
1142
1143 Details::globalizeRowOneNorms(result, A);
1144 return result;
1145}
1146
1147template <class SC, class LO, class GO, class NT>
1148#if KOKKOS_VERSION >= 40799
1149Details::EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
1150#else
1151Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
1152#endif
1153 typename NT::device_type>
1155 const bool assumeSymmetric) {
1156 TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::invalid_argument,
1157 "computeRowAndColumnOneNorms: Input matrix A must be fillComplete.");
1158 auto result = Details::computeLocalRowAndColumnOneNorms(A, assumeSymmetric);
1159
1160 Details::globalizeRowOneNorms(result, A);
1161 if (!assumeSymmetric) {
1162 // Row-norm-scaled column norms are trivial if the matrix is
1163 // symmetric, since the row norms and column norms are the same in
1164 // that case.
1165 Details::computeLocalRowScaledColumnNorms(result, A);
1166 }
1167 Details::globalizeColumnOneNorms(result, A, assumeSymmetric);
1168 return result;
1169}
1170
1171} // namespace Tpetra
1172
1173//
1174// Explicit instantiation macro
1175//
1176// Must be expanded from within the Tpetra namespace!
1177//
1178
1179#if KOKKOS_VERSION >= 40799
1180#define TPETRA_COMPUTEROWANDCOLUMNONENORMS_INSTANT(SC, LO, GO, NT) \
1181 template Details::EquilibrationInfo<KokkosKernels::ArithTraits<SC>::val_type, NT::device_type> \
1182 computeRowOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A); \
1183 \
1184 template Details::EquilibrationInfo<KokkosKernels::ArithTraits<SC>::val_type, NT::device_type> \
1185 computeRowAndColumnOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A, \
1186 const bool assumeSymmetric);
1187
1188#else
1189#define TPETRA_COMPUTEROWANDCOLUMNONENORMS_INSTANT(SC, LO, GO, NT) \
1190 template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
1191 computeRowOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A); \
1192 \
1193 template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
1194 computeRowAndColumnOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A, \
1195 const bool assumeSymmetric);
1196
1197#endif
1198
1199#endif // TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
Declaration of Tpetra::Details::EquilibrationInfo.
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular,...
Struct that holds views of the contents of a CrsMatrix.
typename Node::device_type device_type
The Kokkos device type.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Implementation details of Tpetra.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsM...
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsMatrix.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::CrsMatrix.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length....
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute LOCAL row and column one-norms ("row sums" etc.) of the input sparse matrix A....
void computeLocalRowScaledColumnNorms_RowMatrix(EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > &result, const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
For a given Tpetra::RowMatrix that is not a Tpetra::CrsMatrix, assume that result....
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute LOCAL row one-norms ("row sums" etc.) of the input sparse matrix A.
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A)
Implementation of computeLocalRowOneNorms for a Tpetra::CrsMatrix.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute global row one-norms ("row sums") of the input sparse matrix A, in a way suitable for one-sid...
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute global row and column one-norms ("row sums" and "column sums") of the input sparse matrix A,...
@ REPLACE
Replace existing values with new values.
@ ADD
Sum new values.
Struct storing results of Tpetra::computeRowAndColumnOneNorms.