Tpetra_computeRowAndColumnOneNorms_def.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
11#define TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
12
19
20#include "KokkosKernels_ArithTraits.hpp"
21#include "Kokkos_Macros.hpp"
24#include "Tpetra_CrsMatrix.hpp"
25#include "Tpetra_Export.hpp"
26#include "Tpetra_Map.hpp"
27#include "Tpetra_MultiVector.hpp"
28#include "Tpetra_RowMatrix.hpp"
29#include "Kokkos_Core.hpp"
30#include "Teuchos_CommHelpers.hpp"
31#include <memory>
32
33namespace Tpetra {
34namespace Details {
35
36template <class SC, class LO, class GO, class NT>
37std::size_t
38lclMaxNumEntriesRowMatrix(const Tpetra::RowMatrix<SC, LO, GO, NT>& A) {
39 const auto& rowMap = *(A.getRowMap());
40 const LO lclNumRows = static_cast<LO>(rowMap.getLocalNumElements());
41
42 std::size_t maxNumEnt{0};
43 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
44 const std::size_t numEnt = A.getNumEntriesInLocalRow(lclRow);
45 maxNumEnt = numEnt > maxNumEnt ? numEnt : maxNumEnt;
46 }
47 return maxNumEnt;
48}
49
50template <class SC, class LO, class GO, class NT>
51void forEachLocalRowMatrixRow(
52 const Tpetra::RowMatrix<SC, LO, GO, NT>& A,
53 const LO lclNumRows,
54 const std::size_t maxNumEnt,
55 std::function<void(
56 const LO lclRow,
57 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& /*ind*/,
58 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& /*val*/,
59 std::size_t /*numEnt*/)>
60 doForEachRow) {
61 using lids_type = typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type;
62 using vals_type = typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type;
63 lids_type indBuf("indices", maxNumEnt);
64 vals_type valBuf("values", maxNumEnt);
65
66 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
67 std::size_t numEnt = A.getNumEntriesInLocalRow(lclRow);
68 lids_type ind = Kokkos::subview(indBuf, std::make_pair((size_t)0, numEnt));
69 vals_type val = Kokkos::subview(valBuf, std::make_pair((size_t)0, numEnt));
70 A.getLocalRowCopy(lclRow, ind, val, numEnt);
71 doForEachRow(lclRow, ind, val, numEnt);
72 }
73}
74
75template <class SC, class LO, class GO, class NT>
76void forEachLocalRowMatrixRow(
77 const Tpetra::RowMatrix<SC, LO, GO, NT>& A,
78 std::function<void(
79 const LO lclRow,
80 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& /*ind*/,
81 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& /*val*/,
82 std::size_t /*numEnt*/)>
83 doForEachRow) {
84 const auto& rowMap = *(A.getRowMap());
85 const LO lclNumRows = static_cast<LO>(rowMap.getLocalNumElements());
86 const std::size_t maxNumEnt = lclMaxNumEntriesRowMatrix(A);
87
88 forEachLocalRowMatrixRow<SC, LO, GO, NT>(A, lclNumRows, maxNumEnt, doForEachRow);
89}
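// Example: a minimal sketch of how a caller might use the row visitor above,
// assuming a hypothetical RowMatrix instance A.  The lambda's parameter types
// must match the std::function signature exactly.
//
//   using row_matrix_type = Tpetra::RowMatrix<SC, LO, GO, NT>;
//   using lids_type = typename row_matrix_type::nonconst_local_inds_host_view_type;
//   using vals_type = typename row_matrix_type::nonconst_values_host_view_type;
//
//   std::size_t lclNumStoredEntries = 0;
//   forEachLocalRowMatrixRow<SC, LO, GO, NT>(A,
//     [&](const LO /*lclRow*/, const lids_type& /*ind*/,
//         const vals_type& /*val*/, std::size_t numEnt) {
//       lclNumStoredEntries += numEnt;
//     });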
90
94template <class SC, class LO, class GO, class NT>
95void computeLocalRowScaledColumnNorms_RowMatrix(EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
96 typename NT::device_type>& result,
97 const Tpetra::RowMatrix<SC, LO, GO, NT>& A) {
98 using KAT = KokkosKernels::ArithTraits<SC>;
99 using mag_type = typename KAT::mag_type;
100 using KAV = KokkosKernels::ArithTraits<typename KAT::val_type>;
101
102 auto rowNorms_h = Kokkos::create_mirror_view(result.rowNorms);
103
104 // DEEP_COPY REVIEW - NOT TESTED
105 Kokkos::deep_copy(rowNorms_h, result.rowNorms);
106 auto rowScaledColNorms_h = Kokkos::create_mirror_view(result.rowScaledColNorms);
107
108 forEachLocalRowMatrixRow<SC, LO, GO, NT>(A,
109 [&](const LO lclRow,
110 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
111 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
112 std::size_t numEnt) {
113 const mag_type rowNorm = rowNorms_h[lclRow];
114 for (std::size_t k = 0; k < numEnt; ++k) {
115 const mag_type matrixAbsVal = KAV::abs(val[k]);
116 const LO lclCol = ind[k];
117
118 rowScaledColNorms_h[lclCol] += matrixAbsVal / rowNorm;
119 }
120 });
121
122 // DEEP_COPY REVIEW - NOT TESTED
123 Kokkos::deep_copy(result.rowScaledColNorms, rowScaledColNorms_h);
124}
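// In scalar terms, the loop above accumulates, for each local column index
// lclCol,
//
//   rowScaledColNorms[lclCol] = sum over stored entries in column lclCol of
//                               |A(lclRow, lclCol)| / rowNorms[lclRow],
//
// where rowNorms[lclRow] is the previously computed one-norm of row lclRow.
// The device-parallel ComputeLocalRowScaledColumnNorms functor below computes
// the same quantity for the Tpetra::CrsMatrix case.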
125
128template <class SC, class LO, class GO, class NT>
129EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type, typename NT::device_type>
130computeLocalRowOneNorms_RowMatrix(const Tpetra::RowMatrix<SC, LO, GO, NT>& A) {
131 using KAT = KokkosKernels::ArithTraits<SC>;
132 using val_type = typename KAT::val_type;
133 using KAV = KokkosKernels::ArithTraits<val_type>;
134 using mag_type = typename KAT::mag_type;
135 using KAM = KokkosKernels::ArithTraits<mag_type>;
136 using device_type = typename NT::device_type;
137 using equib_info_type = EquilibrationInfo<val_type, device_type>;
138
139 const auto& rowMap = *(A.getRowMap());
140 const auto& colMap = *(A.getColMap());
141 const LO lclNumRows = static_cast<LO>(rowMap.getLocalNumElements());
142 const LO lclNumCols = 0; // don't allocate column-related Views
143 constexpr bool assumeSymmetric = false; // doesn't matter here
144 equib_info_type result(lclNumRows, lclNumCols, assumeSymmetric);
145 auto result_h = result.createMirrorView();
146
147 const auto val_zero = KAV::zero();
148 const auto mag_zero = KAM::zero();
149
150 forEachLocalRowMatrixRow<SC, LO, GO, NT>(A,
151 [&](const LO lclRow,
152 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
153 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
154 std::size_t numEnt) {
155 mag_type rowNorm = mag_zero;
156 val_type diagVal = val_zero;
157 const GO gblRow = rowMap.getGlobalElement(lclRow);
158 // OK if invalid(); then we simply won't find the diagonal entry.
159 const GO lclDiagColInd = colMap.getLocalElement(gblRow);
160
161 for (std::size_t k = 0; k < numEnt; ++k) {
162 const val_type matrixVal = val[k];
163 if (KAV::isInf(matrixVal)) {
164 result_h.foundInf = true;
165 }
166 if (KAV::isNan(matrixVal)) {
167 result_h.foundNan = true;
168 }
169 const mag_type matrixAbsVal = KAV::abs(matrixVal);
170 rowNorm += matrixAbsVal;
171 const LO lclCol = ind[k];
172 if (lclCol == lclDiagColInd) {
173 diagVal += val[k]; // repeats count additively
174 }
175 } // for each entry in row
176
177 // This is a local result. If the matrix has an overlapping
178 // row Map, then the global result might differ.
179 if (diagVal == KAV::zero()) {
180 result_h.foundZeroDiag = true;
181 }
182 if (rowNorm == KAM::zero()) {
183 result_h.foundZeroRowNorm = true;
184 }
185 // NOTE (mfh 24 May 2018) We could actually compute local
186 // rowScaledColNorms in situ at this point, if ! assumeSymmetric
187 // and row Map is the same as range Map (so that the local row
188 // norms are the same as the global row norms).
189 result_h.rowDiagonalEntries[lclRow] += diagVal;
190 result_h.rowNorms[lclRow] = rowNorm;
191 });
192
193 result.assign(result_h);
194 return result;
195}
196
199template <class SC, class LO, class GO, class NT>
200EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type, typename NT::device_type>
201computeLocalRowAndColumnOneNorms_RowMatrix(const Tpetra::RowMatrix<SC, LO, GO, NT>& A,
202 const bool assumeSymmetric) {
203 using KAT = KokkosKernels::ArithTraits<SC>;
204 using val_type = typename KAT::val_type;
205 using KAV = KokkosKernels::ArithTraits<val_type>;
206 using mag_type = typename KAT::mag_type;
207 using KAM = KokkosKernels::ArithTraits<mag_type>;
208 using device_type = typename NT::device_type;
209
210 const auto& rowMap = *(A.getRowMap());
211 const auto& colMap = *(A.getColMap());
212 const LO lclNumRows = static_cast<LO>(rowMap.getLocalNumElements());
213 const LO lclNumCols = static_cast<LO>(colMap.getLocalNumElements());
214
215 EquilibrationInfo<val_type, device_type> result(lclNumRows, lclNumCols, assumeSymmetric);
216 auto result_h = result.createMirrorView();
217
218 const auto val_zero = KAV::zero();
219 const auto mag_zero = KAM::zero();
220
221 forEachLocalRowMatrixRow<SC, LO, GO, NT>(A,
222 [&](const LO lclRow,
223 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_local_inds_host_view_type& ind,
224 const typename Tpetra::RowMatrix<SC, LO, GO, NT>::nonconst_values_host_view_type& val,
225 std::size_t numEnt) {
226 mag_type rowNorm = mag_zero;
227 val_type diagVal = val_zero;
228 const GO gblRow = rowMap.getGlobalElement(lclRow);
229 // OK if invalid(); then we simply won't find the diagonal entry.
230 const GO lclDiagColInd = colMap.getLocalElement(gblRow);
231
232 for (std::size_t k = 0; k < numEnt; ++k) {
233 const val_type matrixVal = val[k];
234 if (KAV::isInf(matrixVal)) {
235 result_h.foundInf = true;
236 }
237 if (KAV::isNan(matrixVal)) {
238 result_h.foundNan = true;
239 }
240 const mag_type matrixAbsVal = KAV::abs(matrixVal);
241 rowNorm += matrixAbsVal;
242 const LO lclCol = ind[k];
243 if (lclCol == lclDiagColInd) {
244 diagVal += val[k]; // repeats count additively
245 }
246 if (!assumeSymmetric) {
247 result_h.colNorms[lclCol] += matrixAbsVal;
248 }
249 } // for each entry in row
250
251 // This is a local result. If the matrix has an overlapping
252 // row Map, then the global result might differ.
253 if (diagVal == KAV::zero()) {
254 result_h.foundZeroDiag = true;
255 }
256 if (rowNorm == KAM::zero()) {
257 result_h.foundZeroRowNorm = true;
258 }
259 // NOTE (mfh 24 May 2018) We could actually compute local
260 // rowScaledColNorms in situ at this point, if ! assumeSymmetric
261 // and row Map is the same as range Map (so that the local row
262 // norms are the same as the global row norms).
263 result_h.rowDiagonalEntries[lclRow] += diagVal;
264 result_h.rowNorms[lclRow] = rowNorm;
265 if (!assumeSymmetric &&
266 lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid()) {
267 result_h.colDiagonalEntries[lclDiagColInd] += diagVal;
268 }
269 });
270
271 result.assign(result_h);
272 return result;
273}
274
275template <class SC, class LO, class GO, class NT>
276class ComputeLocalRowScaledColumnNorms {
277 public:
278 using crs_matrix_type = ::Tpetra::CrsMatrix<SC, LO, GO, NT>;
279 using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
280 using mag_type = typename KokkosKernels::ArithTraits<val_type>::mag_type;
281 using device_type = typename crs_matrix_type::device_type;
282 using policy_type = Kokkos::TeamPolicy<typename device_type::execution_space, LO>;
283
284 ComputeLocalRowScaledColumnNorms(const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
285 const Kokkos::View<const mag_type*, device_type>& rowNorms,
286 const crs_matrix_type& A)
287 : rowScaledColNorms_(rowScaledColNorms)
288 , rowNorms_(rowNorms)
289 , A_lcl_(A.getLocalMatrixDevice()) {}
290
291 KOKKOS_INLINE_FUNCTION void operator()(const typename policy_type::member_type& team) const {
292 using KAT = KokkosKernels::ArithTraits<val_type>;
293
294 const LO lclRow = team.league_rank();
295 const auto curRow = A_lcl_.rowConst(lclRow);
296 const mag_type rowNorm = rowNorms_[lclRow];
297 const LO numEnt = curRow.length;
298 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, numEnt), [&](const LO k) {
299 const mag_type matrixAbsVal = KAT::abs(curRow.value(k));
300 const LO lclCol = curRow.colidx(k);
301
302 Kokkos::atomic_add(&rowScaledColNorms_[lclCol], matrixAbsVal / rowNorm);
303 });
304 }
305
306 static void
307 run(const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
308 const Kokkos::View<const mag_type*, device_type>& rowNorms,
309 const crs_matrix_type& A) {
310 using functor_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
311
312 functor_type functor(rowScaledColNorms, rowNorms, A);
313 const LO lclNumRows =
314 static_cast<LO>(A.getRowMap()->getLocalNumElements());
315 Kokkos::parallel_for("computeLocalRowScaledColumnNorms",
316 policy_type(lclNumRows, Kokkos::AUTO), functor);
317 }
318
319 private:
320 Kokkos::View<mag_type*, device_type> rowScaledColNorms_;
321 Kokkos::View<const mag_type*, device_type> rowNorms_;
322
323 using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
324 local_matrix_device_type A_lcl_;
325};
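// Example: a minimal sketch of invoking the functor above directly, assuming a
// fill-complete Tpetra::CrsMatrix A and device Views rowScaledColNorms and
// rowNorms of the right lengths (hypothetical names).  The static run() method
// above wraps exactly this pattern: one team per local row, a TeamThreadRange
// over that row's entries, and an atomic update per column slot.
//
//   using functor_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
//   using policy_type = typename functor_type::policy_type;
//   const LO lclNumRows = static_cast<LO>(A.getRowMap()->getLocalNumElements());
//   Kokkos::parallel_for("rowScaledColNorms (example)",
//                        policy_type(lclNumRows, Kokkos::AUTO),
//                        functor_type(rowScaledColNorms, rowNorms, A));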
326
327template <class SC, class LO, class GO, class NT>
328void computeLocalRowScaledColumnNorms_CrsMatrix(EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
329 typename NT::device_type>& result,
330 const Tpetra::CrsMatrix<SC, LO, GO, NT>& A) {
331 using impl_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
332 impl_type::run(result.rowScaledColNorms, result.rowNorms, A);
333}
334
335template <class SC, class LO, class GO, class NT>
336void computeLocalRowScaledColumnNorms(EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
337 typename NT::device_type>& result,
338 const Tpetra::RowMatrix<SC, LO, GO, NT>& A) {
339 using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NT>;
340 using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
341 using mag_type = typename KokkosKernels::ArithTraits<val_type>::mag_type;
342 using device_type = typename NT::device_type;
343
344 auto colMapPtr = A.getColMap();
345 TEUCHOS_TEST_FOR_EXCEPTION(colMapPtr.get() == nullptr, std::invalid_argument,
346 "computeLocalRowScaledColumnNorms: "
347 "Input matrix A must have a nonnull column Map.");
348 const LO lclNumCols = static_cast<LO>(colMapPtr->getLocalNumElements());
349 if (static_cast<std::size_t>(result.rowScaledColNorms.extent(0)) !=
350 static_cast<std::size_t>(lclNumCols)) {
351 result.rowScaledColNorms =
352 Kokkos::View<mag_type*, device_type>("rowScaledColNorms", lclNumCols);
353 }
354
355 const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*>(&A);
356 if (A_crs == nullptr) {
357 computeLocalRowScaledColumnNorms_RowMatrix(result, A);
358 } else {
359 computeLocalRowScaledColumnNorms_CrsMatrix(result, *A_crs);
360 }
361}
362
363// Kokkos::parallel_reduce functor that is part of the implementation
364// of computeLocalRowOneNorms_CrsMatrix.
365template <class SC, class LO, class GO, class NT>
366class ComputeLocalRowOneNorms {
367 public:
368 using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
369 using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
370 using local_matrix_device_type =
371 typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
372 using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
373 using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
374
375 ComputeLocalRowOneNorms(const equib_info_type& equib, // in/out
376 const local_matrix_device_type& A_lcl, // in
377 const local_map_type& rowMap, // in
378 const local_map_type& colMap)
379 : // in
380 equib_(equib)
381 , A_lcl_(A_lcl)
382 , rowMap_(rowMap)
383 , colMap_(colMap) {}
384
385 // (result & 1) != 0 means "found Inf."
386 // (result & 2) != 0 means "found NaN."
387 // (result & 4) != 0 means "found zero diag."
388 // (result & 8) != 0 means "found zero row norm."
389 // Pack into a single int so the reduction is cheaper,
390 // esp. on GPU.
391 using value_type = int;
392
393 KOKKOS_INLINE_FUNCTION void init(value_type& dst) const {
394 dst = 0;
395 }
396
397 KOKKOS_INLINE_FUNCTION void
398 join(value_type& dst,
399 const value_type& src) const {
400 dst |= src;
401 }
402
403 KOKKOS_INLINE_FUNCTION void
404 operator()(const typename policy_type::member_type& team, value_type& dst) const {
405 using KAT = KokkosKernels::ArithTraits<val_type>;
406 using mag_type = typename KAT::mag_type;
407 using KAM = KokkosKernels::ArithTraits<mag_type>;
408
409 const LO lclRow = team.league_rank();
410 const GO gblRow = rowMap_.getGlobalElement(lclRow);
411 // OK if invalid(); then we simply won't find the diagonal entry.
412 const GO lclDiagColInd = colMap_.getLocalElement(gblRow);
413
414 const auto curRow = A_lcl_.rowConst(lclRow);
415 const LO numEnt = curRow.length;
416
417 const auto val_zero = KAT::zero();
418 const auto mag_zero = KAM::zero();
419
420 mag_type rowNorm = mag_zero;
421 val_type diagVal = val_zero;
422 value_type dstThread{0};
423
424 Kokkos::parallel_reduce(
425 Kokkos::TeamThreadRange(team, numEnt), [&](const LO k, mag_type& normContrib, val_type& diagContrib, value_type& dstContrib) {
426 const val_type matrixVal = curRow.value(k);
427 if (KAT::isInf(matrixVal)) {
428 dstContrib |= 1;
429 }
430 if (KAT::isNan(matrixVal)) {
431 dstContrib |= 2;
432 }
433 const mag_type matrixAbsVal = KAT::abs(matrixVal);
434 normContrib += matrixAbsVal;
435 const LO lclCol = curRow.colidx(k);
436 if (lclCol == lclDiagColInd) {
437 diagContrib = curRow.value(k); // assume no repeats
438 }
439 },
440 Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread)); // for each entry in row
441
442 // This is a local result. If the matrix has an overlapping
443 // row Map, then the global result might differ.
444 Kokkos::single(Kokkos::PerTeam(team), [&]() {
445 dst |= dstThread;
446 if (diagVal == KAT::zero()) {
447 dst |= 4;
448 }
449 if (rowNorm == KAM::zero()) {
450 dst |= 8;
451 }
452 equib_.rowDiagonalEntries[lclRow] = diagVal;
453 equib_.rowNorms[lclRow] = rowNorm;
454 });
455 }
456
457 private:
458 equib_info_type equib_;
459 local_matrix_device_type A_lcl_;
460 local_map_type rowMap_;
461 local_map_type colMap_;
462};
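// Example: decoding the packed reduction result described above, assuming
// `result` is the int produced by a parallel_reduce over this functor (this is
// exactly what computeLocalRowOneNorms_CrsMatrix below does):
//
//   const bool foundInf = (result & 1) != 0;
//   const bool foundNan = (result & 2) != 0;
//   const bool foundZeroDiag = (result & 4) != 0;
//   const bool foundZeroRowNorm = (result & 8) != 0;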
463
464// Kokkos::parallel_reduce functor that is part of the implementation
465// of computeLocalRowAndColumnOneNorms_CrsMatrix.
466template <class SC, class LO, class GO, class NT>
467class ComputeLocalRowAndColumnOneNorms {
468 public:
469 using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
470 using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
471 using local_matrix_device_type = typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_device_type;
472 using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
473 using policy_type = Kokkos::TeamPolicy<typename local_matrix_device_type::execution_space, LO>;
474
475 public:
476 ComputeLocalRowAndColumnOneNorms(const equib_info_type& equib, // in/out
477 const local_matrix_device_type& A_lcl, // in
478 const local_map_type& rowMap, // in
479 const local_map_type& colMap)
480 : // in
481 equib_(equib)
482 , A_lcl_(A_lcl)
483 , rowMap_(rowMap)
484 , colMap_(colMap) {}
485
486 // (result & 1) != 0 means "found Inf."
487 // (result & 2) != 0 means "found NaN."
488 // (result & 4) != 0 means "found zero diag."
489 // (result & 8) != 0 means "found zero row norm."
490 // Pack into a single int so the reduction is cheaper,
491 // esp. on GPU.
492 using value_type = int;
493
494 KOKKOS_INLINE_FUNCTION void init(value_type& dst) const {
495 dst = 0;
496 }
497
498 KOKKOS_INLINE_FUNCTION void
499 join(value_type& dst,
500 const value_type& src) const {
501 dst |= src;
502 }
503
504 KOKKOS_INLINE_FUNCTION void
505 operator()(const typename policy_type::member_type& team, value_type& dst) const {
506 using KAT = KokkosKernels::ArithTraits<val_type>;
507 using mag_type = typename KAT::mag_type;
508 using KAM = KokkosKernels::ArithTraits<mag_type>;
509
510 const LO lclRow = team.league_rank();
511 const GO gblRow = rowMap_.getGlobalElement(lclRow);
512 // OK if invalid(); then we simply won't find the diagonal entry.
513 const GO lclDiagColInd = colMap_.getLocalElement(gblRow);
514
515 const auto curRow = A_lcl_.rowConst(lclRow);
516 const LO numEnt = curRow.length;
517
518 const auto val_zero = KAT::zero();
519 const auto mag_zero = KAM::zero();
520
521 mag_type rowNorm = mag_zero;
522 val_type diagVal = val_zero;
523 value_type dstThread{0};
524
525 Kokkos::parallel_reduce(
526 Kokkos::TeamThreadRange(team, numEnt), [&](const LO k, mag_type& normContrib, val_type& diagContrib, value_type& dstContrib) {
527 const val_type matrixVal = curRow.value(k);
528 if (KAT::isInf(matrixVal)) {
529 dstContrib |= 1;
530 }
531 if (KAT::isNan(matrixVal)) {
532 dstContrib |= 2;
533 }
534 const mag_type matrixAbsVal = KAT::abs(matrixVal);
535 normContrib += matrixAbsVal;
536 const LO lclCol = curRow.colidx(k);
537 if (lclCol == lclDiagColInd) {
538 diagContrib = curRow.value(k); // assume no repeats
539 }
540 if (!equib_.assumeSymmetric) {
541 Kokkos::atomic_add(&(equib_.colNorms[lclCol]), matrixAbsVal);
542 }
543 },
544 Kokkos::Sum<mag_type>(rowNorm), Kokkos::Sum<val_type>(diagVal), Kokkos::BOr<value_type>(dstThread)); // for each entry in row
545
546 // This is a local result. If the matrix has an overlapping
547 // row Map, then the global result might differ.
548 Kokkos::single(Kokkos::PerTeam(team), [&]() {
549 dst |= dstThread;
550 if (diagVal == KAT::zero()) {
551 dst |= 4;
552 }
553 if (rowNorm == KAM::zero()) {
554 dst |= 8;
555 }
556 // NOTE (mfh 24 May 2018) We could actually compute local
557 // rowScaledColNorms in situ at this point, if ! assumeSymmetric
558 // and row Map is the same as range Map (so that the local row
559 // norms are the same as the global row norms).
560 equib_.rowDiagonalEntries[lclRow] = diagVal;
561 equib_.rowNorms[lclRow] = rowNorm;
562 if (!equib_.assumeSymmetric &&
563 lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid()) {
564 // Don't need an atomic update here, since this lclDiagColInd is
565 // a one-to-one function of lclRow.
566 equib_.colDiagonalEntries[lclDiagColInd] += diagVal;
567 }
568 });
569 }
570
571 private:
572 equib_info_type equib_;
573 local_matrix_device_type A_lcl_;
574 local_map_type rowMap_;
575 local_map_type colMap_;
576};
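// Design note: the row quantities are written by exactly one team (one team
// per local row), but a given local column index may occur in many rows, so
// several teams can update colNorms[lclCol] concurrently.  That is why the
// functor above uses an atomic update for the column norms but plain stores
// for rowNorms and rowDiagonalEntries; a sketch of the conflicting-update
// pattern, with hypothetical names:
//
//   Kokkos::atomic_add(&colNorms[lclCol], matrixAbsVal);  // safe under concurrent writers
//
// The column diagonal entries need no atomic update, since lclDiagColInd is a
// one-to-one function of lclRow (see the comment inside the functor).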
577
580template <class SC, class LO, class GO, class NT>
581EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type, typename NT::device_type>
582computeLocalRowOneNorms_CrsMatrix(const Tpetra::CrsMatrix<SC, LO, GO, NT>& A) {
583 using execution_space = typename NT::device_type::execution_space;
584 using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
585 using functor_type = ComputeLocalRowOneNorms<SC, LO, GO, NT>;
586 using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
587 using device_type = typename NT::device_type;
588 using equib_info_type = EquilibrationInfo<val_type, device_type>;
589
590 const LO lclNumRows = static_cast<LO>(A.getRowMap()->getLocalNumElements());
591 const LO lclNumCols = 0; // don't allocate column-related Views
592 constexpr bool assumeSymmetric = false; // doesn't matter here
593 equib_info_type equib(lclNumRows, lclNumCols, assumeSymmetric);
594
595 functor_type functor(equib, A.getLocalMatrixDevice(),
596 A.getRowMap()->getLocalMap(),
597 A.getColMap()->getLocalMap());
598 int result = 0;
599 Kokkos::parallel_reduce("computeLocalRowOneNorms",
600 policy_type(lclNumRows, Kokkos::AUTO), functor,
601 result);
602 equib.foundInf = (result & 1) != 0;
603 equib.foundNan = (result & 2) != 0;
604 equib.foundZeroDiag = (result & 4) != 0;
605 equib.foundZeroRowNorm = (result & 8) != 0;
606 return equib;
607}
608
611template <class SC, class LO, class GO, class NT>
612EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type, typename NT::device_type>
613computeLocalRowAndColumnOneNorms_CrsMatrix(const Tpetra::CrsMatrix<SC, LO, GO, NT>& A,
614 const bool assumeSymmetric) {
615 using execution_space = typename NT::device_type::execution_space;
616 using policy_type = Kokkos::TeamPolicy<execution_space, LO>;
617 using functor_type = ComputeLocalRowAndColumnOneNorms<SC, LO, GO, NT>;
618 using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
619 using device_type = typename NT::device_type;
620 using equib_info_type = EquilibrationInfo<val_type, device_type>;
621
622 const LO lclNumRows = static_cast<LO>(A.getRowMap()->getLocalNumElements());
623 const LO lclNumCols = static_cast<LO>(A.getColMap()->getLocalNumElements());
624 equib_info_type equib(lclNumRows, lclNumCols, assumeSymmetric);
625
626 functor_type functor(equib, A.getLocalMatrixDevice(),
627 A.getRowMap()->getLocalMap(),
628 A.getColMap()->getLocalMap());
629 int result = 0;
630 Kokkos::parallel_reduce("computeLocalRowAndColumnOneNorms",
631 policy_type(lclNumRows, Kokkos::AUTO), functor,
632 result);
633 equib.foundInf = (result & 1) != 0;
634 equib.foundNan = (result & 2) != 0;
635 equib.foundZeroDiag = (result & 4) != 0;
636 equib.foundZeroRowNorm = (result & 8) != 0;
637 return equib;
638}
639
644template <class SC, class LO, class GO, class NT>
645EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
646 typename NT::device_type>
647computeLocalRowOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A) {
648 using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NT>;
649 const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*>(&A);
650
651 if (A_crs == nullptr) {
652 return computeLocalRowOneNorms_RowMatrix(A);
653 } else {
654 return computeLocalRowOneNorms_CrsMatrix(*A_crs);
655 }
656}
657
679template <class SC, class LO, class GO, class NT>
680EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type, typename NT::device_type>
681computeLocalRowAndColumnOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A,
682 const bool assumeSymmetric) {
683 using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NT>;
684 const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*>(&A);
685
686 if (A_crs == nullptr) {
687 return computeLocalRowAndColumnOneNorms_RowMatrix(A, assumeSymmetric);
688 } else {
689 return computeLocalRowAndColumnOneNorms_CrsMatrix(*A_crs, assumeSymmetric);
690 }
691}
692
693template <class SC, class LO, class GO, class NT>
694auto getLocalView_1d_readOnly(
695 const Tpetra::MultiVector<SC, LO, GO, NT>& X,
696 const LO whichColumn)
697 -> decltype(Kokkos::subview(X.getLocalViewDevice(Access::ReadOnly),
698 Kokkos::ALL(), whichColumn)) {
699 if (X.isConstantStride()) {
700 return Kokkos::subview(X.getLocalViewDevice(Access::ReadOnly),
701 Kokkos::ALL(), whichColumn);
702 } else {
703 auto X_whichColumn = X.getVector(whichColumn);
704 return Kokkos::subview(X_whichColumn->getLocalViewDevice(Access::ReadOnly),
705 Kokkos::ALL(), 0);
706 }
707}
708
709template <class SC, class LO, class GO, class NT>
710auto getLocalView_1d_writeOnly(
711 Tpetra::MultiVector<SC, LO, GO, NT>& X,
712 const LO whichColumn)
713 -> decltype(Kokkos::subview(X.getLocalViewDevice(Access::ReadWrite),
714 Kokkos::ALL(), whichColumn)) {
715 if (X.isConstantStride()) {
716 return Kokkos::subview(X.getLocalViewDevice(Access::ReadWrite),
717 Kokkos::ALL(), whichColumn);
718 } else {
719 auto X_whichColumn = X.getVectorNonConst(whichColumn);
720 return Kokkos::subview(X_whichColumn->getLocalViewDevice(Access::ReadWrite),
721 Kokkos::ALL(), 0);
722 }
723}
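// Usage note: both helpers above return a rank-1 device View of a single
// MultiVector column.  If X has constant stride, the column is a subview of
// the whole rank-2 local View; otherwise the helpers go through getVector /
// getVectorNonConst, whose local View has exactly one column.  A minimal
// sketch, assuming a Tpetra::MultiVector<SC, LO, GO, NT> X with at least two
// columns:
//
//   auto col0 = getLocalView_1d_readOnly(X, 0);   // read-only device View
//   auto col1 = getLocalView_1d_writeOnly(X, 1);  // writable device View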
724
725template <class SC, class LO, class GO, class NT, class ViewValueType>
726void copy1DViewIntoMultiVectorColumn(
727 Tpetra::MultiVector<SC, LO, GO, NT>& X,
728 const LO whichColumn,
729 const Kokkos::View<ViewValueType*, typename NT::device_type>& view) {
730 auto X_lcl = getLocalView_1d_writeOnly(X, whichColumn);
731 Tpetra::Details::copyConvert(X_lcl, view);
732}
733
734template <class SC, class LO, class GO, class NT, class ViewValueType>
735void copy1DViewIntoMultiVectorColumn_mag(
736 Tpetra::MultiVector<SC, LO, GO, NT>& X,
737 const LO whichColumn,
738 const Kokkos::View<ViewValueType*, typename NT::device_type>& view) {
739 using KAT = KokkosKernels::ArithTraits<ViewValueType>;
740 using execution_space = typename NT::execution_space;
741 using range_type = Kokkos::RangePolicy<execution_space, LO>;
742 auto X_lcl = getLocalView_1d_writeOnly(X, whichColumn);
743 Kokkos::parallel_for(
744 "",
745 range_type(0, X_lcl.extent(0)),
746 KOKKOS_LAMBDA(const LO i) {
747 X_lcl(i) = KAT::magnitude(view(i));
748 });
749}
750
751template <class SC, class LO, class GO, class NT, class ViewValueType>
752void copyMultiVectorColumnInto1DView(
753 const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
754 const Tpetra::MultiVector<SC, LO, GO, NT>& X,
755 const LO whichColumn) {
756 auto X_lcl = getLocalView_1d_readOnly(X, whichColumn);
757 Tpetra::Details::copyConvert(view, X_lcl);
758}
759
760template <class SC, class LO, class GO, class NT, class ViewValueType>
761void copyMultiVectorColumnInto1DView_mag(
762 const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
763 const Tpetra::MultiVector<SC, LO, GO, NT>& X,
764 const LO whichColumn) {
765 using implScalar = typename KokkosKernels::ArithTraits<SC>::val_type;
766 using KAT = KokkosKernels::ArithTraits<implScalar>;
767 using range_type = Kokkos::RangePolicy<typename NT::execution_space, LO>;
768 auto X_lcl = getLocalView_1d_readOnly(X, whichColumn);
769 Kokkos::parallel_for(
770 "",
771 range_type(0, X_lcl.extent(0)),
772 KOKKOS_LAMBDA(LO i) {
773 view(i) = KAT::magnitude(X_lcl(i));
774 });
775}
776
777template <class OneDViewType, class IndexType>
778class FindZero {
779 public:
780 static_assert(OneDViewType::rank == 1,
781 "OneDViewType must be a rank-1 Kokkos::View.");
782 static_assert(std::is_integral<IndexType>::value,
783 "IndexType must be a built-in integer type.");
784 FindZero(const OneDViewType& x)
785 : x_(x) {}
786 // Kokkos historically didn't like bool reduction results on CUDA,
787 // so we use int as the reduction result type.
788 KOKKOS_INLINE_FUNCTION void
789 operator()(const IndexType i, int& result) const {
790 using val_type = typename OneDViewType::non_const_value_type;
791 result = (x_(i) == KokkosKernels::ArithTraits<val_type>::zero()) ? 1 : result;
792 }
793
794 private:
795 OneDViewType x_;
796};
797
798template <class OneDViewType>
799bool findZero(const OneDViewType& x) {
800 using view_type = typename OneDViewType::const_type;
801 using execution_space = typename view_type::execution_space;
802 using size_type = typename view_type::size_type;
803 using functor_type = FindZero<view_type, size_type>;
804
805 Kokkos::RangePolicy<execution_space, size_type> range(0, x.extent(0));
806 range.set_chunk_size(500); // adjust as needed
807
808 int foundZero = 0;
809 Kokkos::parallel_reduce("findZero", range, functor_type(x), foundZero);
810 return foundZero == 1;
811}
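// Example: a minimal sketch of calling findZero, assuming mag_type and
// device_type aliases as in the classes above and a local row count
// lclNumRows (hypothetical names).  The reduction uses int rather than bool
// for the Kokkos/CUDA reason noted above FindZero.
//
//   Kokkos::View<mag_type*, device_type> rowNorms("rowNorms", lclNumRows);
//   // ... fill rowNorms on device ...
//   const bool hasZeroRowNorm = findZero(rowNorms);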
812
813template <class SC, class LO, class GO, class NT>
814void globalizeRowOneNorms(EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
815 typename NT::device_type>& equib,
816 const Tpetra::RowMatrix<SC, LO, GO, NT>& A) {
817 using mv_type = Tpetra::MultiVector<SC, LO, GO, NT>;
818
819 auto G = A.getGraph();
820 TEUCHOS_TEST_FOR_EXCEPTION(G.get() == nullptr, std::invalid_argument,
821 "globalizeRowOneNorms: Input RowMatrix A must have a nonnull graph "
822 "(that is, getGraph() must return nonnull).");
823 TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::invalid_argument,
824 "globalizeRowOneNorms: Input CrsGraph G must be fillComplete.");
825
826 auto exp = G->getExporter();
827 if (!exp.is_null()) {
828 // If the matrix has an overlapping row Map, first Export the
829 // local row norms with ADD CombineMode to a range Map Vector to
830 // get the global row norms, then reverse them back with REPLACE
831 // CombineMode to the row Map Vector. Ditto for the local row
832 // diagonal entries. Use SC instead of mag_type, so we can
833 // communicate both row norms and row diagonal entries at once.
834
835 // FIXME (mfh 16 May 2018) Clever DualView tricks could possibly
836 // avoid the local copy here.
837 mv_type rowMapMV(G->getRowMap(), 2, false);
838
839 copy1DViewIntoMultiVectorColumn(rowMapMV, 0, equib.rowNorms);
840 copy1DViewIntoMultiVectorColumn(rowMapMV, 1, equib.rowDiagonalEntries);
841 {
842 mv_type rangeMapMV(G->getRangeMap(), 2, true);
843 rangeMapMV.doExport(rowMapMV, *exp, Tpetra::ADD); // forward mode
844 rowMapMV.doImport(rangeMapMV, *exp, Tpetra::REPLACE); // reverse mode
845 }
846 copyMultiVectorColumnInto1DView_mag(equib.rowNorms, rowMapMV, 0);
847 copyMultiVectorColumnInto1DView(equib.rowDiagonalEntries, rowMapMV, 1);
848
849 // It's not common for users to solve linear systems with a
850 // nontrivial Export, so it's OK for this to cost an additional
851 // pass over rowDiagonalEntries.
852 equib.foundZeroDiag = findZero(equib.rowDiagonalEntries);
853 equib.foundZeroRowNorm = findZero(equib.rowNorms);
854 }
855
856 constexpr int allReduceCount = 4;
857 int lclNaughtyMatrix[allReduceCount];
858 lclNaughtyMatrix[0] = equib.foundInf ? 1 : 0;
859 lclNaughtyMatrix[1] = equib.foundNan ? 1 : 0;
860 lclNaughtyMatrix[2] = equib.foundZeroDiag ? 1 : 0;
861 lclNaughtyMatrix[3] = equib.foundZeroRowNorm ? 1 : 0;
862
863 using Teuchos::outArg;
864 using Teuchos::REDUCE_MAX;
865 using Teuchos::reduceAll;
866 auto comm = G->getComm();
867 int gblNaughtyMatrix[allReduceCount];
868 reduceAll<int, int>(*comm, REDUCE_MAX, allReduceCount,
869 lclNaughtyMatrix, gblNaughtyMatrix);
870
871 equib.foundInf = gblNaughtyMatrix[0] == 1;
872 equib.foundNan = gblNaughtyMatrix[1] == 1;
873 equib.foundZeroDiag = gblNaughtyMatrix[2] == 1;
874 equib.foundZeroRowNorm = gblNaughtyMatrix[3] == 1;
875}
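// Design note: the Export-with-ADD followed by the reverse-mode Import-with-
// REPLACE above is the standard Tpetra pattern for turning per-process partial
// sums on an overlapping (row Map) distribution into globally consistent
// values.  A minimal sketch of the same pattern for a single Vector, assuming
// maps rowMap and rangeMap and an Export exp built from them (hypothetical
// names):
//
//   Tpetra::Vector<SC, LO, GO, NT> overlapping(rowMap);
//   Tpetra::Vector<SC, LO, GO, NT> oneToOne(rangeMap);
//   // ... fill overlapping with local partial sums ...
//   oneToOne.doExport(overlapping, exp, Tpetra::ADD);      // forward mode: combine
//   overlapping.doImport(oneToOne, exp, Tpetra::REPLACE);  // reverse mode: copy back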
876
877template <class SC, class LO, class GO, class NT>
878void globalizeColumnOneNorms(EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
879 typename NT::device_type>& equib,
880 const Tpetra::RowMatrix<SC, LO, GO, NT>& A,
881 const bool assumeSymmetric) // if so, use row norms
882{
883 using val_type = typename KokkosKernels::ArithTraits<SC>::val_type;
884 using mag_type = typename KokkosKernels::ArithTraits<val_type>::mag_type;
885 using mv_type = Tpetra::MultiVector<SC, LO, GO, NT>;
886 using device_type = typename NT::device_type;
887
888 auto G = A.getGraph();
889 TEUCHOS_TEST_FOR_EXCEPTION(G.get() == nullptr, std::invalid_argument,
890 "globalizeColumnOneNorms: Input RowMatrix A must have a nonnull graph "
891 "(that is, getGraph() must return nonnull).");
892 TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::invalid_argument,
893 "globalizeColumnOneNorms: Input CrsGraph G must be fillComplete.");
894
895 auto imp = G->getImporter();
896 if (assumeSymmetric) {
897 const LO numCols = 2;
898 // Redistribute local row info to global column info.
899
900 // Get the data into a MultiVector on the domain Map.
901 mv_type rowNorms_domMap(G->getDomainMap(), numCols, false);
902 const bool rowMapSameAsDomainMap = G->getRowMap()->isSameAs(*(G->getDomainMap()));
903 if (rowMapSameAsDomainMap) {
904 copy1DViewIntoMultiVectorColumn(rowNorms_domMap, 0, equib.rowNorms);
905 copy1DViewIntoMultiVectorColumn_mag(rowNorms_domMap, 1, equib.rowDiagonalEntries);
906 } else {
907 // This is not a common case; it would normally arise when the
908 // matrix has an overlapping row Map.
909 Tpetra::Export<LO, GO, NT> rowToDom(G->getRowMap(), G->getDomainMap());
910 mv_type rowNorms_rowMap(G->getRowMap(), numCols, true);
911 copy1DViewIntoMultiVectorColumn(rowNorms_rowMap, 0, equib.rowNorms);
912 copy1DViewIntoMultiVectorColumn_mag(rowNorms_rowMap, 1, equib.rowDiagonalEntries);
913 rowNorms_domMap.doExport(rowNorms_rowMap, rowToDom, Tpetra::REPLACE);
914 }
915
916 // Use the existing Import to redistribute the row norms from the
917 // domain Map to the column Map.
918 std::unique_ptr<mv_type> rowNorms_colMap;
919 if (imp.is_null()) {
920 // Shallow copy of rowNorms_domMap.
921 rowNorms_colMap =
922 std::unique_ptr<mv_type>(new mv_type(rowNorms_domMap, *(G->getColMap())));
923 } else {
924 rowNorms_colMap =
925 std::unique_ptr<mv_type>(new mv_type(G->getColMap(), numCols, true));
926 rowNorms_colMap->doImport(rowNorms_domMap, *imp, Tpetra::REPLACE);
927 }
928
929 // Make sure the result has allocations of the right size.
930 const LO lclNumCols =
931 static_cast<LO>(G->getColMap()->getLocalNumElements());
932 if (static_cast<LO>(equib.colNorms.extent(0)) != lclNumCols) {
933 equib.colNorms =
934 Kokkos::View<mag_type*, device_type>("colNorms", lclNumCols);
935 }
936 if (static_cast<LO>(equib.colDiagonalEntries.extent(0)) != lclNumCols) {
937 equib.colDiagonalEntries =
938 Kokkos::View<val_type*, device_type>("colDiagonalEntries", lclNumCols);
939 }
940
941 // Copy row norms and diagonal entries, appropriately
942 // redistributed, into column norms resp. diagonal entries.
943 copyMultiVectorColumnInto1DView(equib.colNorms, *rowNorms_colMap, 0);
944 copyMultiVectorColumnInto1DView(equib.colDiagonalEntries, *rowNorms_colMap, 1);
945 } else {
946 if (!imp.is_null()) {
947 const LO numCols = 3;
948 // If the matrix has an overlapping column Map (this is usually
949 // the case), first Export (reverse-mode Import) the local info
950 // to a domain Map Vector to get the global info, then Import
951 // them back with REPLACE CombineMode to the column Map Vector.
952 // Ditto for the row-scaled column norms.
953
954 // FIXME (mfh 16 May 2018) Clever DualView tricks could possibly
955 // avoid the local copy here.
956 mv_type colMapMV(G->getColMap(), numCols, false);
957
958 copy1DViewIntoMultiVectorColumn(colMapMV, 0, equib.colNorms);
959 copy1DViewIntoMultiVectorColumn_mag(colMapMV, 1, equib.colDiagonalEntries);
960 copy1DViewIntoMultiVectorColumn(colMapMV, 2, equib.rowScaledColNorms);
961 {
962 mv_type domainMapMV(G->getDomainMap(), numCols, true);
963 domainMapMV.doExport(colMapMV, *imp, Tpetra::ADD); // reverse mode
964 colMapMV.doImport(domainMapMV, *imp, Tpetra::REPLACE); // forward mode
965 }
966 copyMultiVectorColumnInto1DView(equib.colNorms, colMapMV, 0);
967 copyMultiVectorColumnInto1DView(equib.colDiagonalEntries, colMapMV, 1);
968 copyMultiVectorColumnInto1DView(equib.rowScaledColNorms, colMapMV, 2);
969 }
970 }
971}
972
973} // namespace Details
974
975template <class SC, class LO, class GO, class NT>
976Details::EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
977 typename NT::device_type>
978computeRowOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A) {
979 TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::invalid_argument,
980 "computeRowOneNorms: Input matrix A must be fillComplete.");
981 auto result = Details::computeLocalRowOneNorms(A);
982
983 Details::globalizeRowOneNorms(result, A);
984 return result;
985}
986
987template <class SC, class LO, class GO, class NT>
988Details::EquilibrationInfo<typename KokkosKernels::ArithTraits<SC>::val_type,
989 typename NT::device_type>
990computeRowAndColumnOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A,
991 const bool assumeSymmetric) {
992 TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::invalid_argument,
993 "computeRowAndColumnOneNorms: Input matrix A must be fillComplete.");
994 auto result = Details::computeLocalRowAndColumnOneNorms(A, assumeSymmetric);
995
996 Details::globalizeRowOneNorms(result, A);
997 if (!assumeSymmetric) {
998 // Row-norm-scaled column norms are trivial if the matrix is
999 // symmetric, since the row norms and column norms are the same in
1000 // that case.
1001 Details::computeLocalRowScaledColumnNorms(result, A);
1002 }
1003 Details::globalizeColumnOneNorms(result, A, assumeSymmetric);
1004 return result;
1005}
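// Example: a minimal sketch of calling the public interface, assuming a
// fill-complete Tpetra::CrsMatrix<SC, LO, GO, NT> named A (hypothetical).
// The field names below match the EquilibrationInfo struct used throughout
// this file.
//
//   auto norms = Tpetra::computeRowAndColumnOneNorms(A, /*assumeSymmetric=*/false);
//   if (norms.foundInf || norms.foundNan) {
//     // The matrix contains Inf or NaN entries; treat the norms with care.
//   }
//   // norms.rowNorms, norms.colNorms, and norms.rowScaledColNorms are device
//   // Views indexed by local row resp. local column index.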
1006
1007} // namespace Tpetra
1008
1009//
1010// Explicit instantiation macro
1011//
1012// Must be expanded from within the Tpetra namespace!
1013//
1014
1015#define TPETRA_COMPUTEROWANDCOLUMNONENORMS_INSTANT(SC, LO, GO, NT) \
1016 template Details::EquilibrationInfo<KokkosKernels::ArithTraits<SC>::val_type, NT::device_type> \
1017 computeRowOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A); \
1018 \
1019 template Details::EquilibrationInfo<KokkosKernels::ArithTraits<SC>::val_type, NT::device_type> \
1020 computeRowAndColumnOneNorms(const Tpetra::RowMatrix<SC, LO, GO, NT>& A, \
1021 const bool assumeSymmetric);
1022
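// Example: a minimal sketch of how the instantiation macro above is meant to
// be expanded from within the Tpetra namespace, for hypothetical template
// arguments (double, int, long long, and some concrete Node type MyNode):
//
//   namespace Tpetra {
//     TPETRA_COMPUTEROWANDCOLUMNONENORMS_INSTANT(double, int, long long, MyNode)
//   } // namespace Tpetra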
1023#endif // TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP