Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10// mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both
11// incorrect (as() is not a device function) and usually irrelevant
12// (it would only matter if LocalOrdinal were bigger than size_t on a
13// particular platform, which is unlikely).
14
15// KDD Aug 2020: In the permute/pack/unpack functors,
16// the Enabled template parameter is specialized in
17// downstream packages like Stokhos using SFINAE to provide partial
18// specializations based on the scalar type of the SrcView and DstView
19// template parameters. See #7898.
20// Do not remove it before checking with Stokhos and other specializing users.
21
22#ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
23#define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
24
25#include "Kokkos_Core.hpp"
26#if KOKKOS_VERSION >= 40799
27#include "KokkosKernels_ArithTraits.hpp"
28#else
29#include "Kokkos_ArithTraits.hpp"
30#endif
31#include <sstream>
32#include <stdexcept>
33
34namespace Tpetra {
35namespace KokkosRefactor {
36namespace Details {
37
43namespace Impl {
44
51template <class IntegerType,
52 const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
54 static KOKKOS_INLINE_FUNCTION bool
55 test(const IntegerType x,
57};
58
59// Partial specialization for the case where IntegerType IS signed.
60template <class IntegerType>
62 static KOKKOS_INLINE_FUNCTION bool
63 test(const IntegerType x,
66 }
67};
68
69// Partial specialization for the case where IntegerType is NOT signed.
70template <class IntegerType>
71struct OutOfBounds<IntegerType, false> {
72 static KOKKOS_INLINE_FUNCTION bool
73 test(const IntegerType x,
75 return x >= exclusiveUpperBound;
76 }
77};
78
81template <class IntegerType>
82KOKKOS_INLINE_FUNCTION bool
86
87} // namespace Impl
88
89// Functors for implementing packAndPrepare and unpackAndCombine
90// through parallel_for
91
92template <typename DstView, typename SrcView, typename IdxView,
93 typename Enabled = void>
94struct PackArraySingleColumn {
95 typedef typename DstView::execution_space execution_space;
96 typedef typename execution_space::size_type size_type;
97
98 DstView dst;
99 SrcView src;
100 IdxView idx;
101 size_t col;
102
103 PackArraySingleColumn(const DstView& dst_,
104 const SrcView& src_,
105 const IdxView& idx_,
106 const size_t col_)
107 : dst(dst_)
108 , src(src_)
109 , idx(idx_)
110 , col(col_) {}
111
113 operator()(const size_type k) const {
114 dst(k) = src(idx(k), col);
115 }
116
117 static void
118 pack(const DstView& dst,
119 const SrcView& src,
120 const IdxView& idx,
121 const size_t col,
122 const execution_space& space) {
123 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
124 Kokkos::parallel_for("Tpetra::MultiVector pack one col",
125 range_type(space, 0, idx.size()),
126 PackArraySingleColumn(dst, src, idx, col));
127 }
128};
129
130template <typename DstView,
131 typename SrcView,
132 typename IdxView,
133 typename SizeType = typename DstView::execution_space::size_type,
134 typename Enabled = void>
135class PackArraySingleColumnWithBoundsCheck {
136 private:
137 static_assert(Kokkos::is_view<DstView>::value,
138 "DstView must be a Kokkos::View.");
139 static_assert(Kokkos::is_view<SrcView>::value,
140 "SrcView must be a Kokkos::View.");
141 static_assert(Kokkos::is_view<IdxView>::value,
142 "IdxView must be a Kokkos::View.");
143 static_assert(static_cast<int>(DstView::rank) == 1,
144 "DstView must be a rank-1 Kokkos::View.");
145 static_assert(static_cast<int>(SrcView::rank) == 2,
146 "SrcView must be a rank-2 Kokkos::View.");
147 static_assert(static_cast<int>(IdxView::rank) == 1,
148 "IdxView must be a rank-1 Kokkos::View.");
149 static_assert(std::is_integral<SizeType>::value,
150 "SizeType must be a built-in integer type.");
151
152 using execution_space = typename DstView::execution_space;
153
154 public:
155 typedef SizeType size_type;
156 using value_type = size_t;
157
158 private:
159 DstView dst;
160 SrcView src;
161 IdxView idx;
162 size_type col;
163 execution_space space;
164
165 public:
166 PackArraySingleColumnWithBoundsCheck(const DstView& dst_,
167 const SrcView& src_,
168 const IdxView& idx_,
169 const size_type col_)
170 : dst(dst_)
171 , src(src_)
172 , idx(idx_)
173 , col(col_) {}
174
175 KOKKOS_INLINE_FUNCTION void
176 operator()(const size_type k, value_type& lclErrCount) const {
177 using index_type = typename IdxView::non_const_value_type;
178
179 const index_type lclRow = idx(k);
180 if (lclRow < static_cast<index_type>(0) ||
181 lclRow >= static_cast<index_type>(src.extent(0))) {
182 ++lclErrCount;
183 } else {
184 dst(k) = src(lclRow, col);
185 }
186 }
187
188 KOKKOS_INLINE_FUNCTION
189 void init(value_type& initialErrorCount) const {
190 initialErrorCount = 0;
191 }
192
193 KOKKOS_INLINE_FUNCTION void
194 join(value_type& dstErrorCount,
195 const value_type& srcErrorCount) const {
196 dstErrorCount += srcErrorCount;
197 }
198
199 static void
200 pack(const DstView& dst,
201 const SrcView& src,
202 const IdxView& idx,
203 const size_type col,
204 const execution_space& space) {
205 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
206 typedef typename IdxView::non_const_value_type index_type;
207
208 size_t errorCount = 0;
209 Kokkos::parallel_reduce("Tpetra::MultiVector pack one col debug only",
210 range_type(space, 0, idx.size()),
211 PackArraySingleColumnWithBoundsCheck(dst, src, idx, col),
212 errorCount);
213
214 if (errorCount != 0) {
215 // Go back and find the out-of-bounds entries in the index
216 // array. Performance doesn't matter since we are already in
217 // an error state, so we can do this sequentially, on host.
218 auto idx_h = Kokkos::create_mirror_view(idx);
219
220 // DEEP_COPY REVIEW - NOT TESTED
221 Kokkos::deep_copy(idx_h, idx);
222
223 std::vector<index_type> badIndices;
224 const size_type numInds = idx_h.extent(0);
225 for (size_type k = 0; k < numInds; ++k) {
226 if (idx_h(k) < static_cast<index_type>(0) ||
227 idx_h(k) >= static_cast<index_type>(src.extent(0))) {
228 badIndices.push_back(idx_h(k));
229 }
230 }
231
232 TEUCHOS_TEST_FOR_EXCEPTION(errorCount != badIndices.size(), std::logic_error,
233 "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
234 << " != badIndices.size() = " << badIndices.size() << ". This sho"
235 "uld never happen. Please report this to the Tpetra developers.");
236
237 std::ostringstream os;
238 os << "MultiVector single-column pack kernel had "
239 << badIndices.size() << " out-of bounds index/ices. "
240 "Here they are: [";
241 for (size_t k = 0; k < badIndices.size(); ++k) {
242 os << badIndices[k];
243 if (k + 1 < badIndices.size()) {
244 os << ", ";
245 }
246 }
247 os << "].";
248 throw std::runtime_error(os.str());
249 }
250 }
251};
252
253template <typename DstView, typename SrcView, typename IdxView>
254void pack_array_single_column(const DstView& dst,
255 const SrcView& src,
256 const IdxView& idx,
257 const size_t col,
258 const bool debug,
259 const typename DstView::execution_space& space) {
260 static_assert(Kokkos::is_view<DstView>::value,
261 "DstView must be a Kokkos::View.");
262 static_assert(Kokkos::is_view<SrcView>::value,
263 "SrcView must be a Kokkos::View.");
264 static_assert(Kokkos::is_view<IdxView>::value,
265 "IdxView must be a Kokkos::View.");
266 static_assert(static_cast<int>(DstView::rank) == 1,
267 "DstView must be a rank-1 Kokkos::View.");
268 static_assert(static_cast<int>(SrcView::rank) == 2,
269 "SrcView must be a rank-2 Kokkos::View.");
270 static_assert(static_cast<int>(IdxView::rank) == 1,
271 "IdxView must be a rank-1 Kokkos::View.");
272
273 using execution_space = typename DstView::execution_space;
274
275 static_assert(Kokkos::SpaceAccessibility<execution_space,
276 typename DstView::memory_space>::accessible,
277 "DstView not accessible from execution space");
278 static_assert(Kokkos::SpaceAccessibility<execution_space,
279 typename SrcView::memory_space>::accessible,
280 "SrcView not accessible from execution space");
281 static_assert(Kokkos::SpaceAccessibility<execution_space,
282 typename IdxView::memory_space>::accessible,
283 "IdxView not accessible from execution space");
284
285 if (debug) {
286 typedef PackArraySingleColumnWithBoundsCheck<DstView, SrcView, IdxView> impl_type;
287 impl_type::pack(dst, src, idx, col, space);
288 } else {
289 typedef PackArraySingleColumn<DstView, SrcView, IdxView> impl_type;
290 impl_type::pack(dst, src, idx, col, space);
291 }
292}
293
296template <typename DstView, typename SrcView, typename IdxView>
297void pack_array_single_column(const DstView& dst,
298 const SrcView& src,
299 const IdxView& idx,
300 const size_t col,
301 const bool debug = true) {
302 pack_array_single_column(dst, src, idx, col, debug, typename DstView::execution_space());
303}
304
305template <typename DstView, typename SrcView, typename IdxView,
306 typename Enabled = void>
307struct PackArrayMultiColumn {
308 using execution_space = typename DstView::execution_space;
309 typedef typename execution_space::size_type size_type;
310
311 DstView dst;
312 SrcView src;
313 IdxView idx;
314 size_t numCols;
315
316 PackArrayMultiColumn(const DstView& dst_,
317 const SrcView& src_,
318 const IdxView& idx_,
319 const size_t numCols_)
320 : dst(dst_)
321 , src(src_)
322 , idx(idx_)
323 , numCols(numCols_) {}
324
325 KOKKOS_INLINE_FUNCTION void
326 operator()(const size_type k) const {
327 const typename IdxView::value_type localRow = idx(k);
328 const size_t offset = k * numCols;
329 for (size_t j = 0; j < numCols; ++j) {
330 dst(offset + j) = src(localRow, j);
331 }
332 }
333
334 static void pack(const DstView& dst,
335 const SrcView& src,
336 const IdxView& idx,
337 size_t numCols,
338 const execution_space& space) {
339 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
340 Kokkos::parallel_for("Tpetra::MultiVector pack multicol const stride",
341 range_type(space, 0, idx.size()),
342 PackArrayMultiColumn(dst, src, idx, numCols));
343 }
344};
345
346template <typename DstView,
347 typename SrcView,
348 typename IdxView,
349 typename SizeType = typename DstView::execution_space::size_type,
350 typename Enabled = void>
351class PackArrayMultiColumnWithBoundsCheck {
352 public:
353 using size_type = SizeType;
354 using value_type = size_t;
355 using execution_space = typename DstView::execution_space;
356
357 private:
358 DstView dst;
359 SrcView src;
360 IdxView idx;
361 size_type numCols;
362
363 public:
364 PackArrayMultiColumnWithBoundsCheck(const DstView& dst_,
365 const SrcView& src_,
366 const IdxView& idx_,
367 const size_type numCols_)
368 : dst(dst_)
369 , src(src_)
370 , idx(idx_)
371 , numCols(numCols_) {}
372
373 KOKKOS_INLINE_FUNCTION void
374 operator()(const size_type k, value_type& lclErrorCount) const {
375 typedef typename IdxView::non_const_value_type index_type;
376
377 const index_type lclRow = idx(k);
378 if (lclRow < static_cast<index_type>(0) ||
379 lclRow >= static_cast<index_type>(src.extent(0))) {
380 ++lclErrorCount; // failed
381 } else {
382 const size_type offset = k * numCols;
383 for (size_type j = 0; j < numCols; ++j) {
384 dst(offset + j) = src(lclRow, j);
385 }
386 }
387 }
388
389 KOKKOS_INLINE_FUNCTION
390 void init(value_type& initialErrorCount) const {
391 initialErrorCount = 0;
392 }
393
394 KOKKOS_INLINE_FUNCTION void
395 join(value_type& dstErrorCount,
396 const value_type& srcErrorCount) const {
397 dstErrorCount += srcErrorCount;
398 }
399
400 static void
401 pack(const DstView& dst,
402 const SrcView& src,
403 const IdxView& idx,
404 const size_type numCols,
405 const execution_space& space) {
406 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
407 typedef typename IdxView::non_const_value_type index_type;
408
409 size_t errorCount = 0;
410 Kokkos::parallel_reduce("Tpetra::MultiVector pack multicol const stride debug only",
411 range_type(space, 0, idx.size()),
412 PackArrayMultiColumnWithBoundsCheck(dst, src, idx, numCols),
413 errorCount);
414 if (errorCount != 0) {
415 // Go back and find the out-of-bounds entries in the index
416 // array. Performance doesn't matter since we are already in
417 // an error state, so we can do this sequentially, on host.
418 auto idx_h = Kokkos::create_mirror_view(idx);
419
420 // DEEP_COPY REVIEW - NOT TESTED
421 Kokkos::deep_copy(idx_h, idx);
422
423 std::vector<index_type> badIndices;
424 const size_type numInds = idx_h.extent(0);
425 for (size_type k = 0; k < numInds; ++k) {
426 if (idx_h(k) < static_cast<index_type>(0) ||
427 idx_h(k) >= static_cast<index_type>(src.extent(0))) {
428 badIndices.push_back(idx_h(k));
429 }
430 }
431
432 TEUCHOS_TEST_FOR_EXCEPTION(errorCount != badIndices.size(), std::logic_error,
433 "PackArrayMultiColumnWithBoundsCheck: errorCount = " << errorCount
434 << " != badIndices.size() = " << badIndices.size() << ". This sho"
435 "uld never happen. Please report this to the Tpetra developers.");
436
437 std::ostringstream os;
438 os << "Tpetra::MultiVector multiple-column pack kernel had "
439 << badIndices.size() << " out-of bounds index/ices (errorCount = "
440 << errorCount << "): [";
441 for (size_t k = 0; k < badIndices.size(); ++k) {
442 os << badIndices[k];
443 if (k + 1 < badIndices.size()) {
444 os << ", ";
445 }
446 }
447 os << "].";
448 throw std::runtime_error(os.str());
449 }
450 }
451};
452
453template <typename DstView,
454 typename SrcView,
455 typename IdxView>
456void pack_array_multi_column(const DstView& dst,
457 const SrcView& src,
458 const IdxView& idx,
459 const size_t numCols,
460 const bool debug,
461 const typename DstView::execution_space& space) {
462 static_assert(Kokkos::is_view<DstView>::value,
463 "DstView must be a Kokkos::View.");
464 static_assert(Kokkos::is_view<SrcView>::value,
465 "SrcView must be a Kokkos::View.");
466 static_assert(Kokkos::is_view<IdxView>::value,
467 "IdxView must be a Kokkos::View.");
468 static_assert(static_cast<int>(DstView::rank) == 1,
469 "DstView must be a rank-1 Kokkos::View.");
470 static_assert(static_cast<int>(SrcView::rank) == 2,
471 "SrcView must be a rank-2 Kokkos::View.");
472 static_assert(static_cast<int>(IdxView::rank) == 1,
473 "IdxView must be a rank-1 Kokkos::View.");
474
475 using execution_space = typename DstView::execution_space;
476
477 static_assert(Kokkos::SpaceAccessibility<execution_space,
478 typename DstView::memory_space>::accessible,
479 "DstView not accessible from execution space");
480 static_assert(Kokkos::SpaceAccessibility<execution_space,
481 typename SrcView::memory_space>::accessible,
482 "SrcView not accessible from execution space");
483 static_assert(Kokkos::SpaceAccessibility<execution_space,
484 typename IdxView::memory_space>::accessible,
485 "IdxView not accessible from execution space");
486
487 if (debug) {
488 typedef PackArrayMultiColumnWithBoundsCheck<DstView,
489 SrcView, IdxView>
490 impl_type;
491 impl_type::pack(dst, src, idx, numCols, space);
492 } else {
493 typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
494 impl_type::pack(dst, src, idx, numCols, space);
495 }
496}
497
498template <typename DstView,
499 typename SrcView,
500 typename IdxView>
501void pack_array_multi_column(const DstView& dst,
502 const SrcView& src,
503 const IdxView& idx,
504 const size_t numCols,
505 const bool debug = true) {
506 pack_array_multi_column(dst, src, idx, numCols, debug, typename DstView::execution_space());
507}
508
509template <typename DstView, typename SrcView, typename IdxView,
510 typename ColView, typename Enabled = void>
511struct PackArrayMultiColumnVariableStride {
512 using execution_space = typename DstView::execution_space;
513 typedef typename execution_space::size_type size_type;
514
515 DstView dst;
516 SrcView src;
517 IdxView idx;
518 ColView col;
519 size_t numCols;
520
521 PackArrayMultiColumnVariableStride(const DstView& dst_,
522 const SrcView& src_,
523 const IdxView& idx_,
524 const ColView& col_,
525 const size_t numCols_)
526 : dst(dst_)
527 , src(src_)
528 , idx(idx_)
529 , col(col_)
530 , numCols(numCols_) {}
531
532 KOKKOS_INLINE_FUNCTION
533 void operator()(const size_type k) const {
534 const typename IdxView::value_type localRow = idx(k);
535 const size_t offset = k * numCols;
536 for (size_t j = 0; j < numCols; ++j) {
537 dst(offset + j) = src(localRow, col(j));
538 }
539 }
540
541 static void pack(const DstView& dst,
542 const SrcView& src,
543 const IdxView& idx,
544 const ColView& col,
545 size_t numCols,
546 const execution_space& space) {
547 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
548 Kokkos::parallel_for("Tpetra::MultiVector pack multicol var stride",
549 range_type(space, 0, idx.size()),
550 PackArrayMultiColumnVariableStride(dst, src, idx, col, numCols));
551 }
552};
553
554template <typename DstView,
555 typename SrcView,
556 typename IdxView,
557 typename ColView,
558 typename SizeType = typename DstView::execution_space::size_type,
559 typename Enabled = void>
560class PackArrayMultiColumnVariableStrideWithBoundsCheck {
561 public:
562 using size_type = SizeType;
563 using value_type = size_t;
564 using execution_space = typename DstView::execution_space;
565
566 private:
567 DstView dst;
568 SrcView src;
569 IdxView idx;
570 ColView col;
571 size_type numCols;
572
573 public:
574 PackArrayMultiColumnVariableStrideWithBoundsCheck(const DstView& dst_,
575 const SrcView& src_,
576 const IdxView& idx_,
577 const ColView& col_,
578 const size_type numCols_)
579 : dst(dst_)
580 , src(src_)
581 , idx(idx_)
582 , col(col_)
583 , numCols(numCols_) {}
584
585 KOKKOS_INLINE_FUNCTION void
586 operator()(const size_type k, value_type& lclErrorCount) const {
587 typedef typename IdxView::non_const_value_type row_index_type;
588 typedef typename ColView::non_const_value_type col_index_type;
589
590 const row_index_type lclRow = idx(k);
591 if (lclRow < static_cast<row_index_type>(0) ||
592 lclRow >= static_cast<row_index_type>(src.extent(0))) {
593 ++lclErrorCount = 0;
594 } else {
595 const size_type offset = k * numCols;
596 for (size_type j = 0; j < numCols; ++j) {
597 const col_index_type lclCol = col(j);
598 if (Impl::outOfBounds<col_index_type>(lclCol, src.extent(1))) {
599 ++lclErrorCount = 0;
600 } else { // all indices are valid; do the assignment
601 dst(offset + j) = src(lclRow, lclCol);
602 }
603 }
604 }
605 }
606
607 KOKKOS_INLINE_FUNCTION void
608 init(value_type& initialErrorCount) const {
609 initialErrorCount = 0;
610 }
611
612 KOKKOS_INLINE_FUNCTION void
613 join(value_type& dstErrorCount,
614 const value_type& srcErrorCount) const {
615 dstErrorCount += srcErrorCount;
616 }
617
618 static void
619 pack(const DstView& dst,
620 const SrcView& src,
621 const IdxView& idx,
622 const ColView& col,
623 const size_type numCols,
624 const execution_space& space) {
625 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
626 using row_index_type = typename IdxView::non_const_value_type;
627 using col_index_type = typename ColView::non_const_value_type;
628
629 size_t errorCount = 0;
630 Kokkos::parallel_reduce("Tpetra::MultiVector pack multicol var stride debug only",
631 range_type(space, 0, idx.size()),
632 PackArrayMultiColumnVariableStrideWithBoundsCheck(dst, src, idx,
633 col, numCols),
634 errorCount);
635 if (errorCount != 0) {
636 constexpr size_t maxNumBadIndicesToPrint = 100;
637
638 std::ostringstream os; // for error reporting
639 os << "Tpetra::MultiVector multicolumn variable stride pack kernel "
640 "found "
641 << errorCount
642 << " error" << (errorCount != size_t(1) ? "s" : "") << ". ";
643
644 // Go back and find any out-of-bounds entries in the array of
645 // row indices. Performance doesn't matter since we are already
646 // in an error state, so we can do this sequentially, on host.
647 auto idx_h = Kokkos::create_mirror_view(idx);
648
649 // DEEP_COPY REVIEW - NOT TESTED
650 Kokkos::deep_copy(idx_h, idx);
651
652 std::vector<row_index_type> badRows;
653 const size_type numRowInds = idx_h.extent(0);
654 for (size_type k = 0; k < numRowInds; ++k) {
655 if (Impl::outOfBounds<row_index_type>(idx_h(k), src.extent(0))) {
656 badRows.push_back(idx_h(k));
657 }
658 }
659
660 if (badRows.size() != 0) {
661 os << badRows.size() << " out-of-bounds row ind"
662 << (badRows.size() != size_t(1) ? "ices" : "ex");
663 if (badRows.size() <= maxNumBadIndicesToPrint) {
664 os << ": [";
665 for (size_t k = 0; k < badRows.size(); ++k) {
666 os << badRows[k];
667 if (k + 1 < badRows.size()) {
668 os << ", ";
669 }
670 }
671 os << "]. ";
672 } else {
673 os << ". ";
674 }
675 } else {
676 os << "No out-of-bounds row indices. ";
677 }
678
679 // Go back and find any out-of-bounds entries in the array
680 // of column indices.
681 auto col_h = Kokkos::create_mirror_view(col);
682
683 // DEEP_COPY REVIEW - NOT TESTED
684 Kokkos::deep_copy(col_h, col);
685
686 std::vector<col_index_type> badCols;
687 const size_type numColInds = col_h.extent(0);
688 for (size_type k = 0; k < numColInds; ++k) {
689 if (Impl::outOfBounds<col_index_type>(col_h(k), src.extent(1))) {
690 badCols.push_back(col_h(k));
691 }
692 }
693
694 if (badCols.size() != 0) {
695 os << badCols.size() << " out-of-bounds column ind"
696 << (badCols.size() != size_t(1) ? "ices" : "ex");
697 if (badCols.size() <= maxNumBadIndicesToPrint) {
698 os << ": [";
699 for (size_t k = 0; k < badCols.size(); ++k) {
700 os << badCols[k];
701 if (k + 1 < badCols.size()) {
702 os << ", ";
703 }
704 }
705 os << "]. ";
706 } else {
707 os << ". ";
708 }
709 } else {
710 os << "No out-of-bounds column indices. ";
711 }
712
713 TEUCHOS_TEST_FOR_EXCEPTION(errorCount != 0 && badRows.size() == 0 && badCols.size() == 0,
714 std::logic_error,
715 "Tpetra::MultiVector variable stride pack "
716 "kernel reports errorCount="
717 << errorCount << ", but we failed "
718 "to find any bad rows or columns. This should never happen. "
719 "Please report this to the Tpetra developers.");
720
721 throw std::runtime_error(os.str());
722 } // hasErr
723 }
724};
725
726template <typename DstView,
727 typename SrcView,
728 typename IdxView,
729 typename ColView>
730void pack_array_multi_column_variable_stride(const DstView& dst,
731 const SrcView& src,
732 const IdxView& idx,
733 const ColView& col,
734 const size_t numCols,
735 const bool debug,
736 const typename DstView::execution_space& space) {
737 static_assert(Kokkos::is_view<DstView>::value,
738 "DstView must be a Kokkos::View.");
739 static_assert(Kokkos::is_view<SrcView>::value,
740 "SrcView must be a Kokkos::View.");
741 static_assert(Kokkos::is_view<IdxView>::value,
742 "IdxView must be a Kokkos::View.");
743 static_assert(Kokkos::is_view<ColView>::value,
744 "ColView must be a Kokkos::View.");
745 static_assert(static_cast<int>(DstView::rank) == 1,
746 "DstView must be a rank-1 Kokkos::View.");
747 static_assert(static_cast<int>(SrcView::rank) == 2,
748 "SrcView must be a rank-2 Kokkos::View.");
749 static_assert(static_cast<int>(IdxView::rank) == 1,
750 "IdxView must be a rank-1 Kokkos::View.");
751 static_assert(static_cast<int>(ColView::rank) == 1,
752 "ColView must be a rank-1 Kokkos::View.");
753
754 using execution_space = typename DstView::execution_space;
755
756 static_assert(Kokkos::SpaceAccessibility<execution_space,
757 typename DstView::memory_space>::accessible,
758 "DstView not accessible from execution space");
759 static_assert(Kokkos::SpaceAccessibility<execution_space,
760 typename SrcView::memory_space>::accessible,
761 "SrcView not accessible from execution space");
762 static_assert(Kokkos::SpaceAccessibility<execution_space,
763 typename IdxView::memory_space>::accessible,
764 "IdxView not accessible from execution space");
765
766 if (debug) {
767 typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
768 SrcView, IdxView, ColView>
769 impl_type;
770 impl_type::pack(dst, src, idx, col, numCols, space);
771 } else {
772 typedef PackArrayMultiColumnVariableStride<DstView,
773 SrcView, IdxView, ColView>
774 impl_type;
775 impl_type::pack(dst, src, idx, col, numCols, space);
776 }
777}
778
779template <typename DstView,
780 typename SrcView,
781 typename IdxView,
782 typename ColView>
783void pack_array_multi_column_variable_stride(const DstView& dst,
784 const SrcView& src,
785 const IdxView& idx,
786 const ColView& col,
787 const size_t numCols,
788 const bool debug = true) {
789 pack_array_multi_column_variable_stride(dst, src, idx, col, numCols, debug,
790 typename DstView::execution_space());
791}
792
793// Tag types to indicate whether to use atomic updates in the
794// various CombineMode "Op"s.
795struct atomic_tag {};
796struct nonatomic_tag {};
797
798struct AddOp {
799 template <class SC>
800 KOKKOS_INLINE_FUNCTION void operator()(atomic_tag, SC& dest, const SC& src) const {
801 Kokkos::atomic_add(&dest, src);
802 }
803
804 template <class SC>
805 KOKKOS_INLINE_FUNCTION void operator()(nonatomic_tag, SC& dest, const SC& src) const {
806 dest += src;
807 }
808};
809
810struct InsertOp {
811 // There's no point to using Kokkos::atomic_assign for the REPLACE
812 // or INSERT CombineModes, since this is not a well-defined
813 // reduction for MultiVector anyway. See GitHub Issue #4417
814 // (which this fixes).
815 template <class SC>
816 KOKKOS_INLINE_FUNCTION void operator()(atomic_tag, SC& dest, const SC& src) const {
817 dest = src;
818 }
819
820 template <class SC>
821 KOKKOS_INLINE_FUNCTION void operator()(nonatomic_tag, SC& dest, const SC& src) const {
822 dest = src;
823 }
824};
825
826struct AbsMaxOp {
827 template <class Scalar>
828 struct AbsMaxHelper {
829 Scalar value;
830
831 KOKKOS_FUNCTION AbsMaxHelper& operator+=(AbsMaxHelper const& rhs) {
832#if KOKKOS_VERSION >= 40799
833 auto lhs_abs_value = KokkosKernels::ArithTraits<Scalar>::abs(value);
834#else
835 auto lhs_abs_value = Kokkos::ArithTraits<Scalar>::abs(value);
836#endif
837#if KOKKOS_VERSION >= 40799
838 auto rhs_abs_value = KokkosKernels::ArithTraits<Scalar>::abs(rhs.value);
839#else
840 auto rhs_abs_value = Kokkos::ArithTraits<Scalar>::abs(rhs.value);
841#endif
842 value = lhs_abs_value > rhs_abs_value ? lhs_abs_value : rhs_abs_value;
843 return *this;
844 }
845
846 KOKKOS_FUNCTION AbsMaxHelper operator+(AbsMaxHelper const& rhs) const {
847 AbsMaxHelper ret = *this;
848 ret += rhs;
849 return ret;
850 }
851 };
852
853 template <typename SC>
854 KOKKOS_INLINE_FUNCTION void operator()(atomic_tag, SC& dst, const SC& src) const {
855 Kokkos::atomic_add(reinterpret_cast<AbsMaxHelper<SC>*>(&dst), AbsMaxHelper<SC>{src});
856 }
857
858 template <typename SC>
859 KOKKOS_INLINE_FUNCTION void operator()(nonatomic_tag, SC& dst, const SC& src) const {
860#if KOKKOS_VERSION >= 40799
861 auto dst_abs_value = KokkosKernels::ArithTraits<SC>::abs(dst);
862#else
863 auto dst_abs_value = Kokkos::ArithTraits<SC>::abs(dst);
864#endif
865#if KOKKOS_VERSION >= 40799
866 auto src_abs_value = KokkosKernels::ArithTraits<SC>::abs(src);
867#else
868 auto src_abs_value = Kokkos::ArithTraits<SC>::abs(src);
869#endif
870 dst = dst_abs_value > src_abs_value ? dst_abs_value : src_abs_value;
871 }
872};
873
874template <typename ExecutionSpace,
875 typename DstView,
876 typename SrcView,
877 typename IdxView,
878 typename Op,
879 typename Enabled = void>
880class UnpackArrayMultiColumn {
881 private:
882 static_assert(Kokkos::is_view<DstView>::value,
883 "DstView must be a Kokkos::View.");
884 static_assert(Kokkos::is_view<SrcView>::value,
885 "SrcView must be a Kokkos::View.");
886 static_assert(Kokkos::is_view<IdxView>::value,
887 "IdxView must be a Kokkos::View.");
888 static_assert(static_cast<int>(DstView::rank) == 2,
889 "DstView must be a rank-2 Kokkos::View.");
890 static_assert(static_cast<int>(SrcView::rank) == 1,
891 "SrcView must be a rank-1 Kokkos::View.");
892 static_assert(static_cast<int>(IdxView::rank) == 1,
893 "IdxView must be a rank-1 Kokkos::View.");
894
895 public:
896 typedef typename ExecutionSpace::execution_space execution_space;
897 typedef typename execution_space::size_type size_type;
898
899 private:
900 DstView dst;
901 SrcView src;
902 IdxView idx;
903 Op op;
904 size_t numCols;
905
906 public:
907 UnpackArrayMultiColumn(const ExecutionSpace& /* execSpace */,
908 const DstView& dst_,
909 const SrcView& src_,
910 const IdxView& idx_,
911 const Op& op_,
912 const size_t numCols_)
913 : dst(dst_)
914 , src(src_)
915 , idx(idx_)
916 , op(op_)
917 , numCols(numCols_) {}
918
919 template <class TagType>
920 KOKKOS_INLINE_FUNCTION void
921 operator()(TagType tag, const size_type k) const {
922 static_assert(std::is_same<TagType, atomic_tag>::value ||
923 std::is_same<TagType, nonatomic_tag>::value,
924 "TagType must be atomic_tag or nonatomic_tag.");
925
926 const typename IdxView::value_type localRow = idx(k);
927 const size_t offset = k * numCols;
928 for (size_t j = 0; j < numCols; ++j) {
929 op(tag, dst(localRow, j), src(offset + j));
930 }
931 }
932
933 static void
934 unpack(const ExecutionSpace& execSpace,
935 const DstView& dst,
936 const SrcView& src,
937 const IdxView& idx,
938 const Op& op,
939 const size_t numCols,
940 const bool use_atomic_updates) {
941 if (use_atomic_updates) {
942 using range_type =
943 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
944 Kokkos::parallel_for("Tpetra::MultiVector unpack const stride atomic",
945 range_type(0, idx.size()),
946 UnpackArrayMultiColumn(execSpace, dst, src, idx, op, numCols));
947 } else {
948 using range_type =
949 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
950 Kokkos::parallel_for("Tpetra::MultiVector unpack const stride nonatomic",
951 range_type(0, idx.size()),
952 UnpackArrayMultiColumn(execSpace, dst, src, idx, op, numCols));
953 }
954 }
955};
956
957template <typename ExecutionSpace,
958 typename DstView,
959 typename SrcView,
960 typename IdxView,
961 typename Op,
962 typename SizeType = typename ExecutionSpace::execution_space::size_type,
963 typename Enabled = void>
964class UnpackArrayMultiColumnWithBoundsCheck {
965 private:
966 static_assert(Kokkos::is_view<DstView>::value,
967 "DstView must be a Kokkos::View.");
968 static_assert(Kokkos::is_view<SrcView>::value,
969 "SrcView must be a Kokkos::View.");
970 static_assert(Kokkos::is_view<IdxView>::value,
971 "IdxView must be a Kokkos::View.");
972 static_assert(static_cast<int>(DstView::rank) == 2,
973 "DstView must be a rank-2 Kokkos::View.");
974 static_assert(static_cast<int>(SrcView::rank) == 1,
975 "SrcView must be a rank-1 Kokkos::View.");
976 static_assert(static_cast<int>(IdxView::rank) == 1,
977 "IdxView must be a rank-1 Kokkos::View.");
978 static_assert(std::is_integral<SizeType>::value,
979 "SizeType must be a built-in integer type.");
980
981 public:
982 using execution_space = typename ExecutionSpace::execution_space;
983 using size_type = SizeType;
984 using value_type = size_t;
985
986 private:
987 DstView dst;
988 SrcView src;
989 IdxView idx;
990 Op op;
991 size_type numCols;
992
993 public:
994 UnpackArrayMultiColumnWithBoundsCheck(const ExecutionSpace& /* execSpace */,
995 const DstView& dst_,
996 const SrcView& src_,
997 const IdxView& idx_,
998 const Op& op_,
999 const size_type numCols_)
1000 : dst(dst_)
1001 , src(src_)
1002 , idx(idx_)
1003 , op(op_)
1004 , numCols(numCols_) {}
1005
1006 template <class TagType>
1007 KOKKOS_INLINE_FUNCTION void
1008 operator()(TagType tag,
1009 const size_type k,
1010 size_t& lclErrCount) const {
1011 static_assert(std::is_same<TagType, atomic_tag>::value ||
1012 std::is_same<TagType, nonatomic_tag>::value,
1013 "TagType must be atomic_tag or nonatomic_tag.");
1014 using index_type = typename IdxView::non_const_value_type;
1015
1016 const index_type lclRow = idx(k);
1017 if (lclRow < static_cast<index_type>(0) ||
1018 lclRow >= static_cast<index_type>(dst.extent(0))) {
1019 ++lclErrCount;
1020 } else {
1021 const size_type offset = k * numCols;
1022 for (size_type j = 0; j < numCols; ++j) {
1023 op(tag, dst(lclRow, j), src(offset + j));
1024 }
1025 }
1026 }
1027
1028 template <class TagType>
1029 KOKKOS_INLINE_FUNCTION void
1030 init(TagType, size_t& initialErrorCount) const {
1031 initialErrorCount = 0;
1032 }
1033
1034 template <class TagType>
1035 KOKKOS_INLINE_FUNCTION void
1036 join(TagType,
1037 size_t& dstErrorCount,
1038 const size_t& srcErrorCount) const {
1039 dstErrorCount += srcErrorCount;
1040 }
1041
1042 static void
1043 unpack(const ExecutionSpace& execSpace,
1044 const DstView& dst,
1045 const SrcView& src,
1046 const IdxView& idx,
1047 const Op& op,
1048 const size_type numCols,
1049 const bool use_atomic_updates) {
1050 using index_type = typename IdxView::non_const_value_type;
1051
1052 size_t errorCount = 0;
1053 if (use_atomic_updates) {
1054 using range_type =
1055 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1056 Kokkos::parallel_reduce("Tpetra::MultiVector unpack multicol const stride atomic debug only",
1057 range_type(0, idx.size()),
1058 UnpackArrayMultiColumnWithBoundsCheck(execSpace, dst, src,
1059 idx, op, numCols),
1060 errorCount);
1061 } else {
1062 using range_type =
1063 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1064 Kokkos::parallel_reduce("Tpetra::MultiVector unpack multicol const stride nonatomic debug only",
1065 range_type(0, idx.size()),
1066 UnpackArrayMultiColumnWithBoundsCheck(execSpace, dst, src,
1067 idx, op, numCols),
1068 errorCount);
1069 }
1070
1071 if (errorCount != 0) {
1072 // Go back and find the out-of-bounds entries in the index
1073 // array. Performance doesn't matter since we are already in
1074 // an error state, so we can do this sequentially, on host.
1075 auto idx_h = Kokkos::create_mirror_view(idx);
1076
1077 // DEEP_COPY REVIEW - NOT TESTED
1078 Kokkos::deep_copy(idx_h, idx);
1079
1080 std::vector<index_type> badIndices;
1081 const size_type numInds = idx_h.extent(0);
1082 for (size_type k = 0; k < numInds; ++k) {
1083 if (idx_h(k) < static_cast<index_type>(0) ||
1084 idx_h(k) >= static_cast<index_type>(dst.extent(0))) {
1085 badIndices.push_back(idx_h(k));
1086 }
1087 }
1088
1089 if (errorCount != badIndices.size()) {
1090 std::ostringstream os;
1091 os << "MultiVector unpack kernel: errorCount = " << errorCount
1092 << " != badIndices.size() = " << badIndices.size()
1093 << ". This should never happen. "
1094 "Please report this to the Tpetra developers.";
1095 throw std::logic_error(os.str());
1096 }
1097
1098 std::ostringstream os;
1099 os << "MultiVector unpack kernel had " << badIndices.size()
1100 << " out-of bounds index/ices. Here they are: [";
1101 for (size_t k = 0; k < badIndices.size(); ++k) {
1102 os << badIndices[k];
1103 if (k + 1 < badIndices.size()) {
1104 os << ", ";
1105 }
1106 }
1107 os << "].";
1108 throw std::runtime_error(os.str());
1109 }
1110 }
1111};
1112
1113template <typename ExecutionSpace,
1114 typename DstView,
1115 typename SrcView,
1116 typename IdxView,
1117 typename Op>
1118void unpack_array_multi_column(const ExecutionSpace& execSpace,
1119 const DstView& dst,
1120 const SrcView& src,
1121 const IdxView& idx,
1122 const Op& op,
1123 const size_t numCols,
1124 const bool use_atomic_updates,
1125 const bool debug) {
1126 static_assert(Kokkos::is_view<DstView>::value,
1127 "DstView must be a Kokkos::View.");
1128 static_assert(Kokkos::is_view<SrcView>::value,
1129 "SrcView must be a Kokkos::View.");
1130 static_assert(Kokkos::is_view<IdxView>::value,
1131 "IdxView must be a Kokkos::View.");
1132 static_assert(static_cast<int>(DstView::rank) == 2,
1133 "DstView must be a rank-2 Kokkos::View.");
1134 static_assert(static_cast<int>(SrcView::rank) == 1,
1135 "SrcView must be a rank-1 Kokkos::View.");
1136 static_assert(static_cast<int>(IdxView::rank) == 1,
1137 "IdxView must be a rank-1 Kokkos::View.");
1138
1139 if (debug) {
1140 typedef UnpackArrayMultiColumnWithBoundsCheck<ExecutionSpace,
1141 DstView, SrcView, IdxView, Op>
1142 impl_type;
1143 impl_type::unpack(execSpace, dst, src, idx, op, numCols,
1144 use_atomic_updates);
1145 } else {
1146 typedef UnpackArrayMultiColumn<ExecutionSpace,
1147 DstView, SrcView, IdxView, Op>
1148 impl_type;
1149 impl_type::unpack(execSpace, dst, src, idx, op, numCols,
1150 use_atomic_updates);
1151 }
1152}
1153
1154template <typename ExecutionSpace,
1155 typename DstView,
1156 typename SrcView,
1157 typename IdxView,
1158 typename ColView,
1159 typename Op,
1160 typename Enabled = void>
1161class UnpackArrayMultiColumnVariableStride {
1162 private:
1163 static_assert(Kokkos::is_view<DstView>::value,
1164 "DstView must be a Kokkos::View.");
1165 static_assert(Kokkos::is_view<SrcView>::value,
1166 "SrcView must be a Kokkos::View.");
1167 static_assert(Kokkos::is_view<IdxView>::value,
1168 "IdxView must be a Kokkos::View.");
1169 static_assert(Kokkos::is_view<ColView>::value,
1170 "ColView must be a Kokkos::View.");
1171 static_assert(static_cast<int>(DstView::rank) == 2,
1172 "DstView must be a rank-2 Kokkos::View.");
1173 static_assert(static_cast<int>(SrcView::rank) == 1,
1174 "SrcView must be a rank-1 Kokkos::View.");
1175 static_assert(static_cast<int>(IdxView::rank) == 1,
1176 "IdxView must be a rank-1 Kokkos::View.");
1177 static_assert(static_cast<int>(ColView::rank) == 1,
1178 "ColView must be a rank-1 Kokkos::View.");
1179
1180 public:
1181 using execution_space = typename ExecutionSpace::execution_space;
1182 using size_type = typename execution_space::size_type;
1183
1184 private:
1185 DstView dst;
1186 SrcView src;
1187 IdxView idx;
1188 ColView col;
1189 Op op;
1190 size_t numCols;
1191
1192 public:
1193 UnpackArrayMultiColumnVariableStride(const ExecutionSpace& /* execSpace */,
1194 const DstView& dst_,
1195 const SrcView& src_,
1196 const IdxView& idx_,
1197 const ColView& col_,
1198 const Op& op_,
1199 const size_t numCols_)
1200 : dst(dst_)
1201 , src(src_)
1202 , idx(idx_)
1203 , col(col_)
1204 , op(op_)
1205 , numCols(numCols_) {}
1206
1207 template <class TagType>
1208 KOKKOS_INLINE_FUNCTION void
1209 operator()(TagType tag, const size_type k) const {
1210 static_assert(std::is_same<TagType, atomic_tag>::value ||
1211 std::is_same<TagType, nonatomic_tag>::value,
1212 "TagType must be atomic_tag or nonatomic_tag.");
1213
1214 const typename IdxView::value_type localRow = idx(k);
1215 const size_t offset = k * numCols;
1216 for (size_t j = 0; j < numCols; ++j) {
1217 op(tag, dst(localRow, col(j)), src(offset + j));
1218 }
1219 }
1220
1221 static void
1222 unpack(const ExecutionSpace& execSpace,
1223 const DstView& dst,
1224 const SrcView& src,
1225 const IdxView& idx,
1226 const ColView& col,
1227 const Op& op,
1228 const size_t numCols,
1229 const bool use_atomic_updates) {
1230 if (use_atomic_updates) {
1231 using range_type =
1232 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1233 Kokkos::parallel_for("Tpetra::MultiVector unpack var stride atomic",
1234 range_type(0, idx.size()),
1235 UnpackArrayMultiColumnVariableStride(execSpace, dst, src,
1236 idx, col, op, numCols));
1237 } else {
1238 using range_type =
1239 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1240 Kokkos::parallel_for("Tpetra::MultiVector unpack var stride nonatomic",
1241 range_type(0, idx.size()),
1242 UnpackArrayMultiColumnVariableStride(execSpace, dst, src,
1243 idx, col, op, numCols));
1244 }
1245 }
1246};
1247
1248template <typename ExecutionSpace,
1249 typename DstView,
1250 typename SrcView,
1251 typename IdxView,
1252 typename ColView,
1253 typename Op,
1254 typename SizeType = typename ExecutionSpace::execution_space::size_type,
1255 typename Enabled = void>
1256class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
1257 private:
1258 static_assert(Kokkos::is_view<DstView>::value,
1259 "DstView must be a Kokkos::View.");
1260 static_assert(Kokkos::is_view<SrcView>::value,
1261 "SrcView must be a Kokkos::View.");
1262 static_assert(Kokkos::is_view<IdxView>::value,
1263 "IdxView must be a Kokkos::View.");
1264 static_assert(Kokkos::is_view<ColView>::value,
1265 "ColView must be a Kokkos::View.");
1266 static_assert(static_cast<int>(DstView::rank) == 2,
1267 "DstView must be a rank-2 Kokkos::View.");
1268 static_assert(static_cast<int>(SrcView::rank) == 1,
1269 "SrcView must be a rank-1 Kokkos::View.");
1270 static_assert(static_cast<int>(IdxView::rank) == 1,
1271 "IdxView must be a rank-1 Kokkos::View.");
1272 static_assert(static_cast<int>(ColView::rank) == 1,
1273 "ColView must be a rank-1 Kokkos::View.");
1274 static_assert(std::is_integral<SizeType>::value,
1275 "SizeType must be a built-in integer type.");
1276
1277 public:
1278 using execution_space = typename ExecutionSpace::execution_space;
1279 using size_type = SizeType;
1280 using value_type = size_t;
1281
1282 private:
1283 DstView dst;
1284 SrcView src;
1285 IdxView idx;
1286 ColView col;
1287 Op op;
1288 size_type numCols;
1289
1290 public:
1291 UnpackArrayMultiColumnVariableStrideWithBoundsCheck(const ExecutionSpace& /* execSpace */,
1292 const DstView& dst_,
1293 const SrcView& src_,
1294 const IdxView& idx_,
1295 const ColView& col_,
1296 const Op& op_,
1297 const size_t numCols_)
1298 : dst(dst_)
1299 , src(src_)
1300 , idx(idx_)
1301 , col(col_)
1302 , op(op_)
1303 , numCols(numCols_) {}
1304
1305 template <class TagType>
1306 KOKKOS_INLINE_FUNCTION void
1307 operator()(TagType tag,
1308 const size_type k,
1309 value_type& lclErrorCount) const {
1310 static_assert(std::is_same<TagType, atomic_tag>::value ||
1311 std::is_same<TagType, nonatomic_tag>::value,
1312 "TagType must be atomic_tag or nonatomic_tag.");
1313 using row_index_type = typename IdxView::non_const_value_type;
1314 using col_index_type = typename ColView::non_const_value_type;
1315
1316 const row_index_type lclRow = idx(k);
1317 if (lclRow < static_cast<row_index_type>(0) ||
1318 lclRow >= static_cast<row_index_type>(dst.extent(0))) {
1319 ++lclErrorCount;
1320 } else {
1321 const size_type offset = k * numCols;
1322 for (size_type j = 0; j < numCols; ++j) {
1323 const col_index_type lclCol = col(j);
1324 if (Impl::outOfBounds<col_index_type>(lclCol, dst.extent(1))) {
1325 ++lclErrorCount;
1326 } else { // all indices are valid; apply the op
1327 op(tag, dst(lclRow, col(j)), src(offset + j));
1328 }
1329 }
1330 }
1331 }
1332
1333 KOKKOS_INLINE_FUNCTION void
1334 init(value_type& initialErrorCount) const {
1335 initialErrorCount = 0;
1336 }
1337
1338 KOKKOS_INLINE_FUNCTION void
1339 join(value_type& dstErrorCount,
1340 const value_type& srcErrorCount) const {
1341 dstErrorCount += srcErrorCount;
1342 }
1343
1344 static void
1345 unpack(const ExecutionSpace& execSpace,
1346 const DstView& dst,
1347 const SrcView& src,
1348 const IdxView& idx,
1349 const ColView& col,
1350 const Op& op,
1351 const size_type numCols,
1352 const bool use_atomic_updates) {
1353 using row_index_type = typename IdxView::non_const_value_type;
1354 using col_index_type = typename ColView::non_const_value_type;
1355
1356 size_t errorCount = 0;
1357 if (use_atomic_updates) {
1358 using range_type =
1359 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1360 Kokkos::parallel_reduce("Tpetra::MultiVector unpack var stride atomic debug only",
1361 range_type(0, idx.size()),
1362 UnpackArrayMultiColumnVariableStrideWithBoundsCheck(execSpace, dst, src, idx, col, op, numCols),
1363 errorCount);
1364 } else {
1365 using range_type =
1366 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1367 Kokkos::parallel_reduce("Tpetra::MultiVector unpack var stride nonatomic debug only",
1368 range_type(0, idx.size()),
1369 UnpackArrayMultiColumnVariableStrideWithBoundsCheck(execSpace, dst, src, idx, col, op, numCols),
1370 errorCount);
1371 }
1372
1373 if (errorCount != 0) {
1374 constexpr size_t maxNumBadIndicesToPrint = 100;
1375
1376 std::ostringstream os; // for error reporting
1377 os << "Tpetra::MultiVector multicolumn variable stride unpack kernel "
1378 "found "
1379 << errorCount
1380 << " error" << (errorCount != size_t(1) ? "s" : "") << ". ";
1381
1382 // Go back and find any out-of-bounds entries in the array of
1383 // row indices. Performance doesn't matter since we are
1384 // already in an error state, so we can do this sequentially,
1385 // on host.
1386 auto idx_h = Kokkos::create_mirror_view(idx);
1387
1388 // DEEP_COPY REVIEW - NOT TESTED
1389 Kokkos::deep_copy(idx_h, idx);
1390
1391 std::vector<row_index_type> badRows;
1392 const size_type numRowInds = idx_h.extent(0);
1393 for (size_type k = 0; k < numRowInds; ++k) {
1394 if (idx_h(k) < static_cast<row_index_type>(0) ||
1395 idx_h(k) >= static_cast<row_index_type>(dst.extent(0))) {
1396 badRows.push_back(idx_h(k));
1397 }
1398 }
1399
1400 if (badRows.size() != 0) {
1401 os << badRows.size() << " out-of-bounds row ind"
1402 << (badRows.size() != size_t(1) ? "ices" : "ex");
1403 if (badRows.size() <= maxNumBadIndicesToPrint) {
1404 os << ": [";
1405 for (size_t k = 0; k < badRows.size(); ++k) {
1406 os << badRows[k];
1407 if (k + 1 < badRows.size()) {
1408 os << ", ";
1409 }
1410 }
1411 os << "]. ";
1412 } else {
1413 os << ". ";
1414 }
1415 } else {
1416 os << "No out-of-bounds row indices. ";
1417 }
1418
1419 // Go back and find any out-of-bounds entries in the array
1420 // of column indices.
1421 auto col_h = Kokkos::create_mirror_view(col);
1422
1423 // DEEP_COPY REVIEW - NOT TESTED
1424 Kokkos::deep_copy(col_h, col);
1425
1426 std::vector<col_index_type> badCols;
1427 const size_type numColInds = col_h.extent(0);
1428 for (size_type k = 0; k < numColInds; ++k) {
1429 if (Impl::outOfBounds<col_index_type>(col_h(k), dst.extent(1))) {
1430 badCols.push_back(col_h(k));
1431 }
1432 }
1433
1434 if (badCols.size() != 0) {
1435 os << badCols.size() << " out-of-bounds column ind"
1436 << (badCols.size() != size_t(1) ? "ices" : "ex");
1437 if (badCols.size() <= maxNumBadIndicesToPrint) {
1438 for (size_t k = 0; k < badCols.size(); ++k) {
1439 os << ": [";
1440 os << badCols[k];
1441 if (k + 1 < badCols.size()) {
1442 os << ", ";
1443 }
1444 }
1445 os << "]. ";
1446 } else {
1447 os << ". ";
1448 }
1449 } else {
1450 os << "No out-of-bounds column indices. ";
1451 }
1452
1453 TEUCHOS_TEST_FOR_EXCEPTION(errorCount != 0 && badRows.size() == 0 && badCols.size() == 0,
1454 std::logic_error,
1455 "Tpetra::MultiVector variable stride unpack "
1456 "kernel reports errorCount="
1457 << errorCount << ", but we failed "
1458 "to find any bad rows or columns. This should never happen. "
1459 "Please report this to the Tpetra developers.");
1460
1461 throw std::runtime_error(os.str());
1462 } // hasErr
1463 }
1464};
1465
1466template <typename ExecutionSpace,
1467 typename DstView,
1468 typename SrcView,
1469 typename IdxView,
1470 typename ColView,
1471 typename Op>
1472void unpack_array_multi_column_variable_stride(const ExecutionSpace& execSpace,
1473 const DstView& dst,
1474 const SrcView& src,
1475 const IdxView& idx,
1476 const ColView& col,
1477 const Op& op,
1478 const size_t numCols,
1479 const bool use_atomic_updates,
1480 const bool debug) {
1481 static_assert(Kokkos::is_view<DstView>::value,
1482 "DstView must be a Kokkos::View.");
1483 static_assert(Kokkos::is_view<SrcView>::value,
1484 "SrcView must be a Kokkos::View.");
1485 static_assert(Kokkos::is_view<IdxView>::value,
1486 "IdxView must be a Kokkos::View.");
1487 static_assert(Kokkos::is_view<ColView>::value,
1488 "ColView must be a Kokkos::View.");
1489 static_assert(static_cast<int>(DstView::rank) == 2,
1490 "DstView must be a rank-2 Kokkos::View.");
1491 static_assert(static_cast<int>(SrcView::rank) == 1,
1492 "SrcView must be a rank-1 Kokkos::View.");
1493 static_assert(static_cast<int>(IdxView::rank) == 1,
1494 "IdxView must be a rank-1 Kokkos::View.");
1495 static_assert(static_cast<int>(ColView::rank) == 1,
1496 "ColView must be a rank-1 Kokkos::View.");
1497
1498 if (debug) {
1499 using impl_type =
1500 UnpackArrayMultiColumnVariableStrideWithBoundsCheck<ExecutionSpace,
1501 DstView, SrcView, IdxView, ColView, Op>;
1502 impl_type::unpack(execSpace, dst, src, idx, col, op, numCols,
1503 use_atomic_updates);
1504 } else {
1505 using impl_type = UnpackArrayMultiColumnVariableStride<ExecutionSpace,
1506 DstView, SrcView, IdxView, ColView, Op>;
1507 impl_type::unpack(execSpace, dst, src, idx, col, op, numCols,
1508 use_atomic_updates);
1509 }
1510}
1511
1512template <typename DstView, typename SrcView,
1513 typename DstIdxView, typename SrcIdxView, typename Op,
1514 typename Enabled = void>
1515struct PermuteArrayMultiColumn {
1516 using size_type = typename DstView::size_type;
1517
1518 DstView dst;
1519 SrcView src;
1520 DstIdxView dst_idx;
1521 SrcIdxView src_idx;
1522 size_t numCols;
1523 Op op;
1524
1525 PermuteArrayMultiColumn(const DstView& dst_,
1526 const SrcView& src_,
1527 const DstIdxView& dst_idx_,
1528 const SrcIdxView& src_idx_,
1529 const size_t numCols_,
1530 const Op& op_)
1531 : dst(dst_)
1532 , src(src_)
1533 , dst_idx(dst_idx_)
1534 , src_idx(src_idx_)
1535 , numCols(numCols_)
1536 , op(op_) {}
1537
1538 KOKKOS_INLINE_FUNCTION void
1539 operator()(const size_type k) const {
1540 const typename DstIdxView::value_type toRow = dst_idx(k);
1541 const typename SrcIdxView::value_type fromRow = src_idx(k);
1542 nonatomic_tag tag; // permute does not need atomics
1543 for (size_t j = 0; j < numCols; ++j) {
1544 op(tag, dst(toRow, j), src(fromRow, j));
1545 }
1546 }
1547
1548 template <typename ExecutionSpace>
1549 static void
1550 permute(const ExecutionSpace& space,
1551 const DstView& dst,
1552 const SrcView& src,
1553 const DstIdxView& dst_idx,
1554 const SrcIdxView& src_idx,
1555 const size_t numCols,
1556 const Op& op) {
1557 using range_type =
1558 Kokkos::RangePolicy<ExecutionSpace, size_type>;
1559 // permute does not need atomics for Op
1560 const size_type n = std::min(dst_idx.size(), src_idx.size());
1561 Kokkos::parallel_for("Tpetra::MultiVector permute multicol const stride",
1562 range_type(space, 0, n),
1563 PermuteArrayMultiColumn(dst, src, dst_idx, src_idx, numCols, op));
1564 }
1565};
1566
1567// To do: Add enable_if<> restrictions on DstView::rank == 1,
1568// SrcView::rank == 2
1569template <typename ExecutionSpace, typename DstView, typename SrcView,
1570 typename DstIdxView, typename SrcIdxView, typename Op>
1571void permute_array_multi_column(const ExecutionSpace& space, const DstView& dst,
1572 const SrcView& src, const DstIdxView& dst_idx,
1573 const SrcIdxView& src_idx, size_t numCols,
1574 const Op& op) {
1575 PermuteArrayMultiColumn<DstView, SrcView, DstIdxView, SrcIdxView,
1576 Op>::permute(space, dst, src, dst_idx, src_idx,
1577 numCols, op);
1578}
1579
1580// To do: Add enable_if<> restrictions on DstView::rank == 1,
1581// SrcView::rank == 2
1582template <typename DstView, typename SrcView,
1583 typename DstIdxView, typename SrcIdxView, typename Op>
1584void permute_array_multi_column(const DstView& dst,
1585 const SrcView& src,
1586 const DstIdxView& dst_idx,
1587 const SrcIdxView& src_idx,
1588 size_t numCols,
1589 const Op& op) {
1590 using execution_space = typename DstView::execution_space;
1591 PermuteArrayMultiColumn<DstView, SrcView, DstIdxView, SrcIdxView, Op>::permute(
1592 execution_space(), dst, src, dst_idx, src_idx, numCols, op);
1593}
1594
1595template <typename DstView, typename SrcView,
1596 typename DstIdxView, typename SrcIdxView,
1597 typename DstColView, typename SrcColView, typename Op,
1598 typename Enabled = void>
1599struct PermuteArrayMultiColumnVariableStride {
1600 using size_type = typename DstView::size_type;
1601
1602 DstView dst;
1603 SrcView src;
1604 DstIdxView dst_idx;
1605 SrcIdxView src_idx;
1606 DstColView dst_col;
1607 SrcColView src_col;
1608 size_t numCols;
1609 Op op;
1610
1611 PermuteArrayMultiColumnVariableStride(const DstView& dst_,
1612 const SrcView& src_,
1613 const DstIdxView& dst_idx_,
1614 const SrcIdxView& src_idx_,
1615 const DstColView& dst_col_,
1616 const SrcColView& src_col_,
1617 const size_t numCols_,
1618 const Op& op_)
1619 : dst(dst_)
1620 , src(src_)
1621 , dst_idx(dst_idx_)
1622 , src_idx(src_idx_)
1623 , dst_col(dst_col_)
1624 , src_col(src_col_)
1625 , numCols(numCols_)
1626 , op(op_) {}
1627
1628 KOKKOS_INLINE_FUNCTION void
1629 operator()(const size_type k) const {
1630 const typename DstIdxView::value_type toRow = dst_idx(k);
1631 const typename SrcIdxView::value_type fromRow = src_idx(k);
1632 const nonatomic_tag tag; // permute does not need atomics
1633 for (size_t j = 0; j < numCols; ++j) {
1634 op(tag, dst(toRow, dst_col(j)), src(fromRow, src_col(j)));
1635 }
1636 }
1637
1638 template <typename ExecutionSpace>
1639 static void
1640 permute(const ExecutionSpace& space,
1641 const DstView& dst,
1642 const SrcView& src,
1643 const DstIdxView& dst_idx,
1644 const SrcIdxView& src_idx,
1645 const DstColView& dst_col,
1646 const SrcColView& src_col,
1647 const size_t numCols,
1648 const Op& op) {
1649 static_assert(Kokkos::SpaceAccessibility<
1650 ExecutionSpace, typename DstView::memory_space>::accessible,
1651 "ExecutionSpace must be able to access DstView");
1652
1653 using range_type = Kokkos::RangePolicy<ExecutionSpace, size_type>;
1654 const size_type n = std::min(dst_idx.size(), src_idx.size());
1655 Kokkos::parallel_for("Tpetra::MultiVector permute multicol var stride",
1656 range_type(space, 0, n),
1657 PermuteArrayMultiColumnVariableStride(dst, src, dst_idx, src_idx,
1658 dst_col, src_col, numCols, op));
1659 }
1660};
1661
1662// To do: Add enable_if<> restrictions on DstView::rank == 1,
1663// SrcView::rank == 2
1664template <typename ExecutionSpace, typename DstView, typename SrcView,
1665 typename DstIdxView, typename SrcIdxView, typename DstColView,
1666 typename SrcColView, typename Op>
1667void permute_array_multi_column_variable_stride(
1668 const ExecutionSpace& space, const DstView& dst, const SrcView& src,
1669 const DstIdxView& dst_idx, const SrcIdxView& src_idx,
1670 const DstColView& dst_col, const SrcColView& src_col, size_t numCols,
1671 const Op& op) {
1672 PermuteArrayMultiColumnVariableStride<DstView, SrcView, DstIdxView,
1673 SrcIdxView, DstColView, SrcColView,
1674 Op>::permute(space, dst, src, dst_idx,
1675 src_idx, dst_col, src_col,
1676 numCols, op);
1677}
1678
1679// To do: Add enable_if<> restrictions on DstView::rank == 1,
1680// SrcView::rank == 2
1681template <typename DstView, typename SrcView,
1682 typename DstIdxView, typename SrcIdxView,
1683 typename DstColView, typename SrcColView, typename Op>
1684void permute_array_multi_column_variable_stride(const DstView& dst,
1685 const SrcView& src,
1686 const DstIdxView& dst_idx,
1687 const SrcIdxView& src_idx,
1688 const DstColView& dst_col,
1689 const SrcColView& src_col,
1690 size_t numCols,
1691 const Op& op) {
1692 using execution_space = typename DstView::execution_space;
1693 PermuteArrayMultiColumnVariableStride<DstView, SrcView,
1694 DstIdxView, SrcIdxView, DstColView, SrcColView, Op>::permute(execution_space(), dst, src, dst_idx, src_idx, dst_col, src_col, numCols, op);
1695}
1696
1697} // namespace Details
1698} // namespace KokkosRefactor
1699} // namespace Tpetra
1700
1701#endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
Struct that holds views of the contents of a CrsMatrix.
Implementation details of Tpetra.
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...