Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_FixedHashTable_def.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
11#define TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
12
13#include "Tpetra_Details_FixedHashTable_decl.hpp"
15#ifdef TPETRA_USE_MURMUR_HASH
16#include "Kokkos_Functional.hpp" // hash function used by Kokkos::UnorderedMap
17#endif // TPETRA_USE_MURMUR_HASH
18#include "KokkosKernels_ArithTraits.hpp"
19#include "Teuchos_TypeNameTraits.hpp"
21#include <type_traits>
22
23namespace Tpetra {
24namespace Details {
25//
26// This namespace stores utility functions and Kokkos
27// functors for use in FixedHashTable construction.
28//
29namespace FHT {
30
31// Is it worth actually using building the FixedHashTable using
32// parallel threads, instead of just counting in a sequential loop?
33//
34// The parallel version of FixedHashTable construction isn't just a
35// parallelization of the sequential loops. It incurs additional
36// overheads. For example, the CountBuckets kernel uses atomic update
37// instructions to count the number of "buckets" per offsets array
38// (ptr) entry. Atomic updates have overhead, even if only one thread
39// issues them. The Kokkos kernels are still correct in that case,
40// but I would rather not incur overhead then. It might make sense to
41// set the minimum number of threads to something greater than 1, but
42// we would need experiments to find out.
43template <class ExecSpace>
44bool worthBuildingFixedHashTableInParallel() {
45 return ExecSpace().concurrency() > 1;
46}
47
48//
49// Functors for FixedHashTable initialization
50//
51// NOTE (mfh 23 May 2015): Once we can use lambdas with CUDA, we
52// should consider replacing all of these functors with in-line
53// lambdas. The only issue is that we would need to bake the
54// execution space into the policy, since the default execution space
55// might differ from the one Tpetra wants to use.
56
66template <class CountsViewType,
67 class KeysViewType,
68 class SizeType = typename KeysViewType::size_type>
70 public:
73 typedef typename CountsViewType::execution_space execution_space;
74 typedef typename CountsViewType::memory_space memory_space;
75 typedef SizeType size_type;
76 typedef typename keys_view_type::non_const_value_type key_type;
77 // mfh 21 May 2015: Having a device_type typedef in the functor
78 // along with an execution_space typedef causes compilation issues.
79 // This is because one of Kokkos' partial specializations picks up
80 // on the device_type typedef, and another picks up on the
81 // execution_space typedef. The former is a legacy of a previous
82 // design iteration of Kokkos, which did not separate memory and
83 // execution spaces.
85
92 const keys_view_type& keys,
93 const size_type size)
94 : counts_(counts)
95 , keys_(keys)
96 , size_(size) {}
97
103 operator()(const size_type& i) const {
105
106 const hash_value_type hashVal = hash_type::hashFunc(keys_[i], size_);
107 Kokkos::atomic_inc(&counts_[hashVal]);
108 }
109
110 using value_type = Kokkos::pair<int, key_type>;
111
116 operator()(const size_type& i, value_type& dst) const {
118
119 const key_type keyVal = keys_[i];
121 if (hashVal < hash_value_type(0) ||
122 hashVal >= hash_value_type(counts_.extent(0))) {
123 dst.first = 1;
124 dst.second = keyVal;
125 } else {
126 Kokkos::atomic_inc(&counts_[hashVal]);
127 }
128 }
129
131 KOKKOS_INLINE_FUNCTION void init(value_type& dst) const {
132 dst.first = 0;
133 dst.second = key_type(0);
134 }
135
137 join(value_type& dst,
138 const value_type& src) const {
139 const bool good = dst.first == 0 || src.first == 0;
140 dst.first = good ? 0 : dst.first;
141 // leave dst.second as it is, to get the "first" key
142 }
143
144 private:
146 counts_view_type counts_;
148 keys_view_type keys_;
150 size_type size_;
152
163template <class KeyType>
167 : minKey_(::KokkosKernels::ArithTraits<KeyType>::max())
168 ,
169 // min() for a floating-point type returns the minimum _positive_
170 // normalized value. This is different than for integer types.
171 // lowest() is new in C++11 and returns the least value, always
172 // negative for signed finite types.
173 //
174 // mfh 23 May 2015: I have heard reports that
175 // std::numeric_limits<int>::lowest() does not exist with the
176 // Intel compiler. I'm not sure if the users in question actually
177 // enabled C++11. However, it's easy enough to work around this
178 // issue. The standard floating-point types are signed and have a
179 // sign bit, so lowest() is just -max(). For integer types, we
180 // can use min() instead.
181 maxKey_(::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max())
182 , success_(true) {
183 static_assert(std::is_arithmetic<KeyType>::value,
184 "FillPairsResult: "
185 "KeyType must be some kind of number type.");
186 }
187
189 FillPairsResult(const KeyType& initMinKey,
190 const KeyType& initMaxKey)
193 , success_(true) {
194 static_assert(std::is_arithmetic<KeyType>::value,
195 "FillPairsResult: "
196 "KeyType must be some kind of number type.");
197 }
198
201 bool success_;
202};
203
233template <class PairsViewType,
234 class KeysViewType,
235 class CountsViewType,
236 class SizeType = typename KeysViewType::size_type>
238 public:
239 typedef typename CountsViewType::non_const_type counts_view_type;
240 typedef typename counts_view_type::const_type offsets_view_type;
241
243 typedef typename KeysViewType::const_type keys_view_type;
244 typedef typename offsets_view_type::execution_space execution_space;
245 typedef typename offsets_view_type::memory_space memory_space;
246 typedef SizeType size_type;
247
248 typedef typename keys_view_type::non_const_value_type key_type;
249 typedef typename pairs_view_type::non_const_value_type pair_type;
250
252
253 // mfh 23 May 2015: Having a device_type typedef in the functor
254 // along with an execution_space typedef causes compilation issues.
255 // This is because one of Kokkos' partial specializations picks up
256 // on the device_type typedef, and another picks up on the
257 // execution_space typedef. The former is a legacy of a previous
258 // design iteration of Kokkos, which did not separate memory and
259 // execution spaces.
261
273 const counts_view_type& counts,
274 const offsets_view_type& ptr,
275 const keys_view_type& keys,
276 const typename pair_type::second_type startingValue)
277 : pairs_(pairs)
278 , counts_(counts)
279 , ptr_(ptr)
280 , keys_(keys)
281 , size_(counts.extent(0))
282 , startingValue_(startingValue)
283 , initMinKey_(::KokkosKernels::ArithTraits<key_type>::max())
284 , initMaxKey_(::KokkosKernels::ArithTraits<key_type>::is_integer ? ::KokkosKernels::ArithTraits<key_type>::min() : -::KokkosKernels::ArithTraits<key_type>::max()) {}
285
305 const counts_view_type& counts,
306 const offsets_view_type& ptr,
307 const keys_view_type& keys,
308 const typename pair_type::second_type startingValue,
309 const key_type initMinKey,
310 const key_type initMaxKey)
311 : pairs_(pairs)
312 , counts_(counts)
313 , ptr_(ptr)
314 , keys_(keys)
315 , size_(counts.extent(0))
316 , startingValue_(startingValue)
317 , initMinKey_(initMinKey)
318 , initMaxKey_(initMaxKey) {
319 }
320
323 dst.minKey_ = initMinKey_;
324 dst.maxKey_ = initMaxKey_;
325 dst.success_ = true;
326 }
327
329 join(value_type& dst,
330 const value_type& src) const {
331 if (src.maxKey_ > dst.maxKey_) {
332 dst.maxKey_ = src.maxKey_;
333 }
334 if (src.minKey_ < dst.minKey_) {
335 dst.minKey_ = src.minKey_;
336 }
337 dst.success_ = dst.success_ && src.success_;
338 }
339
344 KOKKOS_INLINE_FUNCTION void
345 operator()(const size_type& i, value_type& dst) const {
347 typedef typename offsets_view_type::non_const_value_type offset_type;
348 typedef typename pair_type::second_type val_type;
349 typedef typename std::remove_reference<decltype(counts_[0])>::type atomic_incr_type;
350
351 const key_type key = keys_[i];
352 if (key > dst.maxKey_) {
353 dst.maxKey_ = key;
354 }
355 if (key < dst.minKey_) {
356 dst.minKey_ = key;
357 }
358 const val_type theVal = startingValue_ + static_cast<val_type>(i);
360
361 // Return the old count; decrement afterwards.
362 const offset_type count = Kokkos::atomic_fetch_add(&counts_[hashVal], atomic_incr_type(-1));
363 if (count == 0) {
364 dst.success_ = false; // FAILURE!
365 } else {
366 const offset_type curPos = ptr_[hashVal + 1] - count;
367
368 pairs_[curPos].first = key;
369 pairs_[curPos].second = theVal;
370 }
372
373 private:
374 pairs_view_type pairs_;
375 counts_view_type counts_;
376 offsets_view_type ptr_;
377 keys_view_type keys_;
378 size_type size_;
379 typename pair_type::second_type startingValue_;
381 key_type initMinKey_;
383 key_type initMaxKey_;
384};
385
409template <class OffsetsViewType,
410 class PairsViewType,
411 class SizeType = typename OffsetsViewType::size_type>
413 public:
414 typedef typename OffsetsViewType::const_type offsets_view_type;
415 typedef typename PairsViewType::const_type pairs_view_type;
416 typedef typename offsets_view_type::execution_space execution_space;
417 typedef typename offsets_view_type::memory_space memory_space;
418 typedef SizeType size_type;
419
420 // The result of the check is whether the table has one or more duplicates.
421 typedef int value_type;
422
427 CheckForDuplicateKeys(const pairs_view_type& pairs,
428 const offsets_view_type& ptr)
429 : pairs_(pairs)
430 , ptr_(ptr)
431 , size_(ptr_.extent(0) == 0 ? size_type(0) : ptr_.extent(0) - 1) {}
432
435 dst = 0;
436 }
437
441 const value_type& src) const {
442 dst = dst + src > 0 ? 1 : 0;
443 }
444
447 operator()(const size_type& i, value_type& dst) const {
448 typedef typename offsets_view_type::non_const_value_type offset_type;
449 typedef typename pairs_view_type::non_const_value_type pair_type;
450 typedef typename pair_type::first_type key_type;
451
452 if (dst > 0) {
453 return; // we've already found duplicate keys elsewhere
454 } else {
455 const offset_type beg = ptr_[i];
456 const offset_type end = ptr_[i + 1];
457 bool foundDuplicateKey = false;
458 // This is an ~ n^2 algorithm in the worst case, where n is the
459 // max number of keys that hash to the same bucket. However, if
460 // the hash function is reasonable, n should be much less than
461 // the total number of keys.
462 for (offset_type j = beg + 1; j < end; ++j) {
463 const key_type curKey = pairs_[j].first;
464 for (offset_type k = beg; k < j; ++k) {
465 if (pairs_[k].first == curKey) {
466 foundDuplicateKey = true;
467 break;
468 }
469 }
470 }
471 dst = (dst > 0) || foundDuplicateKey ? 1 : 0;
472 }
473 }
474
475 private:
476 pairs_view_type pairs_;
477 offsets_view_type ptr_;
478 size_type size_;
479};
480
481} // namespace FHT
482
483//
484// Here begins the actual implementation of FixedHashTable.
485//
486
487template <class KeyType, class ValueType, class DeviceType>
490 : minVal_(0)
491 , maxVal_(keys.size() == 0 ? static_cast<ValueType>(0) : static_cast<ValueType>(keys.size() - 1))
492 , checkedForDuplicateKeys_(false) {
493 const ValueType startingValue = static_cast<ValueType>(0);
494 const KeyType initMinKey = this->minKey_;
495 const KeyType initMaxKey = this->maxKey_;
497 initMinKey, initMinKey, false);
498}
499
500template <class KeyType, class ValueType, class DeviceType>
502 FixedHashTable(const Teuchos::ArrayView<const KeyType>& keys)
503 : minVal_(0)
504 , maxVal_(keys.size() == 0 ? static_cast<ValueType>(0) : static_cast<ValueType>(keys.size() - 1))
505 , checkedForDuplicateKeys_(false) {
506 typedef typename keys_type::non_const_type nonconst_keys_type;
507
508 // mfh 01 May 2015: I don't trust that
509 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
510 // so I ensure this manually.
511 const ValueType startingValue = static_cast<ValueType>(0);
512 host_input_keys_type keys_k(keys.size() == 0 ? NULL : keys.getRawPtr(),
513 keys.size());
514 using Kokkos::ViewAllocateWithoutInitializing;
516 keys_k.extent(0));
517 // DEEP_COPY REVIEW - NOT TESTED
518 Kokkos::deep_copy(keys_d, keys_k);
519 const KeyType initMinKey = this->minKey_;
520 const KeyType initMaxKey = this->maxKey_;
522 initMinKey, initMinKey, false);
523}
524
525template <class KeyType, class ValueType, class DeviceType>
527 FixedHashTable(const Teuchos::ArrayView<const KeyType>& keys,
529 : minVal_(startingValue)
530 , maxVal_(keys.size() == 0 ? startingValue : static_cast<ValueType>(startingValue + keys.size() - 1))
531 , checkedForDuplicateKeys_(false) {
532 typedef typename keys_type::non_const_type nonconst_keys_type;
533
534 // mfh 01 May 2015: I don't trust that
535 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
536 // so I ensure this manually.
537 host_input_keys_type keys_k(keys.size() == 0 ? NULL : keys.getRawPtr(),
538 keys.size());
539 using Kokkos::ViewAllocateWithoutInitializing;
541 keys_k.extent(0));
542 // DEEP_COPY REVIEW - HOST-TO_DEVICE
543 Kokkos::deep_copy(execution_space(), keys_d, keys_k);
544
545 const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
546 // min() for a floating-point type returns the minimum _positive_
547 // normalized value. This is different than for integer types.
548 // lowest() is new in C++11 and returns the least value, always
549 // negative for signed finite types.
550 //
551 // mfh 23 May 2015: I have heard reports that
552 // std::numeric_limits<int>::lowest() does not exist with the Intel
553 // compiler. I'm not sure if the users in question actually enabled
554 // C++11. However, it's easy enough to work around this issue. The
555 // standard floating-point types are signed and have a sign bit, so
556 // lowest() is just -max(). For integer types, we can use min()
557 // instead.
558 const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
560 initMinKey, initMinKey, false);
561}
562
563template <class KeyType, class ValueType, class DeviceType>
569 : minVal_(startingValue)
570 , maxVal_(keys.size() == 0 ? startingValue : static_cast<ValueType>(startingValue + keys.size() - 1))
571 , firstContigKey_(firstContigKey)
572 , lastContigKey_(lastContigKey)
573 , checkedForDuplicateKeys_(false) {
574 const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
575 // min() for a floating-point type returns the minimum _positive_
576 // normalized value. This is different than for integer types.
577 // lowest() is new in C++11 and returns the least value, always
578 // negative for signed finite types.
579 //
580 // mfh 23 May 2015: I have heard reports that
581 // std::numeric_limits<int>::lowest() does not exist with the Intel
582 // compiler. I'm not sure if the users in question actually enabled
583 // C++11. However, it's easy enough to work around this issue. The
584 // standard floating-point types are signed and have a sign bit, so
585 // lowest() is just -max(). For integer types, we can use min()
586 // instead.
587 const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
590}
591
592template <class KeyType, class ValueType, class DeviceType>
594 FixedHashTable(const Teuchos::ArrayView<const KeyType>& keys,
598 : minVal_(startingValue)
599 , maxVal_(keys.size() == 0 ? startingValue : static_cast<ValueType>(startingValue + keys.size() - 1))
600 , firstContigKey_(firstContigKey)
601 , lastContigKey_(lastContigKey)
602 , checkedForDuplicateKeys_(false) {
603 typedef typename keys_type::non_const_type nonconst_keys_type;
604
605 // mfh 01 May 2015: I don't trust that
606 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
607 // so I ensure this manually.
608 host_input_keys_type keys_k(keys.size() == 0 ? NULL : keys.getRawPtr(),
609 keys.size());
610 using Kokkos::ViewAllocateWithoutInitializing;
612 keys_k.extent(0));
613 // DEEP_COPY REVIEW - NOT TESTED
614 Kokkos::deep_copy(keys_d, keys_k);
615
616 const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
617 // min() for a floating-point type returns the minimum _positive_
618 // normalized value. This is different than for integer types.
619 // lowest() is new in C++11 and returns the least value, always
620 // negative for signed finite types.
621 //
622 // mfh 23 May 2015: I have heard reports that
623 // std::numeric_limits<int>::lowest() does not exist with the Intel
624 // compiler. I'm not sure if the users in question actually enabled
625 // C++11. However, it's easy enough to work around this issue. The
626 // standard floating-point types are signed and have a sign bit, so
627 // lowest() is just -max(). For integer types, we can use min()
628 // instead.
629 const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
632}
633
634template <class KeyType, class ValueType, class DeviceType>
638 : minVal_(startingValue)
639 , maxVal_(keys.size() == 0 ? startingValue : static_cast<ValueType>(startingValue + keys.size() - 1))
640 , checkedForDuplicateKeys_(false) {
641 const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
642 // min() for a floating-point type returns the minimum _positive_
643 // normalized value. This is different than for integer types.
644 // lowest() is new in C++11 and returns the least value, always
645 // negative for signed finite types.
646 //
647 // mfh 23 May 2015: I have heard reports that
648 // std::numeric_limits<int>::lowest() does not exist with the Intel
649 // compiler. I'm not sure if the users in question actually enabled
650 // C++11. However, it's easy enough to work around this issue. The
651 // standard floating-point types are signed and have a sign bit, so
652 // lowest() is just -max(). For integer types, we can use min()
653 // instead.
654 const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
656 initMinKey, initMinKey, false);
657}
658
659template <class KeyType, class ValueType, class DeviceType>
661 FixedHashTable(const Teuchos::ArrayView<const KeyType>& keys,
662 const Teuchos::ArrayView<const ValueType>& vals)
663 : contiguousValues_(false)
664 , checkedForDuplicateKeys_(false) {
665 // mfh 01 May 2015: I don't trust that
666 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
667 // so I ensure this manually.
668 host_input_keys_type keys_k(keys.size() == 0 ? NULL : keys.getRawPtr(),
669 keys.size());
670 host_input_vals_type vals_k(vals.size() == 0 ? NULL : vals.getRawPtr(),
671 vals.size());
672 const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
673 // min() for a floating-point type returns the minimum _positive_
674 // normalized value. This is different than for integer types.
675 // lowest() is new in C++11 and returns the least value, always
676 // negative for signed finite types.
677 //
678 // mfh 23 May 2015: I have heard reports that
679 // std::numeric_limits<int>::lowest() does not exist with the Intel
680 // compiler. I'm not sure if the users in question actually enabled
681 // C++11. However, it's easy enough to work around this issue. The
682 // standard floating-point types are signed and have a sign bit, so
683 // lowest() is just -max(). For integer types, we can use min()
684 // instead.
685 const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
686 this->init(keys_k, vals_k, initMinKey, initMaxKey);
687}
688
689template <class KeyType, class ValueType, class DeviceType>
691 init(const keys_type& keys,
697 const bool computeInitContigKeys) {
698 using Kokkos::subview;
699 using Kokkos::ViewAllocateWithoutInitializing;
700 using Teuchos::TypeNameTraits;
701 typedef typename std::decay<decltype(keys.extent(0))>::type size_type;
702 Tpetra::Details::ProfilingRegion pr("Tpetra::Details::FixedHashTable::init(7-arg)");
703 const char prefix[] = "Tpetra::Details::FixedHashTable: ";
704
705 const offset_type numKeys = static_cast<offset_type>(keys.extent(0));
706 {
707 const offset_type theMaxVal = ::KokkosKernels::ArithTraits<offset_type>::max();
708 const size_type maxValST = static_cast<size_type>(theMaxVal);
709 TEUCHOS_TEST_FOR_EXCEPTION(keys.extent(0) > maxValST, std::invalid_argument, prefix << "The "
710 "number of keys "
711 << keys.extent(0) << " does not fit in "
712 "offset_type = "
713 << TypeNameTraits<offset_type>::name() << ", whose "
714 "max value is "
715 << theMaxVal << ". This means that it is not possible to "
716 "use this constructor.");
717 }
718 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<unsigned long long>(numKeys) >
719 static_cast<unsigned long long>(::KokkosKernels::ArithTraits<ValueType>::max()),
720 std::invalid_argument,
721 "Tpetra::Details::FixedHashTable: The number of "
722 "keys "
723 << numKeys << " is greater than the maximum representable "
724 "ValueType value "
725 << ::KokkosKernels::ArithTraits<ValueType>::max() << ". "
726 "This means that it is not possible to use this constructor.");
727 TEUCHOS_TEST_FOR_EXCEPTION(numKeys > static_cast<offset_type>(INT_MAX), std::logic_error, prefix << "This class currently only works when the number of keys is <= INT_MAX = " << INT_MAX << ". If this is a problem for you, please talk to the Tpetra "
728 "developers.");
729
730 const bool buildInParallel =
731 FHT::worthBuildingFixedHashTableInParallel<execution_space>();
732 const bool debug = ::Tpetra::Details::Behavior::debug();
733
734 // NOTE (mfh 14 May 2015) This method currently assumes UVM. We
735 // could change that by setting up ptr and val as Kokkos::DualView
736 // instances. If we do that, since we are filling on host for now,
737 // we want to make sure that we only zero-fill ptr on host
738 // initially, and that we don't fill val at all. Once we finish
739 // Kokkos-izing all the set-up kernels, we won't need DualView for
740 // either ptr or val.
741
743 // Find the first and last initial contiguous keys. If we find a
744 // long sequence of initial contiguous keys, we can save space by
745 // not storing them explicitly as pairs in the hash table.
746 //
747 // NOTE (mfh 01 Jun 2015) Doing this in parallel requires a scan
748 // ("min index such that the difference between the current key and
749 // the next != 1"), which takes multiple passes over the data. We
750 // could fuse it with CountBuckets (only update counts on 'final'
751 // pass). However, we're really just moving this sequential search
752 // out of Map's constructor here, so there is no loss in doing it
753 // sequentially for now. Later, we can work on parallelization.
754 if (numKeys > 0) {
755 // FIXME: make it a parallel kernel with no host copy
756 auto keys_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
757 keys);
758 firstContigKey_ = keys_h[0];
759 // Start with one plus, then decrement at the end. That lets us do
760 // only one addition per loop iteration, rather than two (if we test
761 // against lastContigKey + 1 and then increment lastContigKey).
762 lastContigKey_ = firstContigKey_ + 1;
763
764 // We will only store keys in the table that are not part of the
765 // initial contiguous sequence. It's possible for the initial
766 // contiguous sequence to be trivial, which for a nonzero number of
767 // keys means that the "sequence" has length 1.
768 for (offset_type k = 1; k < numKeys; ++k) {
769 if (lastContigKey_ != keys_h[k]) {
770 break;
771 }
772 ++lastContigKey_;
773 }
774 --lastContigKey_;
775 }
776 } else {
777 firstContigKey_ = firstContigKey;
778 lastContigKey_ = lastContigKey;
779 }
780
781 offset_type startIndex;
782 if (numKeys > 0) {
783 initMinKey = std::min(initMinKey, firstContigKey_);
784 initMaxKey = std::max(initMaxKey, lastContigKey_);
785 startIndex = static_cast<offset_type>(lastContigKey_ - firstContigKey_);
786 } else {
787 startIndex = 0;
788 }
789
790 const offset_type theNumKeys = numKeys - startIndex;
791 const offset_type size = hash_type::getRecommendedSize(theNumKeys);
792#ifdef HAVE_TPETRA_DEBUG
793 TEUCHOS_TEST_FOR_EXCEPTION(
794 size == 0 && numKeys != 0, std::logic_error,
795 "Tpetra::Details::FixedHashTable constructor: "
796 "getRecommendedSize("
797 << numKeys << ") returned zero, "
798 "even though the number of keys "
799 << numKeys << " is nonzero. "
800 "Please report this bug to the Tpetra developers.");
801#endif // HAVE_TPETRA_DEBUG
802 keys_type theKeys =
803 subview(keys, std::pair<offset_type, offset_type>(startIndex, numKeys));
804
805 // FIXME (mfh 28 Mar 2016) For some reason, we don't seem to need a
806 // fence here, but we do before other allocations.
807
808 // The array of counts must be separate from the array of offsets,
809 // in order for parallel_scan to work correctly.
810 typedef typename ptr_type::non_const_type counts_type;
811 counts_type counts("Tpetra::FixedHashTable::counts", size);
812
813 //
814 // Count the number of "buckets" per offsets array (ptr) entry.
815 //
816
817 // Will only create the mirror for buildInParallel false - but then use it in two places
818 typename keys_type::host_mirror_type theKeysHost;
819
820 // The Kokkos kernel uses atomic update instructions to count the
821 // number of "buckets" per offsets array (ptr) entry. Atomic
822 // updates incur overhead, even in the sequential case. The Kokkos
823 // kernel is still correct in that case, but I would rather not
824 // incur overhead then.
825 if (buildInParallel) {
826 FHT::CountBuckets<counts_type, keys_type> functor(counts, theKeys, size);
827 using range_type = Kokkos::RangePolicy<execution_space, offset_type>;
828 const char kernelLabel[] = "Tpetra::Details::FixedHashTable CountBuckets";
829 if (debug) {
830 using key_type = typename keys_type::non_const_value_type;
831 Kokkos::pair<int, key_type> err;
832 Kokkos::parallel_reduce(kernelLabel, range_type(0, theNumKeys),
833 functor, err);
834 TEUCHOS_TEST_FOR_EXCEPTION(err.first != 0, std::logic_error,
835 "Tpetra::Details::FixedHashTable "
836 "constructor: CountBuckets found a key "
837 << err.second << " that "
838 "results in an out-of-bounds hash value.");
839 } else {
840 Kokkos::parallel_for(kernelLabel, range_type(0, theNumKeys), functor);
841 }
842 } else {
843 Kokkos::HostSpace hostMemSpace;
844 theKeysHost = Kokkos::create_mirror_view(theKeys);
845 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
846 Kokkos::deep_copy(execution_space(), theKeysHost, theKeys);
847 auto countsHost = Kokkos::create_mirror_view(hostMemSpace, counts);
848
849 for (offset_type k = 0; k < theNumKeys; ++k) {
850 using key_type = typename keys_type::non_const_value_type;
851 const key_type key = theKeysHost[k];
852
853 using hash_value_type = typename hash_type::result_type;
854 const hash_value_type hashVal = hash_type::hashFunc(key, size);
855 TEUCHOS_TEST_FOR_EXCEPTION(hashVal < hash_value_type(0) ||
856 hashVal >= hash_value_type(countsHost.extent(0)),
857 std::logic_error,
858 "Tpetra::Details::FixedHashTable "
859 "constructor: Sequential CountBuckets found a key "
860 << key
861 << " that results in an out-of-bounds hash value.");
862
863 ++countsHost[hashVal];
864 }
865 // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
866 Kokkos::deep_copy(execution_space(), counts, countsHost);
867 }
868
869 // KJ: This fence is not required for the 2-argument deep_copy which calls
870 // fence, but will be required if switched to the 3-argumemt deep_copy which
871 // passes a space. The 3-argument form does not fence.
872 execution_space().fence();
873
874 // Kokkos::View fills with zeros by default.
875 typename ptr_type::non_const_type ptr("Tpetra::FixedHashTable::ptr", size + 1);
876
877 // Compute row offsets via prefix sum:
878 //
879 // ptr[i+1] = \sum_{j=0}^{i} counts[j].
880 //
881 // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
882 // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
883 // formula is ptr[i+1] += ptr[i].
884 //
885 // parallel_scan does not incur overhead with Kokkos::Serial, but
886 // with actual parallel execution spaces, it does require multiple
887 // passes over the data. Thus, it still makes sense to have a
888 // sequential fall-back.
889
890 using ::Tpetra::Details::computeOffsetsFromCounts;
891 if (buildInParallel) {
892 computeOffsetsFromCounts(ptr, counts);
893 }
894
895 if (!buildInParallel || debug) {
896 Kokkos::HostSpace hostMemSpace;
897 auto counts_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, counts);
898 auto ptr_h = Kokkos::create_mirror_view(hostMemSpace, ptr);
899
900#ifdef KOKKOS_ENABLE_SERIAL
901 Kokkos::Serial hostExecSpace;
902#else
903 Kokkos::DefaultHostExecutionSpace hostExecSpace;
904#endif // KOKKOS_ENABLE_SERIAL
905
906 computeOffsetsFromCounts(hostExecSpace, ptr_h, counts_h);
907 // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
908 Kokkos::deep_copy(execution_space(), ptr, ptr_h);
909
910 if (debug) {
911 bool bad = false;
912 for (offset_type i = 0; i < size; ++i) {
913 if (ptr_h[i + 1] != ptr_h[i] + counts_h[i]) {
914 bad = true;
915 }
916 }
917 TEUCHOS_TEST_FOR_EXCEPTION(bad, std::logic_error,
918 "Tpetra::Details::FixedHashTable "
919 "constructor: computeOffsetsFromCounts gave an incorrect "
920 "result.");
921 }
922 }
923
924 // KJ: computeOffsetsFromCounts calls parallel_scan which does not fence.
925 // This fence is necessary as we need to make sure that the offset view
926 // completes before the view is used in the next functor.
927 execution_space().fence();
928
929 // Allocate the array of (key,value) pairs. Don't fill it with
930 // zeros, because we will fill it with actual data below.
931 typedef typename val_type::non_const_type nonconst_val_type;
932 nonconst_val_type val(ViewAllocateWithoutInitializing("Tpetra::FixedHashTable::pairs"),
933 theNumKeys);
934
935 // Fill in the hash table's "values" (the (key,value) pairs).
936 typedef FHT::FillPairs<typename val_type::non_const_type, keys_type,
937 typename ptr_type::non_const_type>
938 functor_type;
939 typename functor_type::value_type result(initMinKey, initMaxKey);
940
941 const ValueType newStartingValue = startingValue + static_cast<ValueType>(startIndex);
942 if (buildInParallel) {
943 functor_type functor(val, counts, ptr, theKeys, newStartingValue,
944 initMinKey, initMaxKey);
945 typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
946 Kokkos::parallel_reduce("Tpetra::Details::FixedHashTable::FillPairs", range_type(0, theNumKeys), functor, result);
947 } else {
948 Kokkos::HostSpace hostMemSpace;
949 auto counts_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, counts);
950 auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
951 auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
952 for (offset_type k = 0; k < theNumKeys; ++k) {
953 typedef typename hash_type::result_type hash_value_type;
954 const KeyType key = theKeysHost[k];
955 if (key > result.maxKey_) {
956 result.maxKey_ = key;
957 }
958 if (key < result.minKey_) {
959 result.minKey_ = key;
960 }
961 const ValueType theVal = newStartingValue + static_cast<ValueType>(k);
962 const hash_value_type hashVal = hash_type::hashFunc(key, size);
963
964 // Return the old count; decrement afterwards.
965 const offset_type count = counts_h[hashVal];
966 --counts_h[hashVal];
967 if (count == 0) {
968 result.success_ = false; // FAILURE!
969 break;
970 } else {
971 const offset_type curPos = ptr_h[hashVal + 1] - count;
972 val_h[curPos].first = key;
973 val_h[curPos].second = theVal;
974 }
975 }
976 Kokkos::deep_copy(counts, counts_h); // restore
977 Kokkos::deep_copy(val, val_h); // restore
978 }
979
980 // FIXME (mfh 01 Jun 2015) Temporarily commented out because of
981 // reports of exceptions being thrown in Albany.
982 //
983 // TEUCHOS_TEST_FOR_EXCEPTION
984 // (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
985 // "init: Filling the hash table failed! Please report this bug to the "
986 // "Tpetra developers.");
987 (void)result; // prevent build warning (set but unused variable)
988
989 // "Commit" the computed arrays and other computed quantities.
990 ptr_ = ptr;
991 val_ = val;
992 minKey_ = result.minKey_;
993 maxKey_ = result.maxKey_;
994 // We've already set firstContigKey_ and lastContigKey_ above.
995}
996
997template <class KeyType, class ValueType, class DeviceType>
998void FixedHashTable<KeyType, ValueType, DeviceType>::
999 init(const host_input_keys_type& keys,
1000 const host_input_vals_type& vals,
1001 KeyType initMinKey,
1002 KeyType initMaxKey) {
1003 Tpetra::Details::ProfilingRegion pr("Tpetra::Details::FixedHashTable::init(4-arg)");
1004 const offset_type numKeys = static_cast<offset_type>(keys.extent(0));
1005 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<unsigned long long>(numKeys) > static_cast<unsigned long long>(::KokkosKernels::ArithTraits<ValueType>::max()),
1006 std::invalid_argument,
1007 "Tpetra::Details::FixedHashTable: The number of "
1008 "keys "
1009 << numKeys << " is greater than the maximum representable "
1010 "ValueType value "
1011 << ::KokkosKernels::ArithTraits<ValueType>::max() << ".");
1012 TEUCHOS_TEST_FOR_EXCEPTION(numKeys > static_cast<offset_type>(INT_MAX), std::logic_error,
1013 "Tpetra::"
1014 "Details::FixedHashTable: This class currently only works when the number "
1015 "of keys is <= INT_MAX = "
1016 << INT_MAX << ". If this is a problem for you"
1017 ", please talk to the Tpetra developers.");
1018
1019 // There's no need to find the first and last initial contiguous
1020 // keys in this case, because if we reach this init() function, then
1021 // hasContiguousValues() is false and so get() doesn't use the
1022 // initial contiguous sequence of keys.
1023
1024 const offset_type size = hash_type::getRecommendedSize(numKeys);
1025#ifdef HAVE_TPETRA_DEBUG
1026 TEUCHOS_TEST_FOR_EXCEPTION(
1027 size == 0 && numKeys != 0, std::logic_error,
1028 "Tpetra::Details::FixedHashTable constructor: "
1029 "getRecommendedSize("
1030 << numKeys << ") returned zero, "
1031 "even though the number of keys "
1032 << numKeys << " is nonzero. "
1033 "Please report this bug to the Tpetra developers.");
1034#endif // HAVE_TPETRA_DEBUG
1035
1036 // FIXME: Investigate a couple options:
1037 // 1. Allocate ptr_h, val_h directly on host and only deep_copy to ptr_ and val_ once at the end
1038 // 2. Do all this work as a parallel kernel with the same execution/memory spaces as ptr_ and val_
1039 // An old comment from MFH noted ptr_h should be zero-initialized, while val_h should not be initialized.
1040 // It further noted that we shouldn't need a DualView type arrangement when all setup kernels have
1041 // been "Kokkos-ized".
1042 Kokkos::HostSpace hostMemSpace;
1043 typename ptr_type::non_const_type ptr("Tpetra::FixedHashTable::ptr", size + 1);
1044 auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
1045
1046 // Allocate the array of key,value pairs. Don't waste time filling
1047 // it with zeros, because we will fill it with actual data below.
1048 using Kokkos::ViewAllocateWithoutInitializing;
1049 typedef typename val_type::non_const_type nonconst_val_type;
1050 nonconst_val_type val(ViewAllocateWithoutInitializing("Tpetra::FixedHashTable::pairs"),
1051 numKeys);
1052 auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
1053
1054 // Compute number of entries in each hash table position.
1055 for (offset_type k = 0; k < numKeys; ++k) {
1056 const typename hash_type::result_type hashVal =
1057 hash_type::hashFunc(keys[k], size);
1058 // Shift over one, so that counts[j] = ptr[j+1]. See below.
1059 ++ptr_h[hashVal + 1];
1060 }
1061
1062 // Compute row offsets via prefix sum:
1063 //
1064 // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1065 //
1066 // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1067 // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1068 // formula is ptr[i+1] += ptr[i].
1069 for (offset_type i = 0; i < size; ++i) {
1070 ptr_h[i + 1] += ptr_h[i];
1071 }
1072 // ptr[0] = 0; // We've already done this when initializing ptr above.
1073
1074 // curRowStart[i] is the offset of the next element in row i.
1075 typename ptr_type::non_const_type::host_mirror_type curRowStart("Tpetra::FixedHashTable::curRowStart", size);
1076
1077 // Fill in the hash table.
1078 FHT::FillPairsResult<KeyType> result(initMinKey, initMaxKey);
1079 for (offset_type k = 0; k < numKeys; ++k) {
1080 typedef typename hash_type::result_type hash_value_type;
1081 const KeyType key = keys[k];
1082 if (key > result.maxKey_) {
1083 result.maxKey_ = key;
1084 }
1085 if (key < result.minKey_) {
1086 result.minKey_ = key;
1087 }
1088 const ValueType theVal = vals[k];
1089 if (theVal > maxVal_) {
1090 maxVal_ = theVal;
1091 }
1092 if (theVal < minVal_) {
1093 minVal_ = theVal;
1094 }
1095 const hash_value_type hashVal = hash_type::hashFunc(key, size);
1096
1097 const offset_type offset = curRowStart[hashVal];
1098 const offset_type curPos = ptr_h[hashVal] + offset;
1099 if (curPos >= ptr_h[hashVal + 1]) {
1100 result.success_ = false; // FAILURE!
1101 } else {
1102 val_h[curPos].first = key;
1103 val_h[curPos].second = theVal;
1104 ++curRowStart[hashVal];
1105 }
1106 }
1107
1108 TEUCHOS_TEST_FOR_EXCEPTION(!result.success_, std::logic_error,
1109 "Tpetra::Details::FixedHashTable::"
1110 "init: Filling the hash table failed! Please report this bug to the "
1111 "Tpetra developers.");
1112
1113 // "Commit" the computed arrays and other computed quantities.
1114 Kokkos::deep_copy(ptr, ptr_h);
1115 Kokkos::deep_copy(val, val_h);
1116
1117 ptr_ = ptr;
1118 val_ = val;
1119 minKey_ = result.minKey_;
1120 maxKey_ = result.maxKey_;
1121 // We've already assigned to minVal_ and maxVal_ above.
1122}
1123
1124template <class KeyType, class ValueType, class DeviceType>
1127 if (!checkedForDuplicateKeys_) {
1128 hasDuplicateKeys_ = checkForDuplicateKeys();
1129 checkedForDuplicateKeys_ = true;
1130 }
1131 return hasDuplicateKeys_;
1132}
1133
1134template <class KeyType, class ValueType, class DeviceType>
1136 checkForDuplicateKeys() const {
1137 const offset_type size = this->getSize();
1138 // It's allowed for the hash table to have a positive number of
1139 // buckets (getSize()), but still store no entries (numPairs()).
1140 // Both cases trivially entail no duplicates.
1141 if (size == 0 || this->numPairs() == 0) {
1142 return false;
1143 } else {
1144 typedef FHT::CheckForDuplicateKeys<ptr_type, val_type> functor_type;
1145 functor_type functor(val_, ptr_);
1146 int hasDupKeys = 0;
1147 typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1148 Kokkos::parallel_reduce("Tpetra::Details::FixedHashTable::CheckForDuplicateKeys", range_type(0, size), functor, hasDupKeys);
1149 return hasDupKeys > 0;
1150 }
1151}
1152
1153template <class KeyType, class ValueType, class DeviceType>
1154std::string
1156 description() const {
1157 std::ostringstream oss;
1158 oss << "FixedHashTable<"
1159 << Teuchos::TypeNameTraits<KeyType>::name() << ","
1160 << Teuchos::TypeNameTraits<ValueType>::name() << ">: "
1161 << "{ numKeys: " << val_.extent(0)
1162 << ", tableSize: " << this->getSize() << " }";
1163 return oss.str();
1164}
1165
1166template <class KeyType, class ValueType, class DeviceType>
1168 describe(Teuchos::FancyOStream& out,
1169 const Teuchos::EVerbosityLevel verbLevel) const {
1170 using std::endl;
1171 using std::setw;
1172 using Teuchos::OSTab;
1173 using Teuchos::rcpFromRef;
1174 using Teuchos::TypeNameTraits;
1175 using Teuchos::VERB_DEFAULT;
1176 using Teuchos::VERB_EXTREME;
1177 using Teuchos::VERB_LOW;
1178 using Teuchos::VERB_NONE;
1179
1180 // NOTE (mfh 14 May 2015) This method currently assumes UVM for
1181 // access to ptr_ and val_ from the host.
1182
1183 Teuchos::EVerbosityLevel vl = verbLevel;
1184 if (vl == VERB_DEFAULT) vl = VERB_LOW;
1185
1186 if (vl == VERB_NONE) {
1187 // do nothing
1188 } else if (vl == VERB_LOW) {
1189 out << this->description() << endl;
1190 } else { // MEDIUM, HIGH or EXTREME
1191 out << "FixedHashTable:" << endl;
1192 {
1194
1195 // const std::string label = this->getObjectLabel ();
1196 // if (label != "") {
1197 // out << "label: " << label << endl;
1198 // }
1199 out << "Template parameters:" << endl;
1200 {
1202 out << "KeyType: " << TypeNameTraits<KeyType>::name() << endl
1203 << "ValueType: " << TypeNameTraits<ValueType>::name() << endl;
1204 }
1205
1206 const offset_type tableSize = this->getSize();
1207 const offset_type numKeys = val_.extent(0);
1208
1209 out << "Table parameters:" << endl;
1210 {
1212 out << "numKeys: " << numKeys << endl
1213 << "tableSize: " << tableSize << endl;
1214 }
1215
1216 if (vl >= VERB_EXTREME) {
1217 out << "Contents: ";
1218 if (tableSize == 0 || numKeys == 0) {
1219 out << "[]" << endl;
1220 } else {
1221 out << "[ " << endl;
1222 {
1224 for (offset_type i = 0; i < tableSize; ++i) {
1226 out << "[";
1227 for (offset_type k = ptr_[i]; k < ptr_[i + 1]; ++k) {
1228 out << "(" << val_[k].first << "," << val_[k].second << ")";
1229 if (k + 1 < ptr_[i + 1]) {
1230 out << ", ";
1231 }
1232 }
1233 out << "]" << endl;
1234 } // for each table position i
1235 }
1236 out << "]" << endl;
1237 } // The table contains entries
1238 } // vl >= VERB_EXTREME
1239 }
1240 out << endl;
1241 } // if vl > VERB_LOW
1242}
1243
1244} // namespace Details
1245} // namespace Tpetra
1246
1247// Macro that explicitly instantiates FixedHashTable for the given
1248// local ordinal (LO), global ordinal (GO), and Kokkos device (NODE::device_type)
1249// types. Note that FixedHashTable's first two template parameters
1250// occur in the opposite order of most Tpetra classes. This is
1251// because FixedHashTable performs global-to-local lookup, and the
1252// convention in templated C++ lookup tables (such as std::map) is
1253// <KeyType, ValueType>.
1254//
1255// This macro must be explanded within the Tpetra::Details namespace.
1256// Moreover, we need (GO,LO,Host) and (LO,LO,Host)
1257#if defined(HAVE_TPETRA_INST_SERIAL)
1258#if defined(HAVE_TPETRA_INST_INT_INT)
1259#define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1260 template class Details::FixedHashTable<GO, LO, typename NODE::device_type>;
1261#else
1262#define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1263 template class Details::FixedHashTable<GO, LO, typename NODE::device_type>; \
1264 template class Details::FixedHashTable<LO, LO, typename NODE::device_type>;
1265#endif
1266#elif defined(HAVE_TPETRA_INST_OPENMP)
1267#if defined(HAVE_TPETRA_INST_INT_INT)
1268#define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1269 template class Details::FixedHashTable<GO, LO, typename NODE::device_type>; \
1270 template class Details::FixedHashTable<GO, LO, Kokkos::HostSpace::device_type>;
1271#else
1272#define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1273 template class Details::FixedHashTable<GO, LO, typename NODE::device_type>; \
1274 template class Details::FixedHashTable<LO, LO, typename NODE::device_type>; \
1275 template class Details::FixedHashTable<GO, LO, Kokkos::Device<Kokkos::Serial, Kokkos::HostSpace>>; \
1276 template class Details::FixedHashTable<LO, LO, Kokkos::Device<Kokkos::Serial, Kokkos::HostSpace>>;
1277#endif
1278#else
1279#define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1280 template class Details::FixedHashTable<GO, LO, typename NODE::device_type>; \
1281 template class Details::FixedHashTable<GO, LO, Kokkos::HostSpace::device_type>; \
1282 template class Details::FixedHashTable<LO, LO, Kokkos::HostSpace::device_type>;
1283#endif
1284#endif
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Struct that holds views of the contents of a CrsMatrix.
static bool debug()
Whether Tpetra is in debug mode.
Functor for checking whether a FixedHashTable has one or more duplicate entries.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body.
KOKKOS_INLINE_FUNCTION void join(value_type &dst, const value_type &src) const
Combine two intermediate reduction results.
CheckForDuplicateKeys(const pairs_view_type &pairs, const offsets_view_type &ptr)
Constructor.
Parallel for functor for counting "buckets" in the FixedHashTable.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i) const
Do this for every entry of keys_.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Debug reduce version of above operator().
CountBuckets(const counts_view_type &counts, const keys_view_type &keys, const size_type size)
Constructor.
Parallel reduce functor for filling the FixedHashTable, and computing the min and max keys.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue, const key_type initMinKey, const key_type initMaxKey)
Constructor that takes initial min and max key values.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body; do this for every entry of keys_.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue)
Constructor.
std::string description() const
Implementation of Teuchos::Describable interface.
bool hasDuplicateKeys()
Whether the table has any duplicate keys.
Kokkos::View< const KeyType *, Kokkos::LayoutLeft, device_type > keys_type
Type of a 1-D Kokkos::View (array) used to store keys.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with the given verbosity to the output stream.
Implementation details of Tpetra.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Reduction result for FillPairs functor below.
bool success_
Whether fill succeeded (it can only fail on a bug)
The hash function for FixedHashTable.
ResultType result_type
Type of the return value of the hash function.
static KOKKOS_INLINE_FUNCTION result_type hashFunc(const argument_type &, const offset_type &)
The hash function.