Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_FixedHashTable_def.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
11#define TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
12
13#include "Tpetra_Details_FixedHashTable_decl.hpp"
15#ifdef TPETRA_USE_MURMUR_HASH
16#include "Kokkos_Functional.hpp" // hash function used by Kokkos::UnorderedMap
17#endif // TPETRA_USE_MURMUR_HASH
18#if KOKKOS_VERSION >= 40799
19#include "KokkosKernels_ArithTraits.hpp"
20#else
21#include "Kokkos_ArithTraits.hpp"
22#endif
23#include "Teuchos_TypeNameTraits.hpp"
25#include <type_traits>
26
27namespace Tpetra {
28namespace Details {
29//
30// This namespace stores utility functions and Kokkos
31// functors for use in FixedHashTable construction.
32//
33namespace FHT {
34
35// Is it worth actually using building the FixedHashTable using
36// parallel threads, instead of just counting in a sequential loop?
37//
38// The parallel version of FixedHashTable construction isn't just a
39// parallelization of the sequential loops. It incurs additional
40// overheads. For example, the CountBuckets kernel uses atomic update
41// instructions to count the number of "buckets" per offsets array
42// (ptr) entry. Atomic updates have overhead, even if only one thread
43// issues them. The Kokkos kernels are still correct in that case,
44// but I would rather not incur overhead then. It might make sense to
45// set the minimum number of threads to something greater than 1, but
46// we would need experiments to find out.
47template <class ExecSpace>
48bool worthBuildingFixedHashTableInParallel() {
49 return ExecSpace().concurrency() > 1;
50}
51
52//
53// Functors for FixedHashTable initialization
54//
55// NOTE (mfh 23 May 2015): Once we can use lambdas with CUDA, we
56// should consider replacing all of these functors with in-line
57// lambdas. The only issue is that we would need to bake the
58// execution space into the policy, since the default execution space
59// might differ from the one Tpetra wants to use.
60
70template <class CountsViewType,
71 class KeysViewType,
72 class SizeType = typename KeysViewType::size_type>
74 public:
77 typedef typename CountsViewType::execution_space execution_space;
78 typedef typename CountsViewType::memory_space memory_space;
79 typedef SizeType size_type;
80 typedef typename keys_view_type::non_const_value_type key_type;
81 // mfh 21 May 2015: Having a device_type typedef in the functor
82 // along with an execution_space typedef causes compilation issues.
83 // This is because one of Kokkos' partial specializations picks up
84 // on the device_type typedef, and another picks up on the
85 // execution_space typedef. The former is a legacy of a previous
86 // design iteration of Kokkos, which did not separate memory and
87 // execution spaces.
89
96 const keys_view_type& keys,
97 const size_type size)
98 : counts_(counts)
99 , keys_(keys)
100 , size_(size) {}
101
107 operator()(const size_type& i) const {
109
110 const hash_value_type hashVal = hash_type::hashFunc(keys_[i], size_);
111 Kokkos::atomic_inc(&counts_[hashVal]);
112 }
113
114 using value_type = Kokkos::pair<int, key_type>;
115
120 operator()(const size_type& i, value_type& dst) const {
122
123 const key_type keyVal = keys_[i];
125 if (hashVal < hash_value_type(0) ||
126 hashVal >= hash_value_type(counts_.extent(0))) {
127 dst.first = 1;
128 dst.second = keyVal;
129 } else {
130 Kokkos::atomic_inc(&counts_[hashVal]);
131 }
132 }
133
135 KOKKOS_INLINE_FUNCTION void init(value_type& dst) const {
136 dst.first = 0;
137 dst.second = key_type(0);
138 }
139
141 join(value_type& dst,
142 const value_type& src) const {
143 const bool good = dst.first == 0 || src.first == 0;
144 dst.first = good ? 0 : dst.first;
145 // leave dst.second as it is, to get the "first" key
146 }
147
148 private:
150 counts_view_type counts_;
152 keys_view_type keys_;
154 size_type size_;
156
167template <class KeyType>
171#if KOKKOS_VERSION >= 40799
172 : minKey_(::KokkosKernels::ArithTraits<KeyType>::max())
173#else
174 : minKey_(::Kokkos::ArithTraits<KeyType>::max())
175#endif
177 // min() for a floating-point type returns the minimum _positive_
178 // normalized value. This is different than for integer types.
179 // lowest() is new in C++11 and returns the least value, always
180 // negative for signed finite types.
181 //
182 // mfh 23 May 2015: I have heard reports that
183 // std::numeric_limits<int>::lowest() does not exist with the
184 // Intel compiler. I'm not sure if the users in question actually
185 // enabled C++11. However, it's easy enough to work around this
186 // issue. The standard floating-point types are signed and have a
187 // sign bit, so lowest() is just -max(). For integer types, we
188 // can use min() instead.
189#if KOKKOS_VERSION >= 40799
190 maxKey_(::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max())
191#else
193#endif
194 , success_(true) {
195 static_assert(std::is_arithmetic<KeyType>::value,
196 "FillPairsResult: "
197 "KeyType must be some kind of number type.");
198 }
199
201 FillPairsResult(const KeyType& initMinKey,
202 const KeyType& initMaxKey)
205 , success_(true) {
206 static_assert(std::is_arithmetic<KeyType>::value,
207 "FillPairsResult: "
208 "KeyType must be some kind of number type.");
210
213 bool success_;
214};
215
245template <class PairsViewType,
246 class KeysViewType,
247 class CountsViewType,
248 class SizeType = typename KeysViewType::size_type>
250 public:
251 typedef typename CountsViewType::non_const_type counts_view_type;
252 typedef typename counts_view_type::const_type offsets_view_type;
253
255 typedef typename KeysViewType::const_type keys_view_type;
256 typedef typename offsets_view_type::execution_space execution_space;
257 typedef typename offsets_view_type::memory_space memory_space;
258 typedef SizeType size_type;
259
260 typedef typename keys_view_type::non_const_value_type key_type;
261 typedef typename pairs_view_type::non_const_value_type pair_type;
262
264
265 // mfh 23 May 2015: Having a device_type typedef in the functor
266 // along with an execution_space typedef causes compilation issues.
267 // This is because one of Kokkos' partial specializations picks up
268 // on the device_type typedef, and another picks up on the
269 // execution_space typedef. The former is a legacy of a previous
270 // design iteration of Kokkos, which did not separate memory and
271 // execution spaces.
273
285 const counts_view_type& counts,
286 const offsets_view_type& ptr,
287 const keys_view_type& keys,
288 const typename pair_type::second_type startingValue)
289 : pairs_(pairs)
290 , counts_(counts)
291 , ptr_(ptr)
292 , keys_(keys)
293 , size_(counts.extent(0))
294 , startingValue_(startingValue)
295#if KOKKOS_VERSION >= 40799
296 , initMinKey_(::KokkosKernels::ArithTraits<key_type>::max())
297 , initMaxKey_(::KokkosKernels::ArithTraits<key_type>::is_integer ? ::KokkosKernels::ArithTraits<key_type>::min() : -::KokkosKernels::ArithTraits<key_type>::max()){}
298#else
299 , initMinKey_(::Kokkos::ArithTraits<key_type>::max())
300 , initMaxKey_(::Kokkos::ArithTraits<key_type>::is_integer ? ::Kokkos::ArithTraits<key_type>::min() : -::Kokkos::ArithTraits<key_type>::max()) {
301 }
302#endif
303
323 const counts_view_type& counts,
324 const offsets_view_type& ptr,
325 const keys_view_type& keys,
326 const typename pair_type::second_type startingValue,
327 const key_type initMinKey,
328 const key_type initMaxKey)
329 : pairs_(pairs)
330 , counts_(counts)
331 , ptr_(ptr)
332 , keys_(keys)
333 , size_(counts.extent(0))
334 , startingValue_(startingValue)
335 , initMinKey_(initMinKey)
336 , initMaxKey_(initMaxKey) {
337 }
338
341 dst.minKey_ = initMinKey_;
342 dst.maxKey_ = initMaxKey_;
343 dst.success_ = true;
344 }
345
347 join(value_type& dst,
348 const value_type& src) const {
349 if (src.maxKey_ > dst.maxKey_) {
350 dst.maxKey_ = src.maxKey_;
351 }
352 if (src.minKey_ < dst.minKey_) {
353 dst.minKey_ = src.minKey_;
354 }
355 dst.success_ = dst.success_ && src.success_;
356 }
357
362 KOKKOS_INLINE_FUNCTION void
363 operator()(const size_type& i, value_type& dst) const {
365 typedef typename offsets_view_type::non_const_value_type offset_type;
366 typedef typename pair_type::second_type val_type;
367 typedef typename std::remove_reference<decltype(counts_[0])>::type atomic_incr_type;
368
369 const key_type key = keys_[i];
370 if (key > dst.maxKey_) {
371 dst.maxKey_ = key;
372 }
373 if (key < dst.minKey_) {
374 dst.minKey_ = key;
376 const val_type theVal = startingValue_ + static_cast<val_type>(i);
378
379 // Return the old count; decrement afterwards.
380 const offset_type count = Kokkos::atomic_fetch_add(&counts_[hashVal], atomic_incr_type(-1));
381 if (count == 0) {
382 dst.success_ = false; // FAILURE!
383 } else {
384 const offset_type curPos = ptr_[hashVal + 1] - count;
385
386 pairs_[curPos].first = key;
387 pairs_[curPos].second = theVal;
388 }
389 }
390
391 private:
392 pairs_view_type pairs_;
393 counts_view_type counts_;
394 offsets_view_type ptr_;
395 keys_view_type keys_;
396 size_type size_;
397 typename pair_type::second_type startingValue_;
399 key_type initMinKey_;
401 key_type initMaxKey_;
402};
403
427template <class OffsetsViewType,
428 class PairsViewType,
429 class SizeType = typename OffsetsViewType::size_type>
431 public:
432 typedef typename OffsetsViewType::const_type offsets_view_type;
433 typedef typename PairsViewType::const_type pairs_view_type;
434 typedef typename offsets_view_type::execution_space execution_space;
435 typedef typename offsets_view_type::memory_space memory_space;
436 typedef SizeType size_type;
437
438 // The result of the check is whether the table has one or more duplicates.
439 typedef int value_type;
440
445 CheckForDuplicateKeys(const pairs_view_type& pairs,
446 const offsets_view_type& ptr)
447 : pairs_(pairs)
448 , ptr_(ptr)
449 , size_(ptr_.extent(0) == 0 ? size_type(0) : ptr_.extent(0) - 1) {}
450
453 dst = 0;
454 }
455
459 const value_type& src) const {
460 dst = dst + src > 0 ? 1 : 0;
461 }
462
465 operator()(const size_type& i, value_type& dst) const {
466 typedef typename offsets_view_type::non_const_value_type offset_type;
467 typedef typename pairs_view_type::non_const_value_type pair_type;
468 typedef typename pair_type::first_type key_type;
469
470 if (dst > 0) {
471 return; // we've already found duplicate keys elsewhere
472 } else {
473 const offset_type beg = ptr_[i];
474 const offset_type end = ptr_[i + 1];
475 bool foundDuplicateKey = false;
476 // This is an ~ n^2 algorithm in the worst case, where n is the
477 // max number of keys that hash to the same bucket. However, if
478 // the hash function is reasonable, n should be much less than
479 // the total number of keys.
480 for (offset_type j = beg + 1; j < end; ++j) {
481 const key_type curKey = pairs_[j].first;
482 for (offset_type k = beg; k < j; ++k) {
483 if (pairs_[k].first == curKey) {
484 foundDuplicateKey = true;
485 break;
486 }
487 }
488 }
489 dst = (dst > 0) || foundDuplicateKey ? 1 : 0;
490 }
491 }
492
493 private:
494 pairs_view_type pairs_;
495 offsets_view_type ptr_;
496 size_type size_;
497};
498
499} // namespace FHT
500
501//
502// Here begins the actual implementation of FixedHashTable.
503//
504
505template <class KeyType, class ValueType, class DeviceType>
508 : minVal_(0)
509 , maxVal_(keys.size() == 0 ? static_cast<ValueType>(0) : static_cast<ValueType>(keys.size() - 1))
510 , checkedForDuplicateKeys_(false) {
511 const ValueType startingValue = static_cast<ValueType>(0);
512 const KeyType initMinKey = this->minKey_;
513 const KeyType initMaxKey = this->maxKey_;
515 initMinKey, initMinKey, false);
516}
517
518template <class KeyType, class ValueType, class DeviceType>
520 FixedHashTable(const Teuchos::ArrayView<const KeyType>& keys)
521 : minVal_(0)
522 , maxVal_(keys.size() == 0 ? static_cast<ValueType>(0) : static_cast<ValueType>(keys.size() - 1))
523 , checkedForDuplicateKeys_(false) {
524 typedef typename keys_type::non_const_type nonconst_keys_type;
525
526 // mfh 01 May 2015: I don't trust that
527 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
528 // so I ensure this manually.
529 const ValueType startingValue = static_cast<ValueType>(0);
530 host_input_keys_type keys_k(keys.size() == 0 ? NULL : keys.getRawPtr(),
531 keys.size());
532 using Kokkos::ViewAllocateWithoutInitializing;
534 keys_k.extent(0));
535 // DEEP_COPY REVIEW - NOT TESTED
536 Kokkos::deep_copy(keys_d, keys_k);
537 const KeyType initMinKey = this->minKey_;
538 const KeyType initMaxKey = this->maxKey_;
540 initMinKey, initMinKey, false);
541}
542
543template <class KeyType, class ValueType, class DeviceType>
545 FixedHashTable(const Teuchos::ArrayView<const KeyType>& keys,
547 : minVal_(startingValue)
548 , maxVal_(keys.size() == 0 ? startingValue : static_cast<ValueType>(startingValue + keys.size() - 1))
549 , checkedForDuplicateKeys_(false) {
550 typedef typename keys_type::non_const_type nonconst_keys_type;
551
552 // mfh 01 May 2015: I don't trust that
553 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
554 // so I ensure this manually.
555 host_input_keys_type keys_k(keys.size() == 0 ? NULL : keys.getRawPtr(),
556 keys.size());
557 using Kokkos::ViewAllocateWithoutInitializing;
559 keys_k.extent(0));
560 // DEEP_COPY REVIEW - HOST-TO_DEVICE
561 Kokkos::deep_copy(execution_space(), keys_d, keys_k);
562
563#if KOKKOS_VERSION >= 40799
564 const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
565#else
566 const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max();
567#endif
568 // min() for a floating-point type returns the minimum _positive_
569 // normalized value. This is different than for integer types.
570 // lowest() is new in C++11 and returns the least value, always
571 // negative for signed finite types.
572 //
573 // mfh 23 May 2015: I have heard reports that
574 // std::numeric_limits<int>::lowest() does not exist with the Intel
575 // compiler. I'm not sure if the users in question actually enabled
576 // C++11. However, it's easy enough to work around this issue. The
577 // standard floating-point types are signed and have a sign bit, so
578 // lowest() is just -max(). For integer types, we can use min()
579 // instead.
580#if KOKKOS_VERSION >= 40799
581 const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
582#else
583 const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ? ::Kokkos::ArithTraits<KeyType>::min() : -::Kokkos::ArithTraits<KeyType>::max();
584#endif
586 initMinKey, initMinKey, false);
587}
588
589template <class KeyType, class ValueType, class DeviceType>
595 : minVal_(startingValue)
596 , maxVal_(keys.size() == 0 ? startingValue : static_cast<ValueType>(startingValue + keys.size() - 1))
597 , firstContigKey_(firstContigKey)
598 , lastContigKey_(lastContigKey)
599 , checkedForDuplicateKeys_(false) {
600#if KOKKOS_VERSION >= 40799
601 const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
602#else
603 const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max();
604#endif
605 // min() for a floating-point type returns the minimum _positive_
606 // normalized value. This is different than for integer types.
607 // lowest() is new in C++11 and returns the least value, always
608 // negative for signed finite types.
609 //
610 // mfh 23 May 2015: I have heard reports that
611 // std::numeric_limits<int>::lowest() does not exist with the Intel
612 // compiler. I'm not sure if the users in question actually enabled
613 // C++11. However, it's easy enough to work around this issue. The
614 // standard floating-point types are signed and have a sign bit, so
615 // lowest() is just -max(). For integer types, we can use min()
616 // instead.
617#if KOKKOS_VERSION >= 40799
618 const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
619#else
620 const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ? ::Kokkos::ArithTraits<KeyType>::min() : -::Kokkos::ArithTraits<KeyType>::max();
621#endif
624}
625
626template <class KeyType, class ValueType, class DeviceType>
628 FixedHashTable(const Teuchos::ArrayView<const KeyType>& keys,
632 : minVal_(startingValue)
633 , maxVal_(keys.size() == 0 ? startingValue : static_cast<ValueType>(startingValue + keys.size() - 1))
634 , firstContigKey_(firstContigKey)
635 , lastContigKey_(lastContigKey)
636 , checkedForDuplicateKeys_(false) {
637 typedef typename keys_type::non_const_type nonconst_keys_type;
638
639 // mfh 01 May 2015: I don't trust that
640 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
641 // so I ensure this manually.
642 host_input_keys_type keys_k(keys.size() == 0 ? NULL : keys.getRawPtr(),
643 keys.size());
644 using Kokkos::ViewAllocateWithoutInitializing;
646 keys_k.extent(0));
647 // DEEP_COPY REVIEW - NOT TESTED
648 Kokkos::deep_copy(keys_d, keys_k);
649
650#if KOKKOS_VERSION >= 40799
651 const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
652#else
653 const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max();
654#endif
655 // min() for a floating-point type returns the minimum _positive_
656 // normalized value. This is different than for integer types.
657 // lowest() is new in C++11 and returns the least value, always
658 // negative for signed finite types.
659 //
660 // mfh 23 May 2015: I have heard reports that
661 // std::numeric_limits<int>::lowest() does not exist with the Intel
662 // compiler. I'm not sure if the users in question actually enabled
663 // C++11. However, it's easy enough to work around this issue. The
664 // standard floating-point types are signed and have a sign bit, so
665 // lowest() is just -max(). For integer types, we can use min()
666 // instead.
667#if KOKKOS_VERSION >= 40799
668 const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
669#else
670 const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ? ::Kokkos::ArithTraits<KeyType>::min() : -::Kokkos::ArithTraits<KeyType>::max();
671#endif
674}
675
676template <class KeyType, class ValueType, class DeviceType>
680 : minVal_(startingValue)
681 , maxVal_(keys.size() == 0 ? startingValue : static_cast<ValueType>(startingValue + keys.size() - 1))
682 , checkedForDuplicateKeys_(false) {
683#if KOKKOS_VERSION >= 40799
684 const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
685#else
686 const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max();
687#endif
688 // min() for a floating-point type returns the minimum _positive_
689 // normalized value. This is different than for integer types.
690 // lowest() is new in C++11 and returns the least value, always
691 // negative for signed finite types.
692 //
693 // mfh 23 May 2015: I have heard reports that
694 // std::numeric_limits<int>::lowest() does not exist with the Intel
695 // compiler. I'm not sure if the users in question actually enabled
696 // C++11. However, it's easy enough to work around this issue. The
697 // standard floating-point types are signed and have a sign bit, so
698 // lowest() is just -max(). For integer types, we can use min()
699 // instead.
700#if KOKKOS_VERSION >= 40799
701 const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
702#else
703 const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ? ::Kokkos::ArithTraits<KeyType>::min() : -::Kokkos::ArithTraits<KeyType>::max();
704#endif
706 initMinKey, initMinKey, false);
707}
708
709template <class KeyType, class ValueType, class DeviceType>
711 FixedHashTable(const Teuchos::ArrayView<const KeyType>& keys,
712 const Teuchos::ArrayView<const ValueType>& vals)
713 : contiguousValues_(false)
714 , checkedForDuplicateKeys_(false) {
715 // mfh 01 May 2015: I don't trust that
716 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
717 // so I ensure this manually.
718 host_input_keys_type keys_k(keys.size() == 0 ? NULL : keys.getRawPtr(),
719 keys.size());
720 host_input_vals_type vals_k(vals.size() == 0 ? NULL : vals.getRawPtr(),
721 vals.size());
722#if KOKKOS_VERSION >= 40799
723 const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
724#else
725 const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max();
726#endif
727 // min() for a floating-point type returns the minimum _positive_
728 // normalized value. This is different than for integer types.
729 // lowest() is new in C++11 and returns the least value, always
730 // negative for signed finite types.
731 //
732 // mfh 23 May 2015: I have heard reports that
733 // std::numeric_limits<int>::lowest() does not exist with the Intel
734 // compiler. I'm not sure if the users in question actually enabled
735 // C++11. However, it's easy enough to work around this issue. The
736 // standard floating-point types are signed and have a sign bit, so
737 // lowest() is just -max(). For integer types, we can use min()
738 // instead.
739#if KOKKOS_VERSION >= 40799
740 const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
741#else
742 const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ? ::Kokkos::ArithTraits<KeyType>::min() : -::Kokkos::ArithTraits<KeyType>::max();
743#endif
744 this->init(keys_k, vals_k, initMinKey, initMaxKey);
745}
746
747template <class KeyType, class ValueType, class DeviceType>
749 init(const keys_type& keys,
755 const bool computeInitContigKeys) {
756 using Kokkos::subview;
757 using Kokkos::ViewAllocateWithoutInitializing;
758 using Teuchos::TypeNameTraits;
759 typedef typename std::decay<decltype(keys.extent(0))>::type size_type;
760 Tpetra::Details::ProfilingRegion pr("Tpetra::Details::FixedHashTable::init(7-arg)");
761 const char prefix[] = "Tpetra::Details::FixedHashTable: ";
762
763 const offset_type numKeys = static_cast<offset_type>(keys.extent(0));
764 {
765#if KOKKOS_VERSION >= 40799
766 const offset_type theMaxVal = ::KokkosKernels::ArithTraits<offset_type>::max();
767#else
768 const offset_type theMaxVal = ::Kokkos::ArithTraits<offset_type>::max();
769#endif
770 const size_type maxValST = static_cast<size_type>(theMaxVal);
771 TEUCHOS_TEST_FOR_EXCEPTION(keys.extent(0) > maxValST, std::invalid_argument, prefix << "The "
772 "number of keys "
773 << keys.extent(0) << " does not fit in "
774 "offset_type = "
775 << TypeNameTraits<offset_type>::name() << ", whose "
776 "max value is "
777 << theMaxVal << ". This means that it is not possible to "
778 "use this constructor.");
779 }
780#if KOKKOS_VERSION >= 40799
781 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<unsigned long long>(numKeys) >
782 static_cast<unsigned long long>(::KokkosKernels::ArithTraits<ValueType>::max()),
783 std::invalid_argument,
784 "Tpetra::Details::FixedHashTable: The number of "
785 "keys "
786 << numKeys << " is greater than the maximum representable "
787 "ValueType value "
788 << ::KokkosKernels::ArithTraits<ValueType>::max() << ". "
789 "This means that it is not possible to use this constructor.");
790#else
791 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<unsigned long long>(numKeys) >
792 static_cast<unsigned long long>(::Kokkos::ArithTraits<ValueType>::max()),
793 std::invalid_argument,
794 "Tpetra::Details::FixedHashTable: The number of "
795 "keys "
796 << numKeys << " is greater than the maximum representable "
797 "ValueType value "
798 << ::Kokkos::ArithTraits<ValueType>::max() << ". "
799 "This means that it is not possible to use this constructor.");
800#endif
801 TEUCHOS_TEST_FOR_EXCEPTION(numKeys > static_cast<offset_type>(INT_MAX), std::logic_error, prefix << "This class currently only works when the number of keys is <= INT_MAX = " << INT_MAX << ". If this is a problem for you, please talk to the Tpetra "
802 "developers.");
803
804 const bool buildInParallel =
805 FHT::worthBuildingFixedHashTableInParallel<execution_space>();
806 const bool debug = ::Tpetra::Details::Behavior::debug();
807
808 // NOTE (mfh 14 May 2015) This method currently assumes UVM. We
809 // could change that by setting up ptr and val as Kokkos::DualView
810 // instances. If we do that, since we are filling on host for now,
811 // we want to make sure that we only zero-fill ptr on host
812 // initially, and that we don't fill val at all. Once we finish
813 // Kokkos-izing all the set-up kernels, we won't need DualView for
814 // either ptr or val.
815
817 // Find the first and last initial contiguous keys. If we find a
818 // long sequence of initial contiguous keys, we can save space by
819 // not storing them explicitly as pairs in the hash table.
820 //
821 // NOTE (mfh 01 Jun 2015) Doing this in parallel requires a scan
822 // ("min index such that the difference between the current key and
823 // the next != 1"), which takes multiple passes over the data. We
824 // could fuse it with CountBuckets (only update counts on 'final'
825 // pass). However, we're really just moving this sequential search
826 // out of Map's constructor here, so there is no loss in doing it
827 // sequentially for now. Later, we can work on parallelization.
828 if (numKeys > 0) {
829 // FIXME: make it a parallel kernel with no host copy
830 auto keys_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
831 keys);
832 firstContigKey_ = keys_h[0];
833 // Start with one plus, then decrement at the end. That lets us do
834 // only one addition per loop iteration, rather than two (if we test
835 // against lastContigKey + 1 and then increment lastContigKey).
836 lastContigKey_ = firstContigKey_ + 1;
837
838 // We will only store keys in the table that are not part of the
839 // initial contiguous sequence. It's possible for the initial
840 // contiguous sequence to be trivial, which for a nonzero number of
841 // keys means that the "sequence" has length 1.
842 for (offset_type k = 1; k < numKeys; ++k) {
843 if (lastContigKey_ != keys_h[k]) {
844 break;
845 }
846 ++lastContigKey_;
847 }
848 --lastContigKey_;
849 }
850 } else {
851 firstContigKey_ = firstContigKey;
852 lastContigKey_ = lastContigKey;
853 }
854
855 offset_type startIndex;
856 if (numKeys > 0) {
857 initMinKey = std::min(initMinKey, firstContigKey_);
858 initMaxKey = std::max(initMaxKey, lastContigKey_);
859 startIndex = static_cast<offset_type>(lastContigKey_ - firstContigKey_);
860 } else {
861 startIndex = 0;
862 }
863
864 const offset_type theNumKeys = numKeys - startIndex;
865 const offset_type size = hash_type::getRecommendedSize(theNumKeys);
866#ifdef HAVE_TPETRA_DEBUG
867 TEUCHOS_TEST_FOR_EXCEPTION(
868 size == 0 && numKeys != 0, std::logic_error,
869 "Tpetra::Details::FixedHashTable constructor: "
870 "getRecommendedSize("
871 << numKeys << ") returned zero, "
872 "even though the number of keys "
873 << numKeys << " is nonzero. "
874 "Please report this bug to the Tpetra developers.");
875#endif // HAVE_TPETRA_DEBUG
876 keys_type theKeys =
877 subview(keys, std::pair<offset_type, offset_type>(startIndex, numKeys));
878
879 // FIXME (mfh 28 Mar 2016) For some reason, we don't seem to need a
880 // fence here, but we do before other allocations.
881
882 // The array of counts must be separate from the array of offsets,
883 // in order for parallel_scan to work correctly.
884 typedef typename ptr_type::non_const_type counts_type;
885 counts_type counts("Tpetra::FixedHashTable::counts", size);
886
887 //
888 // Count the number of "buckets" per offsets array (ptr) entry.
889 //
890
891 // Will only create the mirror for buildInParallel false - but then use it in two places
892 typename keys_type::host_mirror_type theKeysHost;
893
894 // The Kokkos kernel uses atomic update instructions to count the
895 // number of "buckets" per offsets array (ptr) entry. Atomic
896 // updates incur overhead, even in the sequential case. The Kokkos
897 // kernel is still correct in that case, but I would rather not
898 // incur overhead then.
899 if (buildInParallel) {
900 FHT::CountBuckets<counts_type, keys_type> functor(counts, theKeys, size);
901 using range_type = Kokkos::RangePolicy<execution_space, offset_type>;
902 const char kernelLabel[] = "Tpetra::Details::FixedHashTable CountBuckets";
903 if (debug) {
904 using key_type = typename keys_type::non_const_value_type;
905 Kokkos::pair<int, key_type> err;
906 Kokkos::parallel_reduce(kernelLabel, range_type(0, theNumKeys),
907 functor, err);
908 TEUCHOS_TEST_FOR_EXCEPTION(err.first != 0, std::logic_error,
909 "Tpetra::Details::FixedHashTable "
910 "constructor: CountBuckets found a key "
911 << err.second << " that "
912 "results in an out-of-bounds hash value.");
913 } else {
914 Kokkos::parallel_for(kernelLabel, range_type(0, theNumKeys), functor);
915 }
916 } else {
917 Kokkos::HostSpace hostMemSpace;
918 theKeysHost = Kokkos::create_mirror_view(theKeys);
919 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
920 Kokkos::deep_copy(execution_space(), theKeysHost, theKeys);
921 auto countsHost = Kokkos::create_mirror_view(hostMemSpace, counts);
922
923 for (offset_type k = 0; k < theNumKeys; ++k) {
924 using key_type = typename keys_type::non_const_value_type;
925 const key_type key = theKeysHost[k];
926
927 using hash_value_type = typename hash_type::result_type;
928 const hash_value_type hashVal = hash_type::hashFunc(key, size);
929 TEUCHOS_TEST_FOR_EXCEPTION(hashVal < hash_value_type(0) ||
930 hashVal >= hash_value_type(countsHost.extent(0)),
931 std::logic_error,
932 "Tpetra::Details::FixedHashTable "
933 "constructor: Sequential CountBuckets found a key "
934 << key
935 << " that results in an out-of-bounds hash value.");
936
937 ++countsHost[hashVal];
938 }
939 // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
940 Kokkos::deep_copy(execution_space(), counts, countsHost);
941 }
942
943 // KJ: This fence is not required for the 2-argument deep_copy which calls
944 // fence, but will be required if switched to the 3-argumemt deep_copy which
945 // passes a space. The 3-argument form does not fence.
946 execution_space().fence();
947
948 // Kokkos::View fills with zeros by default.
949 typename ptr_type::non_const_type ptr("Tpetra::FixedHashTable::ptr", size + 1);
950
951 // Compute row offsets via prefix sum:
952 //
953 // ptr[i+1] = \sum_{j=0}^{i} counts[j].
954 //
955 // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
956 // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
957 // formula is ptr[i+1] += ptr[i].
958 //
959 // parallel_scan does not incur overhead with Kokkos::Serial, but
960 // with actual parallel execution spaces, it does require multiple
961 // passes over the data. Thus, it still makes sense to have a
962 // sequential fall-back.
963
964 using ::Tpetra::Details::computeOffsetsFromCounts;
965 if (buildInParallel) {
966 computeOffsetsFromCounts(ptr, counts);
967 }
968
969 if (!buildInParallel || debug) {
970 Kokkos::HostSpace hostMemSpace;
971 auto counts_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, counts);
972 auto ptr_h = Kokkos::create_mirror_view(hostMemSpace, ptr);
973
974#ifdef KOKKOS_ENABLE_SERIAL
975 Kokkos::Serial hostExecSpace;
976#else
977 Kokkos::DefaultHostExecutionSpace hostExecSpace;
978#endif // KOKKOS_ENABLE_SERIAL
979
980 computeOffsetsFromCounts(hostExecSpace, ptr_h, counts_h);
981 // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
982 Kokkos::deep_copy(execution_space(), ptr, ptr_h);
983
984 if (debug) {
985 bool bad = false;
986 for (offset_type i = 0; i < size; ++i) {
987 if (ptr_h[i + 1] != ptr_h[i] + counts_h[i]) {
988 bad = true;
989 }
990 }
991 TEUCHOS_TEST_FOR_EXCEPTION(bad, std::logic_error,
992 "Tpetra::Details::FixedHashTable "
993 "constructor: computeOffsetsFromCounts gave an incorrect "
994 "result.");
995 }
996 }
997
998 // KJ: computeOffsetsFromCounts calls parallel_scan which does not fence.
999 // This fence is necessary as we need to make sure that the offset view
1000 // completes before the view is used in the next functor.
1001 execution_space().fence();
1002
1003 // Allocate the array of (key,value) pairs. Don't fill it with
1004 // zeros, because we will fill it with actual data below.
1005 typedef typename val_type::non_const_type nonconst_val_type;
1006 nonconst_val_type val(ViewAllocateWithoutInitializing("Tpetra::FixedHashTable::pairs"),
1007 theNumKeys);
1008
1009 // Fill in the hash table's "values" (the (key,value) pairs).
1010 typedef FHT::FillPairs<typename val_type::non_const_type, keys_type,
1011 typename ptr_type::non_const_type>
1012 functor_type;
1013 typename functor_type::value_type result(initMinKey, initMaxKey);
1014
1015 const ValueType newStartingValue = startingValue + static_cast<ValueType>(startIndex);
1016 if (buildInParallel) {
1017 functor_type functor(val, counts, ptr, theKeys, newStartingValue,
1018 initMinKey, initMaxKey);
1019 typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1020 Kokkos::parallel_reduce("Tpetra::Details::FixedHashTable::FillPairs", range_type(0, theNumKeys), functor, result);
1021 } else {
1022 Kokkos::HostSpace hostMemSpace;
1023 auto counts_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, counts);
1024 auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
1025 auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
1026 for (offset_type k = 0; k < theNumKeys; ++k) {
1027 typedef typename hash_type::result_type hash_value_type;
1028 const KeyType key = theKeysHost[k];
1029 if (key > result.maxKey_) {
1030 result.maxKey_ = key;
1031 }
1032 if (key < result.minKey_) {
1033 result.minKey_ = key;
1034 }
1035 const ValueType theVal = newStartingValue + static_cast<ValueType>(k);
1036 const hash_value_type hashVal = hash_type::hashFunc(key, size);
1037
1038 // Return the old count; decrement afterwards.
1039 const offset_type count = counts_h[hashVal];
1040 --counts_h[hashVal];
1041 if (count == 0) {
1042 result.success_ = false; // FAILURE!
1043 break;
1044 } else {
1045 const offset_type curPos = ptr_h[hashVal + 1] - count;
1046 val_h[curPos].first = key;
1047 val_h[curPos].second = theVal;
1048 }
1049 }
1050 Kokkos::deep_copy(counts, counts_h); // restore
1051 Kokkos::deep_copy(val, val_h); // restore
1052 }
1053
1054 // FIXME (mfh 01 Jun 2015) Temporarily commented out because of
1055 // reports of exceptions being thrown in Albany.
1056 //
1057 // TEUCHOS_TEST_FOR_EXCEPTION
1058 // (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1059 // "init: Filling the hash table failed! Please report this bug to the "
1060 // "Tpetra developers.");
1061 (void)result; // prevent build warning (set but unused variable)
1062
1063 // "Commit" the computed arrays and other computed quantities.
1064 ptr_ = ptr;
1065 val_ = val;
1066 minKey_ = result.minKey_;
1067 maxKey_ = result.maxKey_;
1068 // We've already set firstContigKey_ and lastContigKey_ above.
1069}
1070
1071template <class KeyType, class ValueType, class DeviceType>
1072void FixedHashTable<KeyType, ValueType, DeviceType>::
1073 init(const host_input_keys_type& keys,
1074 const host_input_vals_type& vals,
1075 KeyType initMinKey,
1076 KeyType initMaxKey) {
1077 Tpetra::Details::ProfilingRegion pr("Tpetra::Details::FixedHashTable::init(4-arg)");
1078 const offset_type numKeys = static_cast<offset_type>(keys.extent(0));
1079#if KOKKOS_VERSION >= 40799
1080 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<unsigned long long>(numKeys) > static_cast<unsigned long long>(::KokkosKernels::ArithTraits<ValueType>::max()),
1081 std::invalid_argument,
1082 "Tpetra::Details::FixedHashTable: The number of "
1083 "keys "
1084 << numKeys << " is greater than the maximum representable "
1085 "ValueType value "
1086 << ::KokkosKernels::ArithTraits<ValueType>::max() << ".");
1087#else
1088 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<unsigned long long>(numKeys) > static_cast<unsigned long long>(::Kokkos::ArithTraits<ValueType>::max()),
1089 std::invalid_argument,
1090 "Tpetra::Details::FixedHashTable: The number of "
1091 "keys "
1092 << numKeys << " is greater than the maximum representable "
1093 "ValueType value "
1094 << ::Kokkos::ArithTraits<ValueType>::max() << ".");
1095#endif
1096 TEUCHOS_TEST_FOR_EXCEPTION(numKeys > static_cast<offset_type>(INT_MAX), std::logic_error,
1097 "Tpetra::"
1098 "Details::FixedHashTable: This class currently only works when the number "
1099 "of keys is <= INT_MAX = "
1100 << INT_MAX << ". If this is a problem for you"
1101 ", please talk to the Tpetra developers.");
1102
1103 // There's no need to find the first and last initial contiguous
1104 // keys in this case, because if we reach this init() function, then
1105 // hasContiguousValues() is false and so get() doesn't use the
1106 // initial contiguous sequence of keys.
1107
1108 const offset_type size = hash_type::getRecommendedSize(numKeys);
1109#ifdef HAVE_TPETRA_DEBUG
1110 TEUCHOS_TEST_FOR_EXCEPTION(
1111 size == 0 && numKeys != 0, std::logic_error,
1112 "Tpetra::Details::FixedHashTable constructor: "
1113 "getRecommendedSize("
1114 << numKeys << ") returned zero, "
1115 "even though the number of keys "
1116 << numKeys << " is nonzero. "
1117 "Please report this bug to the Tpetra developers.");
1118#endif // HAVE_TPETRA_DEBUG
1119
1120 // FIXME: Investigate a couple options:
1121 // 1. Allocate ptr_h, val_h directly on host and only deep_copy to ptr_ and val_ once at the end
1122 // 2. Do all this work as a parallel kernel with the same execution/memory spaces as ptr_ and val_
1123 // An old comment from MFH noted ptr_h should be zero-initialized, while val_h should not be initialized.
1124 // It further noted that we shouldn't need a DualView type arrangement when all setup kernels have
1125 // been "Kokkos-ized".
1126 Kokkos::HostSpace hostMemSpace;
1127 typename ptr_type::non_const_type ptr("Tpetra::FixedHashTable::ptr", size + 1);
1128 auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
1129
1130 // Allocate the array of key,value pairs. Don't waste time filling
1131 // it with zeros, because we will fill it with actual data below.
1132 using Kokkos::ViewAllocateWithoutInitializing;
1133 typedef typename val_type::non_const_type nonconst_val_type;
1134 nonconst_val_type val(ViewAllocateWithoutInitializing("Tpetra::FixedHashTable::pairs"),
1135 numKeys);
1136 auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
1137
1138 // Compute number of entries in each hash table position.
1139 for (offset_type k = 0; k < numKeys; ++k) {
1140 const typename hash_type::result_type hashVal =
1141 hash_type::hashFunc(keys[k], size);
1142 // Shift over one, so that counts[j] = ptr[j+1]. See below.
1143 ++ptr_h[hashVal + 1];
1144 }
1145
1146 // Compute row offsets via prefix sum:
1147 //
1148 // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1149 //
1150 // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1151 // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1152 // formula is ptr[i+1] += ptr[i].
1153 for (offset_type i = 0; i < size; ++i) {
1154 ptr_h[i + 1] += ptr_h[i];
1155 }
1156 // ptr[0] = 0; // We've already done this when initializing ptr above.
1157
1158 // curRowStart[i] is the offset of the next element in row i.
1159 typename ptr_type::non_const_type::host_mirror_type curRowStart("Tpetra::FixedHashTable::curRowStart", size);
1160
1161 // Fill in the hash table.
1162 FHT::FillPairsResult<KeyType> result(initMinKey, initMaxKey);
1163 for (offset_type k = 0; k < numKeys; ++k) {
1164 typedef typename hash_type::result_type hash_value_type;
1165 const KeyType key = keys[k];
1166 if (key > result.maxKey_) {
1167 result.maxKey_ = key;
1168 }
1169 if (key < result.minKey_) {
1170 result.minKey_ = key;
1171 }
1172 const ValueType theVal = vals[k];
1173 if (theVal > maxVal_) {
1174 maxVal_ = theVal;
1175 }
1176 if (theVal < minVal_) {
1177 minVal_ = theVal;
1178 }
1179 const hash_value_type hashVal = hash_type::hashFunc(key, size);
1180
1181 const offset_type offset = curRowStart[hashVal];
1182 const offset_type curPos = ptr_h[hashVal] + offset;
1183 if (curPos >= ptr_h[hashVal + 1]) {
1184 result.success_ = false; // FAILURE!
1185 } else {
1186 val_h[curPos].first = key;
1187 val_h[curPos].second = theVal;
1188 ++curRowStart[hashVal];
1189 }
1190 }
1191
1192 TEUCHOS_TEST_FOR_EXCEPTION(!result.success_, std::logic_error,
1193 "Tpetra::Details::FixedHashTable::"
1194 "init: Filling the hash table failed! Please report this bug to the "
1195 "Tpetra developers.");
1196
1197 // "Commit" the computed arrays and other computed quantities.
1198 Kokkos::deep_copy(ptr, ptr_h);
1199 Kokkos::deep_copy(val, val_h);
1200
1201 ptr_ = ptr;
1202 val_ = val;
1203 minKey_ = result.minKey_;
1204 maxKey_ = result.maxKey_;
1205 // We've already assigned to minVal_ and maxVal_ above.
1206}
1207
1208template <class KeyType, class ValueType, class DeviceType>
1211 if (!checkedForDuplicateKeys_) {
1212 hasDuplicateKeys_ = checkForDuplicateKeys();
1213 checkedForDuplicateKeys_ = true;
1214 }
1215 return hasDuplicateKeys_;
1216}
1217
1218template <class KeyType, class ValueType, class DeviceType>
1220 checkForDuplicateKeys() const {
1221 const offset_type size = this->getSize();
1222 // It's allowed for the hash table to have a positive number of
1223 // buckets (getSize()), but still store no entries (numPairs()).
1224 // Both cases trivially entail no duplicates.
1225 if (size == 0 || this->numPairs() == 0) {
1226 return false;
1227 } else {
1228 typedef FHT::CheckForDuplicateKeys<ptr_type, val_type> functor_type;
1229 functor_type functor(val_, ptr_);
1230 int hasDupKeys = 0;
1231 typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1232 Kokkos::parallel_reduce("Tpetra::Details::FixedHashTable::CheckForDuplicateKeys", range_type(0, size), functor, hasDupKeys);
1233 return hasDupKeys > 0;
1234 }
1235}
1236
1237template <class KeyType, class ValueType, class DeviceType>
1238std::string
1240 description() const {
1241 std::ostringstream oss;
1242 oss << "FixedHashTable<"
1243 << Teuchos::TypeNameTraits<KeyType>::name() << ","
1244 << Teuchos::TypeNameTraits<ValueType>::name() << ">: "
1245 << "{ numKeys: " << val_.extent(0)
1246 << ", tableSize: " << this->getSize() << " }";
1247 return oss.str();
1248}
1249
1250template <class KeyType, class ValueType, class DeviceType>
1252 describe(Teuchos::FancyOStream& out,
1253 const Teuchos::EVerbosityLevel verbLevel) const {
1254 using std::endl;
1255 using std::setw;
1256 using Teuchos::OSTab;
1257 using Teuchos::rcpFromRef;
1258 using Teuchos::TypeNameTraits;
1259 using Teuchos::VERB_DEFAULT;
1260 using Teuchos::VERB_EXTREME;
1261 using Teuchos::VERB_LOW;
1262 using Teuchos::VERB_NONE;
1263
1264 // NOTE (mfh 14 May 2015) This method currently assumes UVM for
1265 // access to ptr_ and val_ from the host.
1266
1267 Teuchos::EVerbosityLevel vl = verbLevel;
1268 if (vl == VERB_DEFAULT) vl = VERB_LOW;
1269
1270 if (vl == VERB_NONE) {
1271 // do nothing
1272 } else if (vl == VERB_LOW) {
1273 out << this->description() << endl;
1274 } else { // MEDIUM, HIGH or EXTREME
1275 out << "FixedHashTable:" << endl;
1276 {
1278
1279 // const std::string label = this->getObjectLabel ();
1280 // if (label != "") {
1281 // out << "label: " << label << endl;
1282 // }
1283 out << "Template parameters:" << endl;
1284 {
1286 out << "KeyType: " << TypeNameTraits<KeyType>::name() << endl
1287 << "ValueType: " << TypeNameTraits<ValueType>::name() << endl;
1288 }
1289
1290 const offset_type tableSize = this->getSize();
1291 const offset_type numKeys = val_.extent(0);
1292
1293 out << "Table parameters:" << endl;
1294 {
1296 out << "numKeys: " << numKeys << endl
1297 << "tableSize: " << tableSize << endl;
1298 }
1299
1300 if (vl >= VERB_EXTREME) {
1301 out << "Contents: ";
1302 if (tableSize == 0 || numKeys == 0) {
1303 out << "[]" << endl;
1304 } else {
1305 out << "[ " << endl;
1306 {
1308 for (offset_type i = 0; i < tableSize; ++i) {
1310 out << "[";
1311 for (offset_type k = ptr_[i]; k < ptr_[i + 1]; ++k) {
1312 out << "(" << val_[k].first << "," << val_[k].second << ")";
1313 if (k + 1 < ptr_[i + 1]) {
1314 out << ", ";
1315 }
1316 }
1317 out << "]" << endl;
1318 } // for each table position i
1319 }
1320 out << "]" << endl;
1321 } // The table contains entries
1322 } // vl >= VERB_EXTREME
1323 }
1324 out << endl;
1325 } // if vl > VERB_LOW
1326}
1327
1328} // namespace Details
1329} // namespace Tpetra
1330
1331// Macro that explicitly instantiates FixedHashTable for the given
1332// local ordinal (LO), global ordinal (GO), and Kokkos device (NODE::device_type)
1333// types. Note that FixedHashTable's first two template parameters
1334// occur in the opposite order of most Tpetra classes. This is
1335// because FixedHashTable performs global-to-local lookup, and the
1336// convention in templated C++ lookup tables (such as std::map) is
1337// <KeyType, ValueType>.
1338//
1339// This macro must be explanded within the Tpetra::Details namespace.
1340// Moreover, we need (GO,LO,Host) and (LO,LO,Host)
1341#if defined(HAVE_TPETRA_INST_SERIAL)
1342#if defined(HAVE_TPETRA_INST_INT_INT)
1343#define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1344 template class Details::FixedHashTable<GO, LO, typename NODE::device_type>;
1345#else
1346#define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1347 template class Details::FixedHashTable<GO, LO, typename NODE::device_type>; \
1348 template class Details::FixedHashTable<LO, LO, typename NODE::device_type>;
1349#endif
1350#elif defined(HAVE_TPETRA_INST_OPENMP)
1351#if defined(HAVE_TPETRA_INST_INT_INT)
1352#define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1353 template class Details::FixedHashTable<GO, LO, typename NODE::device_type>; \
1354 template class Details::FixedHashTable<GO, LO, Kokkos::HostSpace::device_type>;
1355#else
1356#define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1357 template class Details::FixedHashTable<GO, LO, typename NODE::device_type>; \
1358 template class Details::FixedHashTable<LO, LO, typename NODE::device_type>; \
1359 template class Details::FixedHashTable<GO, LO, Kokkos::Device<Kokkos::Serial, Kokkos::HostSpace>>; \
1360 template class Details::FixedHashTable<LO, LO, Kokkos::Device<Kokkos::Serial, Kokkos::HostSpace>>;
1361#endif
1362#else
1363#define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1364 template class Details::FixedHashTable<GO, LO, typename NODE::device_type>; \
1365 template class Details::FixedHashTable<GO, LO, Kokkos::HostSpace::device_type>; \
1366 template class Details::FixedHashTable<LO, LO, Kokkos::HostSpace::device_type>;
1367#endif
1368#endif
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Struct that holds views of the contents of a CrsMatrix.
static bool debug()
Whether Tpetra is in debug mode.
Functor for checking whether a FixedHashTable has one or more duplicate entries.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body.
KOKKOS_INLINE_FUNCTION void join(value_type &dst, const value_type &src) const
Combine two intermediate reduction results.
CheckForDuplicateKeys(const pairs_view_type &pairs, const offsets_view_type &ptr)
Constructor.
Parallel for functor for counting "buckets" in the FixedHashTable.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i) const
Do this for every entry of keys_.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Debug reduce version of above operator().
CountBuckets(const counts_view_type &counts, const keys_view_type &keys, const size_type size)
Constructor.
Parallel reduce functor for filling the FixedHashTable, and computing the min and max keys.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue, const key_type initMinKey, const key_type initMaxKey)
Constructor that takes initial min and max key values.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body; do this for every entry of keys_.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue)
Constructor.
std::string description() const
Implementation of Teuchos::Describable interface.
bool hasDuplicateKeys()
Whether the table has any duplicate keys.
Kokkos::View< const KeyType *, Kokkos::LayoutLeft, device_type > keys_type
Type of a 1-D Kokkos::View (array) used to store keys.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with the given verbosity to the output stream.
Implementation details of Tpetra.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Reduction result for FillPairs functor below.
bool success_
Whether fill succeeded (it can only fail on a bug)
The hash function for FixedHashTable.
ResultType result_type
Type of the return value of the hash function.
static KOKKOS_INLINE_FUNCTION result_type hashFunc(const argument_type &, const offset_type &)
The hash function.