Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_radixSort.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_RADIXSORT_HPP
11#define TPETRA_DETAILS_RADIXSORT_HPP
12
13#include "TpetraCore_config.h"
14#include "Kokkos_Macros.hpp"
15
16namespace Tpetra {
17namespace Details {
18
30template <typename KeyType, typename ValueType, typename IndexType>
31KOKKOS_INLINE_FUNCTION void
33 if (n <= 1)
34 return;
35 KeyType mask = 0xF;
36 bool inAux = false;
37 // maskPos counts the low bit index of mask (0, 4, 8, ...)
39 // Count number of bits required to sort (8 * sizeof(KeyType) - lzcnt(maxKey - minKey))
41 while (upperBound) {
42 upperBound >>= 1;
43 sortBits++;
44 }
45 for (IndexType s = 0; s < (sortBits + 3) / 4; s++) {
46 // Count the number of elements in each bucket
47 IndexType count[16] = {0};
48 IndexType offset[17] = {0};
49 if (!inAux) {
50 for (IndexType i = 0; i < n; i++) {
51 count[(keys[i] & mask) >> maskPos]++;
52 }
53 } else {
54 for (IndexType i = 0; i < n; i++) {
55 count[(keysAux[i] & mask) >> maskPos]++;
56 }
57 }
58 // get offset as the prefix sum for count
59 for (IndexType i = 0; i < 16; i++) {
60 offset[i + 1] = offset[i] + count[i];
61 }
62 // now for each element in [lo, hi), move it to its offset in the other buffer
63 // this branch should be ok because whichBuf is the same on all threads
64 if (!inAux) {
65 // copy from *Over to *Aux
66 for (IndexType i = 0; i < n; i++) {
68 keysAux[offset[bucket + 1] - count[bucket]] = keys[i];
69 valuesAux[offset[bucket + 1] - count[bucket]] = values[i];
70 count[bucket]--;
71 }
72 } else {
73 // copy from *Aux to *Over
74 for (IndexType i = 0; i < n; i++) {
76 keys[offset[bucket + 1] - count[bucket]] = keysAux[i];
77 values[offset[bucket + 1] - count[bucket]] = valuesAux[i];
78 count[bucket]--;
79 }
80 }
81 inAux = !inAux;
82 mask = mask << 4;
83 maskPos += 4;
84 }
85 if (inAux) {
86 // need to deep copy from aux arrays to main
87 for (IndexType i = 0; i < n; i++) {
88 keys[i] = keysAux[i];
89 values[i] = valuesAux[i];
90 }
91 }
92}
93
94} // namespace Details
95} // namespace Tpetra
96
97#endif // include guard
Struct that holds views of the contents of a CrsMatrix.
Implementation details of Tpetra.
KOKKOS_INLINE_FUNCTION void radixSortKeysAndValues(KeyType *keys, KeyType *keysAux, ValueType *values, ValueType *valuesAux, IndexType n, IndexType upperBound)
Radix sort the input array keys, and permute values identically to the keys.
Namespace Tpetra contains the class and methods constituting the Tpetra library.