Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_radixSort.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_RADIXSORT_HPP
11#define TPETRA_DETAILS_RADIXSORT_HPP
12
13#include "TpetraCore_config.h"
14#include "Kokkos_Macros.hpp"
15
16namespace Tpetra {
17namespace Details {
18
31template <typename KeyType, typename ValueType, typename IndexType>
32KOKKOS_INLINE_FUNCTION void
34 if (n <= 1)
35 return;
36 KeyType mask = 0xF;
37 bool inAux = false;
38 // maskPos counts the low bit index of mask (0, 4, 8, ...)
40 // Count number of bits required to sort (8 * sizeof(KeyType) - lzcnt(maxKey - minKey))
42 while (upperBound) {
43 upperBound >>= 1;
44 sortBits++;
45 }
46 for (IndexType s = 0; s < (sortBits + 3) / 4; s++) {
47 // Count the number of elements in each bucket
48 IndexType count[16] = {0};
49 IndexType offset[17] = {0};
50 if (!inAux) {
51 for (IndexType i = 0; i < n; i++) {
52 count[(keys[i] & mask) >> maskPos]++;
53 }
54 } else {
55 for (IndexType i = 0; i < n; i++) {
56 count[(keysAux[i] & mask) >> maskPos]++;
57 }
58 }
59 // get offset as the prefix sum for count
60 for (IndexType i = 0; i < 16; i++) {
61 offset[i + 1] = offset[i] + count[i];
62 }
63 // now for each element in [lo, hi), move it to its offset in the other buffer
64 // this branch should be ok because whichBuf is the same on all threads
65 if (!inAux) {
66 // copy from *Over to *Aux
67 for (IndexType i = 0; i < n; i++) {
68 IndexType bucket = (keys[i] & mask) >> maskPos;
69 keysAux[offset[bucket + 1] - count[bucket]] = keys[i];
70 valuesAux[offset[bucket + 1] - count[bucket]] = values[i];
71 count[bucket]--;
72 }
73 } else {
74 // copy from *Aux to *Over
75 for (IndexType i = 0; i < n; i++) {
77 keys[offset[bucket + 1] - count[bucket]] = keysAux[i];
78 values[offset[bucket + 1] - count[bucket]] = valuesAux[i];
79 count[bucket]--;
80 }
81 }
82 inAux = !inAux;
83 mask = mask << 4;
84 maskPos += 4;
85 }
86 if (inAux) {
87 // need to deep copy from aux arrays to main
88 for (IndexType i = 0; i < n; i++) {
89 keys[i] = keysAux[i];
90 values[i] = valuesAux[i];
91 }
92 }
93}
94
95} // namespace Details
96} // namespace Tpetra
97
98#endif // include guard
Struct that holds views of the contents of a CrsMatrix.
Implementation details of Tpetra.
KOKKOS_INLINE_FUNCTION void radixSortKeysAndValues(KeyType *keys, KeyType *keysAux, ValueType *values, ValueType *valuesAux, IndexType n, IndexType upperBound)
Radix sort the input array keys, and permute values identically to the keys.
Namespace Tpetra contains the class and methods constituting the Tpetra library.