Tpetra parallel linear algebra
Version of the Day
Loading...
Searching...
No Matches
core
src
Tpetra_Details_radixSort.hpp
1
// @HEADER
2
// *****************************************************************************
3
// Tpetra: Templated Linear Algebra Services Package
4
//
5
// Copyright 2008 NTESS and the Tpetra contributors.
6
// SPDX-License-Identifier: BSD-3-Clause
7
// *****************************************************************************
8
// @HEADER
9
10
#ifndef TPETRA_DETAILS_RADIXSORT_HPP
11
#define TPETRA_DETAILS_RADIXSORT_HPP
12
13
#include "TpetraCore_config.h"
14
#include "Kokkos_Macros.hpp"
15
16
namespace
Tpetra
{
17
namespace
Details
{
18
30
template
<
typename
KeyType,
typename
ValueType,
typename
IndexType>
31
KOKKOS_INLINE_FUNCTION
void
32
radixSortKeysAndValues
(
KeyType
*
keys
,
KeyType
*
keysAux
,
ValueType
*
values
,
ValueType
*
valuesAux
,
IndexType
n
,
IndexType
upperBound
) {
33
if
(
n
<= 1)
34
return
;
35
KeyType
mask
= 0xF;
36
bool
inAux
=
false
;
37
// maskPos counts the low bit index of mask (0, 4, 8, ...)
38
IndexType
maskPos
= 0;
39
// Count number of bits required to sort (8 * sizeof(KeyType) - lzcnt(maxKey - minKey))
40
IndexType
sortBits
= 0;
41
while
(
upperBound
) {
42
upperBound
>>= 1;
43
sortBits
++;
44
}
45
for
(
IndexType
s
= 0;
s
< (
sortBits
+ 3) / 4;
s
++) {
46
// Count the number of elements in each bucket
47
IndexType
count[16] = {0};
48
IndexType
offset
[17] = {0};
49
if
(!
inAux
) {
50
for
(
IndexType
i
= 0;
i
<
n
;
i
++) {
51
count[(
keys
[
i
] &
mask
) >>
maskPos
]++;
52
}
53
}
else
{
54
for
(
IndexType
i
= 0;
i
<
n
;
i
++) {
55
count[(
keysAux
[
i
] &
mask
) >>
maskPos
]++;
56
}
57
}
58
// get offset as the prefix sum for count
59
for
(
IndexType
i
= 0;
i
< 16;
i
++) {
60
offset
[
i
+ 1] =
offset
[
i
] + count[
i
];
61
}
62
// now for each element in [lo, hi), move it to its offset in the other buffer
63
// this branch should be ok because whichBuf is the same on all threads
64
if
(!
inAux
) {
65
// copy from *Over to *Aux
66
for
(
IndexType
i
= 0;
i
<
n
;
i
++) {
67
IndexType
bucket
= (
keys
[
i
] &
mask
) >>
maskPos
;
68
keysAux
[
offset
[
bucket
+ 1] - count[
bucket
]] =
keys
[
i
];
69
valuesAux
[
offset
[
bucket
+ 1] - count[
bucket
]] =
values
[
i
];
70
count[
bucket
]--;
71
}
72
}
else
{
73
// copy from *Aux to *Over
74
for
(
IndexType
i
= 0;
i
<
n
;
i
++) {
75
IndexType
bucket
= (
keysAux
[
i
] &
mask
) >>
maskPos
;
76
keys
[
offset
[
bucket
+ 1] - count[
bucket
]] =
keysAux
[
i
];
77
values
[
offset
[
bucket
+ 1] - count[
bucket
]] =
valuesAux
[
i
];
78
count[
bucket
]--;
79
}
80
}
81
inAux
= !
inAux
;
82
mask
=
mask
<< 4;
83
maskPos
+= 4;
84
}
85
if
(
inAux
) {
86
// need to deep copy from aux arrays to main
87
for
(
IndexType
i
= 0;
i
<
n
;
i
++) {
88
keys
[
i
] =
keysAux
[
i
];
89
values
[
i
] =
valuesAux
[
i
];
90
}
91
}
92
}
93
94
}
// namespace Details
95
}
// namespace Tpetra
96
97
#endif
// include guard
Tpetra::CrsMatrixStruct
Struct that holds views of the contents of a CrsMatrix.
Definition
TpetraExt_MMHelpers_decl.hpp:36
Details
Implementation details of Tpetra.
Tpetra::Details::radixSortKeysAndValues
KOKKOS_INLINE_FUNCTION void radixSortKeysAndValues(KeyType *keys, KeyType *keysAux, ValueType *values, ValueType *valuesAux, IndexType n, IndexType upperBound)
Radix sort the input array keys, and permute values identically to the keys.
Definition
Tpetra_Details_radixSort.hpp:32
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Generated on Thu Oct 9 2025 21:01:01 for Tpetra parallel linear algebra by
1.9.8