Tpetra parallel linear algebra
Version of the Day
Loading...
Searching...
No Matches
core
src
Tpetra_Details_radixSort.hpp
1
// @HEADER
2
// *****************************************************************************
3
// Tpetra: Templated Linear Algebra Services Package
4
//
5
// Copyright 2008 NTESS and the Tpetra contributors.
6
// SPDX-License-Identifier: BSD-3-Clause
7
// *****************************************************************************
8
// @HEADER
9
10
#ifndef TPETRA_DETAILS_RADIXSORT_HPP
11
#define TPETRA_DETAILS_RADIXSORT_HPP
12
13
#include "TpetraCore_config.h"
14
#include "Kokkos_Macros.hpp"
15
16
namespace
Tpetra
{
17
namespace
Details
{
18
31
template
<
typename
KeyType,
typename
ValueType,
typename
IndexType>
32
KOKKOS_INLINE_FUNCTION
void
33
radixSortKeysAndValues
(
KeyType
* keys,
KeyType
*
keysAux
,
ValueType
*
values
,
ValueType
*
valuesAux
,
IndexType
n
,
IndexType
upperBound
) {
34
if
(
n
<= 1)
35
return
;
36
KeyType
mask
= 0xF;
37
bool
inAux
=
false
;
38
// maskPos counts the low bit index of mask (0, 4, 8, ...)
39
IndexType
maskPos
= 0;
40
// Count number of bits required to sort (8 * sizeof(KeyType) - lzcnt(maxKey - minKey))
41
IndexType
sortBits
= 0;
42
while
(
upperBound
) {
43
upperBound
>>= 1;
44
sortBits
++;
45
}
46
for
(
IndexType
s
= 0;
s
< (
sortBits
+ 3) / 4;
s
++) {
47
// Count the number of elements in each bucket
48
IndexType
count[16] = {0};
49
IndexType
offset
[17] = {0};
50
if
(!
inAux
) {
51
for
(
IndexType
i
= 0;
i
<
n
;
i
++) {
52
count[(keys[
i
] &
mask
) >>
maskPos
]++;
53
}
54
}
else
{
55
for
(
IndexType
i
= 0;
i
<
n
;
i
++) {
56
count[(
keysAux
[
i
] &
mask
) >>
maskPos
]++;
57
}
58
}
59
// get offset as the prefix sum for count
60
for
(
IndexType
i
= 0;
i
< 16;
i
++) {
61
offset
[
i
+ 1] =
offset
[
i
] + count[
i
];
62
}
63
// now for each element in [lo, hi), move it to its offset in the other buffer
64
// this branch should be ok because whichBuf is the same on all threads
65
if
(!
inAux
) {
66
// copy from *Over to *Aux
67
for
(
IndexType
i
= 0;
i
<
n
;
i
++) {
68
IndexType
bucket
= (keys[
i
] &
mask
) >>
maskPos
;
69
keysAux
[
offset
[
bucket
+ 1] - count[
bucket
]] = keys[
i
];
70
valuesAux
[
offset
[
bucket
+ 1] - count[
bucket
]] =
values
[
i
];
71
count[
bucket
]--;
72
}
73
}
else
{
74
// copy from *Aux to *Over
75
for
(
IndexType
i
= 0;
i
<
n
;
i
++) {
76
IndexType
bucket
= (
keysAux
[
i
] &
mask
) >>
maskPos
;
77
keys[
offset
[
bucket
+ 1] - count[
bucket
]] =
keysAux
[
i
];
78
values
[
offset
[
bucket
+ 1] - count[
bucket
]] =
valuesAux
[
i
];
79
count[
bucket
]--;
80
}
81
}
82
inAux
= !
inAux
;
83
mask
=
mask
<< 4;
84
maskPos
+= 4;
85
}
86
if
(
inAux
) {
87
// need to deep copy from aux arrays to main
88
for
(
IndexType
i
= 0;
i
<
n
;
i
++) {
89
keys[
i
] =
keysAux
[
i
];
90
values
[
i
] =
valuesAux
[
i
];
91
}
92
}
93
}
94
95
}
// namespace Details
96
}
// namespace Tpetra
97
98
#endif
// include guard
Tpetra::CrsMatrixStruct
Struct that holds views of the contents of a CrsMatrix.
Definition
TpetraExt_MMHelpers_decl.hpp:36
Details
Implementation details of Tpetra.
Tpetra::Details::radixSortKeysAndValues
KOKKOS_INLINE_FUNCTION void radixSortKeysAndValues(KeyType *keys, KeyType *keysAux, ValueType *values, ValueType *valuesAux, IndexType n, IndexType upperBound)
Radix sort the input array keys, and permute values identically to the keys.
Definition
Tpetra_Details_radixSort.hpp:33
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Generated by
1.9.8