Kokkos Core Kernels Package Version of the Day
Loading...
Searching...
No Matches
Kokkos_BitManipulation.hpp
1//@HEADER
2// ************************************************************************
3//
4// Kokkos v. 4.0
5// Copyright (2022) National Technology & Engineering
6// Solutions of Sandia, LLC (NTESS).
7//
8// Under the terms of Contract DE-NA0003525 with NTESS,
9// the U.S. Government retains certain rights in this software.
10//
11// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12// See https://kokkos.org/LICENSE for license information.
13// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14//
15//@HEADER
16
17#ifndef KOKKOS_BIT_MANIPULATION_HPP
18#define KOKKOS_BIT_MANIPULATION_HPP
19
20#include <Kokkos_Macros.hpp>
21#include <Kokkos_NumericTraits.hpp>
22#include <climits> // CHAR_BIT
23#include <cstring> //memcpy
24#include <type_traits>
25
26namespace Kokkos::Impl {
27
28template <class T>
29KOKKOS_FUNCTION constexpr T byteswap_fallback(T x) {
30 if constexpr (sizeof(T) > 1) {
31 using U = std::make_unsigned_t<T>;
32
33 size_t shift = CHAR_BIT * (sizeof(T) - 1);
34
35 U lo_mask = static_cast<unsigned char>(~0);
36 U hi_mask = lo_mask << shift;
37
38 U val = x;
39
40 for (size_t i = 0; i < sizeof(T) / 2; ++i) {
41 U lo_val = val & lo_mask;
42 U hi_val = val & hi_mask;
43
44 val = (val & ~lo_mask) | (hi_val >> shift);
45 val = (val & ~hi_mask) | (lo_val << shift);
46
47 lo_mask <<= CHAR_BIT;
48 hi_mask >>= CHAR_BIT;
49
50 shift -= static_cast<size_t>(2) * CHAR_BIT;
51 }
52 return val;
53 }
54 // sizeof(T) == 1
55 return x;
56}
57
58template <class T>
59KOKKOS_FUNCTION constexpr int countl_zero_fallback(T x) {
60 // From Hacker's Delight (2nd edition) section 5-3
61 unsigned int y = 0;
62 using ::Kokkos::Experimental::digits_v;
63 int n = digits_v<T>;
64 int c = digits_v<T> / 2;
65 do {
66 y = x >> c;
67 if (y != 0) {
68 n -= c;
69 x = y;
70 }
71 c >>= 1;
72 } while (c != 0);
73 return n - static_cast<int>(x);
74}
75
76template <class T>
77KOKKOS_FUNCTION constexpr int countr_zero_fallback(T x) {
78 using ::Kokkos::Experimental::digits_v;
79 return digits_v<T> - countl_zero_fallback(static_cast<T>(
80 static_cast<T>(~x) & static_cast<T>(x - 1)));
81}
82
83template <class T>
84KOKKOS_FUNCTION constexpr int popcount_fallback(T x) {
85 int c = 0;
86 for (; x != 0; x &= x - 1) {
87 ++c;
88 }
89 return c;
90}
91
92template <class T>
93inline constexpr bool is_standard_unsigned_integer_type_v =
94 std::is_same_v<T, unsigned char> || std::is_same_v<T, unsigned short> ||
95 std::is_same_v<T, unsigned int> || std::is_same_v<T, unsigned long> ||
96 std::is_same_v<T, unsigned long long>;
97
98} // namespace Kokkos::Impl
99
100namespace Kokkos {
101
102//<editor-fold desc="[bit.cast], bit_cast">
103template <class To, class From>
104KOKKOS_FUNCTION std::enable_if_t<sizeof(To) == sizeof(From) &&
105 std::is_trivially_copyable_v<To> &&
106 std::is_trivially_copyable_v<From>,
107 To>
108bit_cast(From const& from) noexcept {
109#if defined(KOKKOS_ENABLE_SYCL)
110 return sycl::bit_cast<To>(from);
111#else
112 To to;
113 memcpy(static_cast<void*>(&to), static_cast<const void*>(&from), sizeof(To));
114 return to;
115#endif
116}
117//</editor-fold>
118
119//<editor-fold desc="[bit.byteswap], byteswap">
120template <class T>
121KOKKOS_FUNCTION constexpr std::enable_if_t<std::is_integral_v<T>, T> byteswap(
122 T value) noexcept {
123 return Impl::byteswap_fallback(value);
124}
125//</editor-fold>
126
127//<editor-fold desc="[bit.count], counting">
128template <class T>
129KOKKOS_FUNCTION constexpr std::enable_if_t<
130 Impl::is_standard_unsigned_integer_type_v<T>, int>
131countl_zero(T x) noexcept {
132 using ::Kokkos::Experimental::digits_v;
133 if (x == 0) return digits_v<T>;
134 // TODO use compiler intrinsics when available
135 return Impl::countl_zero_fallback(x);
136}
137
138template <class T>
139KOKKOS_FUNCTION constexpr std::enable_if_t<
140 Impl::is_standard_unsigned_integer_type_v<T>, int>
141countl_one(T x) noexcept {
142 using ::Kokkos::Experimental::digits_v;
143 using ::Kokkos::Experimental::finite_max_v;
144 if (x == finite_max_v<T>) return digits_v<T>;
145 return countl_zero(static_cast<T>(~x));
146}
147
148template <class T>
149KOKKOS_FUNCTION constexpr std::enable_if_t<
150 Impl::is_standard_unsigned_integer_type_v<T>, int>
151countr_zero(T x) noexcept {
152 using ::Kokkos::Experimental::digits_v;
153 if (x == 0) return digits_v<T>;
154 // TODO use compiler intrinsics when available
155 return Impl::countr_zero_fallback(x);
156}
157
158template <class T>
159KOKKOS_FUNCTION constexpr std::enable_if_t<
160 Impl::is_standard_unsigned_integer_type_v<T>, int>
161countr_one(T x) noexcept {
162 using ::Kokkos::Experimental::digits_v;
163 using ::Kokkos::Experimental::finite_max_v;
164 if (x == finite_max_v<T>) return digits_v<T>;
165 return countr_zero(static_cast<T>(~x));
166}
167
168template <class T>
169KOKKOS_FUNCTION constexpr std::enable_if_t<
170 Impl::is_standard_unsigned_integer_type_v<T>, int>
171popcount(T x) noexcept {
172 if (x == 0) return 0;
173 // TODO use compiler intrinsics when available
174 return Impl::popcount_fallback(x);
175}
176//</editor-fold>
177
178//<editor-fold desc="[bit.pow.two], integral powers of 2">
179template <class T>
180KOKKOS_FUNCTION constexpr std::enable_if_t<
181 Impl::is_standard_unsigned_integer_type_v<T>, bool>
182has_single_bit(T x) noexcept {
183 return x != 0 && (((x & (x - 1)) == 0));
184}
185
186template <class T>
187KOKKOS_FUNCTION constexpr std::enable_if_t<
188 Impl::is_standard_unsigned_integer_type_v<T>, T>
189bit_ceil(T x) noexcept {
190 if (x <= 1) return 1;
191 using ::Kokkos::Experimental::digits_v;
192 return T{1} << (digits_v<T> - countl_zero(static_cast<T>(x - 1)));
193}
194
195template <class T>
196KOKKOS_FUNCTION constexpr std::enable_if_t<
197 Impl::is_standard_unsigned_integer_type_v<T>, T>
198bit_floor(T x) noexcept {
199 if (x == 0) return 0;
200 using ::Kokkos::Experimental::digits_v;
201 return T{1} << (digits_v<T> - 1 - countl_zero(x));
202}
203
204template <class T>
205KOKKOS_FUNCTION constexpr std::enable_if_t<
206 Impl::is_standard_unsigned_integer_type_v<T>, T>
207bit_width(T x) noexcept {
208 if (x == 0) return 0;
209 using ::Kokkos::Experimental::digits_v;
210 return digits_v<T> - countl_zero(x);
211}
212//</editor-fold>
213
214//<editor-fold desc="[bit.rotate], rotating">
215template <class T>
216[[nodiscard]] KOKKOS_FUNCTION constexpr std::enable_if_t<
217 Impl::is_standard_unsigned_integer_type_v<T>, T>
218rotl(T x, int s) noexcept {
219 using Experimental::digits_v;
220 constexpr auto dig = digits_v<T>;
221 int const rem = s % dig;
222 if (rem == 0) return x;
223 if (rem > 0) return (x << rem) | (x >> ((dig - rem) % dig));
224 return (x >> -rem) | (x << ((dig + rem) % dig)); // rotr(x, -rem)
225}
226
227template <class T>
228[[nodiscard]] KOKKOS_FUNCTION constexpr std::enable_if_t<
229 Impl::is_standard_unsigned_integer_type_v<T>, T>
230rotr(T x, int s) noexcept {
231 using Experimental::digits_v;
232 constexpr auto dig = digits_v<T>;
233 int const rem = s % dig;
234 if (rem == 0) return x;
235 if (rem > 0) return (x >> rem) | (x << ((dig - rem) % dig));
236 return (x << -rem) | (x >> ((dig + rem) % dig)); // rotl(x, -rem)
237}
238//</editor-fold>
239
240} // namespace Kokkos
241
242namespace Kokkos::Impl {
243
244#if defined(KOKKOS_COMPILER_CLANG) || defined(KOKKOS_COMPILER_INTEL_LLVM) || \
245 defined(KOKKOS_COMPILER_GNU)
246#define KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
247#endif
248
249template <class T>
250KOKKOS_IMPL_DEVICE_FUNCTION T byteswap_builtin_device(T x) noexcept {
251 return byteswap_fallback(x);
252}
253
254template <class T>
255KOKKOS_IMPL_HOST_FUNCTION T byteswap_builtin_host(T x) noexcept {
256#ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
257 if constexpr (sizeof(T) == 1) {
258 return x;
259 } else if constexpr (sizeof(T) == 2) {
260 return __builtin_bswap16(x);
261 } else if constexpr (sizeof(T) == 4) {
262 return __builtin_bswap32(x);
263 } else if constexpr (sizeof(T) == 8) {
264 return __builtin_bswap64(x);
265 } else if constexpr (sizeof(T) == 16) {
266#if defined(__has_builtin)
267#if __has_builtin(__builtin_bswap128)
268 return __builtin_bswap128(x);
269#endif
270#endif
271 return (__builtin_bswap64(x >> 64) |
272 (static_cast<T>(__builtin_bswap64(x)) << 64));
273 }
274#endif
275
276 return byteswap_fallback(x);
277}
278
279template <class T>
280KOKKOS_IMPL_DEVICE_FUNCTION
281 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
282 countl_zero_builtin_device(T x) noexcept {
283#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
284 if constexpr (sizeof(T) == sizeof(long long int))
285 return __clzll(reinterpret_cast<long long int&>(x));
286 if constexpr (sizeof(T) == sizeof(int))
287 return __clz(reinterpret_cast<int&>(x));
288 using ::Kokkos::Experimental::digits_v;
289 constexpr int shift = digits_v<unsigned int> - digits_v<T>;
290 return __clz(x) - shift;
291#elif defined(KOKKOS_ENABLE_SYCL)
292 return sycl::clz(x);
293#else
294 return countl_zero_fallback(x);
295#endif
296}
297
298template <class T>
299KOKKOS_IMPL_HOST_FUNCTION
300 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
301 countl_zero_builtin_host(T x) noexcept {
302 using ::Kokkos::Experimental::digits_v;
303 if (x == 0) return digits_v<T>;
304#ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
305 if constexpr (std::is_same_v<T, unsigned long long>) {
306 return __builtin_clzll(x);
307 } else if constexpr (std::is_same_v<T, unsigned long>) {
308 return __builtin_clzl(x);
309 } else if constexpr (std::is_same_v<T, unsigned int>) {
310 return __builtin_clz(x);
311 } else {
312 constexpr int shift = digits_v<unsigned int> - digits_v<T>;
313 return __builtin_clz(x) - shift;
314 }
315#else
316 return countl_zero_fallback(x);
317#endif
318}
319
320template <class T>
321KOKKOS_IMPL_DEVICE_FUNCTION
322 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
323 countr_zero_builtin_device(T x) noexcept {
324 using ::Kokkos::Experimental::digits_v;
325 if (x == 0) return digits_v<T>;
326#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
327 if constexpr (sizeof(T) == sizeof(long long int))
328 return __ffsll(reinterpret_cast<long long int&>(x)) - 1;
329 return __ffs(reinterpret_cast<int&>(x)) - 1;
330#elif defined(KOKKOS_ENABLE_SYCL)
331 return sycl::ctz(x);
332#else
333 return countr_zero_fallback(x);
334#endif
335}
336
337template <class T>
338KOKKOS_IMPL_HOST_FUNCTION
339 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
340 countr_zero_builtin_host(T x) noexcept {
341 using ::Kokkos::Experimental::digits_v;
342 if (x == 0) return digits_v<T>;
343#ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
344 if constexpr (std::is_same_v<T, unsigned long long>) {
345 return __builtin_ctzll(x);
346 } else if constexpr (std::is_same_v<T, unsigned long>) {
347 return __builtin_ctzl(x);
348 } else {
349 return __builtin_ctz(x);
350 }
351#else
352 return countr_zero_fallback(x);
353#endif
354}
355
356template <class T>
357KOKKOS_IMPL_DEVICE_FUNCTION
358 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
359 popcount_builtin_device(T x) noexcept {
360#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
361 if constexpr (sizeof(T) == sizeof(long long int)) return __popcll(x);
362 return __popc(x);
363#elif defined(KOKKOS_ENABLE_SYCL)
364 return sycl::popcount(x);
365#else
366 return popcount_fallback(x);
367#endif
368}
369
370template <class T>
371KOKKOS_IMPL_HOST_FUNCTION
372 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
373 popcount_builtin_host(T x) noexcept {
374#ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
375 if constexpr (std::is_same_v<T, unsigned long long>) {
376 return __builtin_popcountll(x);
377 } else if constexpr (std::is_same_v<T, unsigned long>) {
378 return __builtin_popcountl(x);
379 } else {
380 return __builtin_popcount(x);
381 }
382#else
383 return popcount_fallback(x);
384#endif
385}
386
387#undef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
388
389} // namespace Kokkos::Impl
390
391namespace Kokkos::Experimental {
392
393template <class To, class From>
394KOKKOS_FUNCTION std::enable_if_t<sizeof(To) == sizeof(From) &&
395 std::is_trivially_copyable_v<To> &&
396 std::is_trivially_copyable_v<From>,
397 To>
398bit_cast_builtin(From const& from) noexcept {
399 // qualify the call to avoid ADL
400 return Kokkos::bit_cast<To>(from); // no benefit to call the _builtin variant
401}
402
403template <class T>
404KOKKOS_FUNCTION std::enable_if_t<std::is_integral_v<T>, T> byteswap_builtin(
405 T x) noexcept {
406 KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::byteswap_builtin_device(x);))
407 KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::byteswap_builtin_host(x);))
408// FIXME-NVHPC: erroneous warning about return from non-void function
409#if defined(KOKKOS_ENABLE_OPENACC) && defined(KOKKOS_COMPILER_NVHPC)
410 return T();
411#endif
412}
413
414template <class T>
415KOKKOS_FUNCTION std::enable_if_t<
416 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
417countl_zero_builtin(T x) noexcept {
418 KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::countl_zero_builtin_device(x);))
419 KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::countl_zero_builtin_host(x);))
420// FIXME-NVHPC: erroneous warning about return from non-void function
421#if defined(KOKKOS_ENABLE_OPENACC) && defined(KOKKOS_COMPILER_NVHPC)
422 return 0;
423#endif
424}
425
426template <class T>
427KOKKOS_FUNCTION std::enable_if_t<
428 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
429countl_one_builtin(T x) noexcept {
430 if (x == finite_max_v<T>) return digits_v<T>;
431 return countl_zero_builtin(static_cast<T>(~x));
432}
433
434template <class T>
435KOKKOS_FUNCTION std::enable_if_t<
436 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
437countr_zero_builtin(T x) noexcept {
438 KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::countr_zero_builtin_device(x);))
439 KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::countr_zero_builtin_host(x);))
440// FIXME-NVHPC: erroneous warning about return from non-void function
441#if defined(KOKKOS_ENABLE_OPENACC) && defined(KOKKOS_COMPILER_NVHPC)
442 return 0;
443#endif
444}
445
446template <class T>
447KOKKOS_FUNCTION std::enable_if_t<
448 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
449countr_one_builtin(T x) noexcept {
450 if (x == finite_max_v<T>) return digits_v<T>;
451 return countr_zero_builtin(static_cast<T>(~x));
452}
453
454template <class T>
455KOKKOS_FUNCTION std::enable_if_t<
456 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
457popcount_builtin(T x) noexcept {
458 KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::popcount_builtin_device(x);))
459 KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::popcount_builtin_host(x);))
460// FIXME-NVHPC: erroneous warning about return from non-void function
461#if defined(KOKKOS_ENABLE_OPENACC) && defined(KOKKOS_COMPILER_NVHPC)
462 return 0;
463#endif
464}
465
466template <class T>
467KOKKOS_FUNCTION std::enable_if_t<
468 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, bool>
469has_single_bit_builtin(T x) noexcept {
470 return has_single_bit(x); // no benefit to call the _builtin variant
471}
472
473template <class T>
474KOKKOS_FUNCTION
475 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
476 bit_ceil_builtin(T x) noexcept {
477 if (x <= 1) return 1;
478 return T{1} << (digits_v<T> - countl_zero_builtin(static_cast<T>(x - 1)));
479}
480
481template <class T>
482KOKKOS_FUNCTION
483 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
484 bit_floor_builtin(T x) noexcept {
485 if (x == 0) return 0;
486 return T{1} << (digits_v<T> - 1 - countl_zero_builtin(x));
487}
488
489template <class T>
490KOKKOS_FUNCTION
491 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
492 bit_width_builtin(T x) noexcept {
493 if (x == 0) return 0;
494 return digits_v<T> - countl_zero_builtin(x);
495}
496
497template <class T>
498[[nodiscard]] KOKKOS_FUNCTION
499 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
500 rotl_builtin(T x, int s) noexcept {
501 return rotl(x, s); // no benefit to call the _builtin variant
502}
503
504template <class T>
505[[nodiscard]] KOKKOS_FUNCTION
506 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
507 rotr_builtin(T x, int s) noexcept {
508 return rotr(x, s); // no benefit to call the _builtin variant
509}
510
511} // namespace Kokkos::Experimental
512
513#endif
A thread safe view to a bitset.
ScopeGuard Some user scope issues have been identified with some Kokkos::finalize calls; ScopeGuard a...