  using execution_space = typename Device::execution_space;
  static constexpr unsigned BIT_SCAN_REVERSE   = 1u;
  static constexpr unsigned MOVE_HINT_BACKWARD = 2u;
  static constexpr unsigned BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u;
  static constexpr unsigned BIT_SCAN_REVERSE_MOVE_HINT_FORWARD =
      BIT_SCAN_REVERSE;
  static constexpr unsigned BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD =
      MOVE_HINT_BACKWARD;
  static constexpr unsigned BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD =
      BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD;
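  /// A scan_direction combines two independent flags: BIT_SCAN_REVERSE (bit 0)
  /// scans a block from its high end instead of its low end, and
  /// MOVE_HINT_BACKWARD (bit 1) moves the returned hint to the previous block
  /// instead of the next one.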
  // Each block is one 'unsigned': bit i lives in block (i >> block_shift) at
  // position (i & block_mask).
  static constexpr unsigned block_size =
      static_cast<unsigned>(sizeof(unsigned) * CHAR_BIT);
  static constexpr unsigned block_mask = block_size - 1u;
  static constexpr unsigned block_shift =
      Kokkos::has_single_bit(block_size) ? Kokkos::bit_width(block_size) - 1
                                         : ~0u;

  // Type of m_blocks.
  using block_view_type = View<unsigned*, Device, MemoryTraits<RandomAccess>>;
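  /// Construct a bitset holding arg_size bits, allocated with the given view
  /// constructor properties; the blocks are zero-initialized, so every bit
  /// starts at 0.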
  template <class... P>
  Bitset(const Impl::ViewCtorProp<P...>& arg_prop, unsigned arg_size)
      : m_size(arg_size), m_last_block_mask(0u) {
    // Ensure that the allocation properties are consistent.
    using alloc_prop_t = std::decay_t<decltype(arg_prop)>;
    static_assert(alloc_prop_t::initialize,
                  "Allocation property 'initialize' should be true.");
    static_assert(
        !alloc_prop_t::has_pointer,
        "Allocation properties should not contain the 'pointer' property.");
    // Update the 'label' property and allocate the blocks.
    const auto prop_copy =
        Impl::with_properties_if_unset(arg_prop, std::string("Bitset"));
    m_blocks =
        block_view_type(prop_copy, ((m_size + block_mask) >> block_shift));
    // Mask covering the bits of the last block that are actually in use.
    for (int i = 0, end = static_cast<int>(m_size & block_mask); i < end; ++i) {
      m_last_block_mask |= 1u << i;
    }
  }
  KOKKOS_DEFAULTED_FUNCTION
  Bitset(const Bitset<Device>&) = default;

  KOKKOS_DEFAULTED_FUNCTION
  Bitset& operator=(const Bitset<Device>&) = default;

  KOKKOS_DEFAULTED_FUNCTION
  Bitset(Bitset<Device>&&) = default;

  KOKKOS_DEFAULTED_FUNCTION
  Bitset& operator=(Bitset<Device>&&) = default;

  KOKKOS_DEFAULTED_FUNCTION
  ~Bitset() = default;
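  /// number of bits in the set
  /// can be called from the host or the device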
  KOKKOS_FORCEINLINE_FUNCTION
  unsigned size() const { return m_size; }
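  /// number of bits which are set to 1
  /// can only be called from the host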
  unsigned count() const {
    Impl::BitsetCount<Bitset<Device>> f(*this);
    return f.apply();
  }
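  /// set all bits to 1
  /// can only be called from the host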
  void set() {
    Kokkos::deep_copy(m_blocks, ~0u);

    if (m_last_block_mask) {
      // clear the unused bits in the last block
      auto last_block = Kokkos::subview(m_blocks, m_blocks.extent(0) - 1u);
      Kokkos::deep_copy(typename Device::execution_space{}, last_block,
                        m_last_block_mask);
      Kokkos::fence(
          "Bitset::set: fence after clearing unused bits copying from "
          "HostSpace");
    }
  }
  void reset() { Kokkos::deep_copy(m_blocks, 0u); }

  void clear() { Kokkos::deep_copy(m_blocks, 0u); }
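  /// atomically set the i'th bit to 1; returns true if this call flipped it
  /// from 0 to 1
  /// can only be called from the device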
  KOKKOS_FORCEINLINE_FUNCTION
  bool set(unsigned i) const {
    if (i < m_size) {
      unsigned* block_ptr = &m_blocks[i >> block_shift];
      const unsigned mask = 1u << static_cast<int>(i & block_mask);

      return !(atomic_fetch_or(block_ptr, mask) & mask);
    }
    return false;
  }
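  /// atomically set the i'th bit to 0; returns true if this call flipped it
  /// from 1 to 0
  /// can only be called from the device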
  KOKKOS_FORCEINLINE_FUNCTION
  bool reset(unsigned i) const {
    if (i < m_size) {
      unsigned* block_ptr = &m_blocks[i >> block_shift];
      const unsigned mask = 1u << static_cast<int>(i & block_mask);

      return atomic_fetch_and(block_ptr, ~mask) & mask;
    }
    return false;
  }
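  /// return true if the i'th bit is set to 1
  /// can be called from the host or the device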
  KOKKOS_FORCEINLINE_FUNCTION
  bool test(unsigned i) const {
    if (i < m_size) {
#ifdef KOKKOS_ENABLE_SYCL
      const unsigned block = Kokkos::atomic_load(&m_blocks[i >> block_shift]);
#else
      const unsigned block = volatile_load(&m_blocks[i >> block_shift]);
#endif
      const unsigned mask = 1u << static_cast<int>(i & block_mask);

      return block & mask;
    }
    return false;
  }
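  /// used with the find_any_set_near() and find_any_unset_near() functions;
  /// returns the maximum number of times those functions should be called
  /// when searching for an available bit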
  KOKKOS_FORCEINLINE_FUNCTION
  unsigned max_hint() const { return m_blocks.extent(0); }
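  /// find a bit set to 1 near the hint
  /// returns a pair<bool, unsigned>: if first is true, second is the index of
  /// the bit found; if first is false, second is a new hint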
  KOKKOS_INLINE_FUNCTION
  Kokkos::pair<bool, unsigned> find_any_set_near(
      unsigned hint,
      unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const {
    const unsigned block_idx =
        (hint >> block_shift) < m_blocks.extent(0) ? (hint >> block_shift) : 0;
    const unsigned offset = hint & block_mask;
#ifdef KOKKOS_ENABLE_SYCL
    unsigned block = Kokkos::atomic_load(&m_blocks[block_idx]);
#else
    unsigned block = volatile_load(&m_blocks[block_idx]);
#endif
    // Mask off the unused bits when probing the last block.
    block = !m_last_block_mask || (block_idx < (m_blocks.extent(0) - 1))
                ? block
                : block & m_last_block_mask;

    return find_any_helper(block_idx, offset, block, scan_direction);
  }
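  /// find a bit set to 0 near the hint
  /// returns a pair<bool, unsigned>: if first is true, second is the index of
  /// the bit found; if first is false, second is a new hint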
  KOKKOS_INLINE_FUNCTION
  Kokkos::pair<bool, unsigned> find_any_unset_near(
      unsigned hint,
      unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const {
    const unsigned block_idx = hint >> block_shift;
    const unsigned offset    = hint & block_mask;
#ifdef KOKKOS_ENABLE_SYCL
    unsigned block = Kokkos::atomic_load(&m_blocks[block_idx]);
#else
    unsigned block = volatile_load(&m_blocks[block_idx]);
#endif
    // Invert so that unset bits become candidates; mask the last block.
    block = !m_last_block_mask || (block_idx < (m_blocks.extent(0) - 1))
                ? ~block
                : ~block & m_last_block_mask;

    return find_any_helper(block_idx, offset, block, scan_direction);
  }
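  /// Usage sketch (illustrative only, not part of this header): a kernel can
  /// claim a free slot by probing with find_any_unset_near() and then trying
  /// to take it with set(); max_hint() bounds the number of probes. The
  /// helper name 'claim_slot' is hypothetical.
  ///
  ///   KOKKOS_INLINE_FUNCTION
  ///   unsigned claim_slot(Kokkos::Bitset<Device> const& used, unsigned hint) {
  ///     for (unsigned probe = 0; probe < used.max_hint(); ++probe) {
  ///       auto result = used.find_any_unset_near(hint);
  ///       if (result.first && used.set(result.second)) {
  ///         return result.second;  // this thread owns the slot
  ///       }
  ///       hint = result.second;  // a new hint, or a bit another thread took
  ///     }
  ///     return ~0u;  // no free slot found
  ///   }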
  KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
    return m_blocks.is_allocated();
  }
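 private:
  // Internal helpers: find_any_helper() dispatches on whether the probed block
  // still has candidate bits, scan_block() locates a candidate bit inside one
  // block, and update_hint() moves the hint to a neighboring block.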
  KOKKOS_FORCEINLINE_FUNCTION
  Kokkos::pair<bool, unsigned> find_any_helper(unsigned block_idx,
                                               unsigned offset, unsigned block,
                                               unsigned scan_direction) const {
    Kokkos::pair<bool, unsigned> result(block > 0u, 0);

    if (!result.first) {
      result.second = update_hint(block_idx, offset, scan_direction);
    } else {
      result.second =
          scan_block((block_idx << block_shift), offset, block, scan_direction);
    }
    return result;
  }
  KOKKOS_FORCEINLINE_FUNCTION
  unsigned scan_block(unsigned block_start, int offset, unsigned block,
                      unsigned scan_direction) const {
    offset = !(scan_direction & BIT_SCAN_REVERSE)
                 ? offset
                 : (offset + block_mask) & block_mask;
    block  = Experimental::rotr_builtin(block, offset);
    return (((!(scan_direction & BIT_SCAN_REVERSE)
                   ? Experimental::countr_zero_builtin(block)
                   : Experimental::bit_width_builtin(block) - 1) +
             offset) &
            block_mask) +
           block_start;
  }
  KOKKOS_FORCEINLINE_FUNCTION
  unsigned update_hint(long long block_idx, unsigned offset,
                       unsigned scan_direction) const {
    // Move the hint to the next (or previous) block, wrapping around.
    block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1;
    block_idx = block_idx >= 0 ? block_idx : m_blocks.extent(0) - 1;
    block_idx =
        block_idx < static_cast<long long>(m_blocks.extent(0)) ? block_idx : 0;

    return static_cast<unsigned>(block_idx) * block_size + offset;
  }
  unsigned m_size            = 0;
  unsigned m_last_block_mask = 0;
  block_view_type m_blocks;
  template <typename DDevice>
  friend class Bitset;

  template <typename DDevice>
  friend class ConstBitset;

  template <typename Bitset>
  friend struct Impl::BitsetCount;
  template <typename DstDevice, typename SrcDevice>
  friend void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice> const& src);

  template <typename DstDevice, typename SrcDevice>
  friend void deep_copy(Bitset<DstDevice>& dst,
                        ConstBitset<SrcDevice> const& src);