Kokkos Core Kernels Package Version of the Day
Loading...
Searching...
No Matches
Kokkos_MemoryPool.hpp
1//@HEADER
2// ************************************************************************
3//
4// Kokkos v. 4.0
5// Copyright (2022) National Technology & Engineering
6// Solutions of Sandia, LLC (NTESS).
7//
8// Under the terms of Contract DE-NA0003525 with NTESS,
9// the U.S. Government retains certain rights in this software.
10//
11// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12// See https://kokkos.org/LICENSE for license information.
13// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14//
15//@HEADER
16
17#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18#include <Kokkos_Macros.hpp>
19static_assert(false,
20 "Including non-public Kokkos header files is not allowed.");
21#endif
22#ifndef KOKKOS_MEMORYPOOL_HPP
23#define KOKKOS_MEMORYPOOL_HPP
24
25#include <Kokkos_Core_fwd.hpp>
26#include <Kokkos_Parallel.hpp>
27#include <Kokkos_Atomic.hpp>
28#include <impl/Kokkos_ConcurrentBitset.hpp>
29#include <impl/Kokkos_Error.hpp>
30#include <impl/Kokkos_SharedAlloc.hpp>
31
32// NOLINTBEGIN(bugprone-implicit-widening-of-multiplication-result)
33namespace Kokkos {
34namespace Impl {
35/* Report violation of size constraints:
36 * min_block_alloc_size <= max_block_alloc_size
37 * max_block_alloc_size <= min_superblock_size
38 * min_superblock_size <= max_superblock_size
39 * min_superblock_size <= min_total_alloc_size
40 * min_superblock_size <= min_block_alloc_size *
41 * max_block_per_superblock
42 */
43void memory_pool_bounds_verification(size_t min_block_alloc_size,
44 size_t max_block_alloc_size,
45 size_t min_superblock_size,
46 size_t max_superblock_size,
47 size_t max_block_per_superblock,
48 size_t min_total_alloc_size);
49} // namespace Impl
50} // namespace Kokkos
51
52namespace Kokkos {
53
54namespace Impl {
55
56void _print_memory_pool_state(std::ostream &s, uint32_t const *sb_state_ptr,
57 int32_t sb_count, uint32_t sb_size_lg2,
58 uint32_t sb_state_size, uint32_t state_shift,
59 uint32_t state_used_mask);
60
61} // end namespace Impl
62
63template <typename DeviceType>
64class MemoryPool {
65 private:
66 using CB = Kokkos::Impl::concurrent_bitset;
67
68 enum : uint32_t { bits_per_int_lg2 = CB::bits_per_int_lg2 };
69 enum : uint32_t { state_shift = CB::state_shift };
70 enum : uint32_t { state_used_mask = CB::state_used_mask };
71 enum : uint32_t { state_header_mask = CB::state_header_mask };
72 enum : uint32_t { max_bit_count_lg2 = CB::max_bit_count_lg2 };
73 enum : uint32_t { max_bit_count = CB::max_bit_count };
74
75 enum : uint32_t { HINT_PER_BLOCK_SIZE = 2 };
76
77 static KOKKOS_FUNCTION unsigned integral_power_of_two_that_contains(
78 const unsigned N) {
79 return N ? Kokkos::bit_width(N - 1) : 0;
80 }
81
82 /* Each superblock has a concurrent bitset state
83 * which is an array of uint32_t integers.
84 * [ { block_count_lg2 : state_shift bits
85 * , used_block_count : ( 32 - state_shift ) bits
86 * }
87 * , { block allocation bit set }* ]
88 *
89 * As superblocks are assigned (allocated) to a block size
90 * and released (deallocated) back to empty the superblock state
91 * is concurrently updated.
92 */
93
94 /* Mapping between block_size <-> block_state
95 *
96 * block_state = ( m_sb_size_lg2 - block_size_lg2 ) << state_shift
97 * block_size = m_sb_size_lg2 - ( block_state >> state_shift )
98 *
99 * Thus A_block_size < B_block_size <=> A_block_state > B_block_state
100 */
101
102 using base_memory_space = typename DeviceType::memory_space;
103
104 enum {
106 base_memory_space>::accessible
107 };
108
109 using Tracker = Kokkos::Impl::SharedAllocationTracker;
110 using Record = Kokkos::Impl::SharedAllocationRecord<base_memory_space>;
111
112 Tracker m_tracker;
113 uint32_t *m_sb_state_array;
114 uint32_t m_sb_state_size;
115 uint32_t m_sb_size_lg2;
116 uint32_t m_max_block_size_lg2;
117 uint32_t m_min_block_size_lg2;
118 int32_t m_sb_count;
119 int32_t m_hint_offset; // Offset to K * #block_size array of hints
120 int32_t m_data_offset; // Offset to 0th superblock data
121 int32_t m_unused_padding;
122
123 public:
124 using memory_space = typename DeviceType::memory_space;
125
127 enum : uint32_t { max_superblock_size = 1LU << 31 /* 2 gigabytes */ };
128 enum : uint32_t { max_block_per_superblock = max_bit_count };
129
130 //--------------------------------------------------------------------------
131
132 KOKKOS_INLINE_FUNCTION
133 bool operator==(MemoryPool const &other) const {
134 return m_sb_state_array == other.m_sb_state_array;
135 }
136
137 KOKKOS_INLINE_FUNCTION
138 size_t capacity() const noexcept {
139 return size_t(m_sb_count) << m_sb_size_lg2;
140 }
141
142 KOKKOS_INLINE_FUNCTION
143 size_t min_block_size() const noexcept {
144 return (1LU << m_min_block_size_lg2);
145 }
146
147 KOKKOS_INLINE_FUNCTION
148 size_t max_block_size() const noexcept {
149 return (1LU << m_max_block_size_lg2);
150 }
151
152 struct usage_statistics {
153 size_t capacity_bytes;
154 size_t superblock_bytes;
155 size_t max_block_bytes;
156 size_t min_block_bytes;
157 size_t capacity_superblocks;
158 size_t consumed_superblocks;
159 size_t consumed_blocks;
160 size_t consumed_bytes;
161 size_t reserved_blocks;
162 size_t reserved_bytes;
163 };
164
165 // This function is templated to avoid needing a full definition of
166 // DefaultHostExecutionSpace at class instantiation
167 template <typename ExecutionSpace = Kokkos::DefaultHostExecutionSpace>
168 void get_usage_statistics(usage_statistics &stats) const {
170 static_assert(
171 std::is_same_v<ExecutionSpace, Kokkos::DefaultHostExecutionSpace>);
172
173 const size_t alloc_size = m_hint_offset * sizeof(uint32_t);
174
175 uint32_t *const sb_state_array =
176 accessible ? m_sb_state_array : (uint32_t *)host.allocate(alloc_size);
177
178 if (!accessible) {
179 Kokkos::Impl::DeepCopy<Kokkos::HostSpace, base_memory_space>(
180 ExecutionSpace{}, sb_state_array, m_sb_state_array, alloc_size);
181 Kokkos::fence(
182 "MemoryPool::get_usage_statistics(): fence after copying state "
183 "array to HostSpace");
184 }
185
186 stats.superblock_bytes = (1LU << m_sb_size_lg2);
187 stats.max_block_bytes = (1LU << m_max_block_size_lg2);
188 stats.min_block_bytes = (1LU << m_min_block_size_lg2);
189 stats.capacity_bytes = stats.superblock_bytes * m_sb_count;
190 stats.capacity_superblocks = m_sb_count;
191 stats.consumed_superblocks = 0;
192 stats.consumed_blocks = 0;
193 stats.consumed_bytes = 0;
194 stats.reserved_blocks = 0;
195 stats.reserved_bytes = 0;
196
197 const uint32_t *sb_state_ptr = sb_state_array;
198
199 for (int32_t i = 0; i < m_sb_count; ++i, sb_state_ptr += m_sb_state_size) {
200 const uint32_t block_count_lg2 = (*sb_state_ptr) >> state_shift;
201
202 if (block_count_lg2) {
203 const uint32_t block_count = 1u << block_count_lg2;
204 const uint32_t block_size_lg2 = m_sb_size_lg2 - block_count_lg2;
205 const uint32_t block_size = 1u << block_size_lg2;
206 const uint32_t block_used = (*sb_state_ptr) & state_used_mask;
207
208 stats.consumed_superblocks++;
209 stats.consumed_blocks += block_used;
210 stats.consumed_bytes += static_cast<size_t>(block_used) * block_size;
211 stats.reserved_blocks += block_count - block_used;
212 stats.reserved_bytes +=
213 static_cast<size_t>(block_count - block_used) * block_size;
214 }
215 }
216
217 if (!accessible) {
218 host.deallocate(sb_state_array, alloc_size);
219 }
220 }
221
222 // This function is templated to avoid needing a full definition of
223 // DefaultHostExecutionSpace at class instantiation
224 template <typename ExecutionSpace = Kokkos::DefaultHostExecutionSpace>
225 void print_state(std::ostream &s) const {
227 static_assert(
228 std::is_same_v<ExecutionSpace, Kokkos::DefaultHostExecutionSpace>);
229
230 const size_t alloc_size = m_hint_offset * sizeof(uint32_t);
231
232 uint32_t *const sb_state_array =
233 accessible ? m_sb_state_array : (uint32_t *)host.allocate(alloc_size);
234
235 if (!accessible) {
236 Kokkos::Impl::DeepCopy<Kokkos::HostSpace, base_memory_space>(
237 ExecutionSpace{}, sb_state_array, m_sb_state_array, alloc_size);
238 Kokkos::fence(
239 "MemoryPool::print_state(): fence after copying state array to "
240 "HostSpace");
241 }
242
243 Impl::_print_memory_pool_state(s, sb_state_array, m_sb_count, m_sb_size_lg2,
244 m_sb_state_size, state_shift,
245 state_used_mask);
246
247 if (!accessible) {
248 host.deallocate(sb_state_array, alloc_size);
249 }
250 }
251
252 //--------------------------------------------------------------------------
253
254 KOKKOS_DEFAULTED_FUNCTION MemoryPool(MemoryPool &&) = default;
255 KOKKOS_DEFAULTED_FUNCTION MemoryPool(const MemoryPool &) = default;
256 KOKKOS_DEFAULTED_FUNCTION MemoryPool &operator=(MemoryPool &&) = default;
257 KOKKOS_DEFAULTED_FUNCTION MemoryPool &operator=(const MemoryPool &) = default;
258
259 KOKKOS_INLINE_FUNCTION MemoryPool()
260 : m_tracker(),
261 m_sb_state_array(nullptr),
262 m_sb_state_size(0),
263 m_sb_size_lg2(0),
264 m_max_block_size_lg2(0),
265 m_min_block_size_lg2(0),
266 m_sb_count(0),
267 m_hint_offset(0),
268 m_data_offset(0),
269 m_unused_padding(0) {}
270
285 MemoryPool(const base_memory_space &memspace,
286 const size_t min_total_alloc_size, size_t min_block_alloc_size = 0,
287 size_t max_block_alloc_size = 0, size_t min_superblock_size = 0)
288 : m_tracker(),
289 m_sb_state_array(nullptr),
290 m_sb_state_size(0),
291 m_sb_size_lg2(0),
292 m_max_block_size_lg2(0),
293 m_min_block_size_lg2(0),
294 m_sb_count(0),
295 m_hint_offset(0),
296 m_data_offset(0),
297 m_unused_padding(0) {
298 const uint32_t int_align_lg2 = 3; /* align as int[8] */
299 const uint32_t int_align_mask = (1u << int_align_lg2) - 1;
300 const uint32_t default_min_block_size = 1u << 6; /* 64 bytes */
301 const uint32_t default_max_block_size = 1u << 12; /* 4k bytes */
302 const uint32_t default_min_superblock_size = 1u << 20; /* 1M bytes */
303
304 //--------------------------------------------------
305 // Default block and superblock sizes:
306
307 if (0 == min_block_alloc_size) {
308 // Default all sizes:
309
310 min_superblock_size =
311 std::min(size_t(default_min_superblock_size), min_total_alloc_size);
312
313 min_block_alloc_size =
314 std::min(size_t(default_min_block_size), min_superblock_size);
315
316 max_block_alloc_size =
317 std::min(size_t(default_max_block_size), min_superblock_size);
318 } else if (0 == min_superblock_size) {
319 // Choose superblock size as minimum of:
320 // max_block_per_superblock * min_block_size
321 // max_superblock_size
322 // min_total_alloc_size
323
324 const size_t max_superblock =
325 min_block_alloc_size * max_block_per_superblock;
326
327 min_superblock_size =
328 std::min(max_superblock,
329 std::min(size_t(max_superblock_size), min_total_alloc_size));
330 }
331
332 if (0 == max_block_alloc_size) {
333 max_block_alloc_size = min_superblock_size;
334 }
335
336 //--------------------------------------------------
337
338 /* Enforce size constraints:
339 * min_block_alloc_size <= max_block_alloc_size
340 * max_block_alloc_size <= min_superblock_size
341 * min_superblock_size <= max_superblock_size
342 * min_superblock_size <= min_total_alloc_size
343 * min_superblock_size <= min_block_alloc_size *
344 * max_block_per_superblock
345 */
346
347 Kokkos::Impl::memory_pool_bounds_verification(
348 min_block_alloc_size, max_block_alloc_size, min_superblock_size,
349 max_superblock_size, max_block_per_superblock, min_total_alloc_size);
350
351 //--------------------------------------------------
352 // Block and superblock size is power of two:
353 // Maximum value is 'max_superblock_size'
354
355 m_min_block_size_lg2 =
356 integral_power_of_two_that_contains(min_block_alloc_size);
357
358 m_max_block_size_lg2 =
359 integral_power_of_two_that_contains(max_block_alloc_size);
360
361 m_sb_size_lg2 = integral_power_of_two_that_contains(min_superblock_size);
362
363 {
364 // number of superblocks is multiple of superblock size that
365 // can hold min_total_alloc_size.
366
367 const uint64_t sb_size_mask = (1LU << m_sb_size_lg2) - 1;
368
369 m_sb_count = (min_total_alloc_size + sb_size_mask) >> m_sb_size_lg2;
370 }
371
372 {
373 // Any superblock can be assigned to the smallest size block
374 // Size the block bitset to maximum number of blocks
375
376 const uint32_t max_block_count_lg2 = m_sb_size_lg2 - m_min_block_size_lg2;
377
378 m_sb_state_size =
379 (CB::buffer_bound_lg2(max_block_count_lg2) + int_align_mask) &
380 ~int_align_mask;
381 }
382
383 // Array of all superblock states
384
385 const size_t all_sb_state_size =
386 (m_sb_count * m_sb_state_size + int_align_mask) & ~int_align_mask;
387
388 // Number of block sizes
389
390 const int32_t number_block_sizes =
391 1 + m_max_block_size_lg2 - m_min_block_size_lg2;
392
393 // Array length for possible block sizes
394 // Hint array is one uint32_t per block size
395
396 const int32_t block_size_array_size =
397 (number_block_sizes + int_align_mask) & ~int_align_mask;
398
399 m_hint_offset = all_sb_state_size;
400 m_data_offset = m_hint_offset + block_size_array_size * HINT_PER_BLOCK_SIZE;
401
402 // Allocation:
403
404 const size_t header_size = m_data_offset * sizeof(uint32_t);
405 const size_t alloc_size =
406 header_size + (size_t(m_sb_count) << m_sb_size_lg2);
407
408 Record *rec = Record::allocate(memspace, "Kokkos::MemoryPool", alloc_size);
409
410 m_tracker.assign_allocated_record_to_uninitialized(rec);
411
412 m_sb_state_array = (uint32_t *)rec->data();
413
415
416 uint32_t *const sb_state_array =
417 accessible ? m_sb_state_array : (uint32_t *)host.allocate(header_size);
418
419 for (int32_t i = 0; i < m_data_offset; ++i) sb_state_array[i] = 0;
420
421 // Initial assignment of empty superblocks to block sizes:
422
423 for (int32_t i = 0; i < number_block_sizes; ++i) {
424 const uint32_t block_size_lg2 = i + m_min_block_size_lg2;
425 const uint32_t block_count_lg2 = m_sb_size_lg2 - block_size_lg2;
426 const uint32_t block_state = block_count_lg2 << state_shift;
427 const uint32_t hint_begin = m_hint_offset + i * HINT_PER_BLOCK_SIZE;
428
429 // for block size index 'i':
430 // sb_id_hint = sb_state_array[ hint_begin ];
431 // sb_id_begin = sb_state_array[ hint_begin + 1 ];
432
433 const int32_t jbeg = (i * m_sb_count) / number_block_sizes;
434 const int32_t jend = ((i + 1) * m_sb_count) / number_block_sizes;
435
436 sb_state_array[hint_begin] = uint32_t(jbeg);
437 sb_state_array[hint_begin + 1] = uint32_t(jbeg);
438
439 for (int32_t j = jbeg; j < jend; ++j) {
440 sb_state_array[j * m_sb_state_size] = block_state;
441 }
442 }
443
444 // Write out initialized state:
445
446 if (!accessible) {
447 Kokkos::Impl::DeepCopy<base_memory_space, Kokkos::HostSpace>(
448 typename base_memory_space::execution_space{}, m_sb_state_array,
449 sb_state_array, header_size);
450 Kokkos::fence(
451 "MemoryPool::MemoryPool(): fence after copying state array from "
452 "HostSpace");
453
454 host.deallocate(sb_state_array, header_size);
455 } else {
456 Kokkos::memory_fence();
457 }
458 }
459
460 //--------------------------------------------------------------------------
461
462 private:
463 /* Given a size 'n' get the block size in which it can be allocated.
464 * Restrict lower bound to minimum block size.
465 */
466 KOKKOS_FORCEINLINE_FUNCTION
467 uint32_t get_block_size_lg2(uint32_t n) const noexcept {
468 const unsigned i = integral_power_of_two_that_contains(n);
469
470 return i < m_min_block_size_lg2 ? m_min_block_size_lg2 : i;
471 }
472
473 public:
474 /* Return 0 for invalid block size */
475 KOKKOS_INLINE_FUNCTION
476 uint32_t allocate_block_size(uint64_t alloc_size) const noexcept {
477 return alloc_size <= (uint64_t(1) << m_max_block_size_lg2)
478 ? (1UL << get_block_size_lg2(uint32_t(alloc_size)))
479 : 0;
480 }
481
482 //--------------------------------------------------------------------------
492 KOKKOS_FUNCTION
493 void *allocate(size_t alloc_size, int32_t attempt_limit = 1) const noexcept {
494 if ((size_t(1) << m_max_block_size_lg2) < alloc_size) {
495 Kokkos::abort(
496 "Kokkos MemoryPool allocation request exceeded specified maximum "
497 "allocation size");
498 }
499
500 if (0 == alloc_size) return nullptr;
501
502 void *p = nullptr;
503
504 const uint32_t block_size_lg2 = get_block_size_lg2(alloc_size);
505
506 // Allocation will fit within a superblock
507 // that has block sizes ( 1 << block_size_lg2 )
508
509 const uint32_t block_count_lg2 = m_sb_size_lg2 - block_size_lg2;
510 const uint32_t block_state = block_count_lg2 << state_shift;
511 const uint32_t block_count = 1u << block_count_lg2;
512
513 // Superblock hints for this block size:
514 // hint_sb_id_ptr[0] is the dynamically changing hint
515 // hint_sb_id_ptr[1] is the static start point
516
517 volatile uint32_t *const hint_sb_id_ptr =
518 m_sb_state_array /* memory pool state array */
519 + m_hint_offset /* offset to hint portion of array */
520 + HINT_PER_BLOCK_SIZE /* number of hints per block size */
521 * (block_size_lg2 - m_min_block_size_lg2); /* block size id */
522
523 const int32_t sb_id_begin = int32_t(hint_sb_id_ptr[1]);
524
525 // Fast query clock register 'tic' to pseudo-randomize
526 // the guess for which block within a superblock should
527 // be claimed. If not available then a search occurs.
528#if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ARCH_INTEL_GPU)
529 const uint32_t block_id_hint = alloc_size;
530#else
531 const uint32_t block_id_hint =
532 (uint32_t)(Kokkos::Impl::clock_tic()
533#ifdef __CUDA_ARCH__ // FIXME_CUDA
534 // Spread out potentially concurrent access
535 // by threads within a warp or thread block.
536 + (threadIdx.x + blockDim.x * threadIdx.y)
537#endif
538 );
539#endif
540
541 // expected state of superblock for allocation
542 uint32_t sb_state = block_state;
543
544 int32_t sb_id = -1;
545
546 volatile uint32_t *sb_state_array = nullptr;
547
548 while (attempt_limit) {
549 int32_t hint_sb_id = -1;
550
551 if (sb_id < 0) {
552 // No superblock specified, try the hint for this block size
553
554 sb_id = hint_sb_id = int32_t(*hint_sb_id_ptr);
555
556 sb_state_array = m_sb_state_array + (sb_id * m_sb_state_size);
557 }
558
559 // Require:
560 // 0 <= sb_id
561 // sb_state_array == m_sb_state_array + m_sb_state_size * sb_id
562
563 if (sb_state == (state_header_mask & *sb_state_array)) {
564 // This superblock state is as expected, for the moment.
565 // Attempt to claim a bit. The attempt updates the state
566 // so have already made sure the state header is as expected.
567
568 const uint32_t count_lg2 = sb_state >> state_shift;
569 const uint32_t mask = (1u << count_lg2) - 1;
570
571 const Kokkos::pair<int, int> result = CB::acquire_bounded_lg2(
572 sb_state_array, count_lg2, block_id_hint & mask, sb_state);
573
574 // If result.first < 0 then failed to acquire
575 // due to either full or buffer was wrong state.
576 // Could be wrong state if a deallocation raced the
577 // superblock to empty before the acquire could succeed.
578
579 if (0 <= result.first) { // acquired a bit
580
581 const uint32_t size_lg2 = m_sb_size_lg2 - count_lg2;
582
583 // Set the allocated block pointer
584
585 p = ((char *)(m_sb_state_array + m_data_offset)) +
586 (uint64_t(sb_id) << m_sb_size_lg2) // superblock memory
587 + (uint64_t(result.first) << size_lg2); // block memory
588
589 break; // Success
590 }
591 }
592 //------------------------------------------------------------------
593 // Arrive here if failed to acquire a block.
594 // Must find a new superblock.
595
596 // Start searching at designated index for this block size.
597 // Look for superblock that, in preferential order,
598 // 1) part-full superblock of this block size
599 // 2) empty superblock to claim for this block size
600 // 3) part-full superblock of the next larger block size
601
602 sb_state = block_state; // Expect to find the desired state
603 sb_id = -1;
604
605 bool update_hint = false;
606 int32_t sb_id_empty = -1;
607 int32_t sb_id_large = -1;
608 uint32_t sb_state_large = 0;
609
610 sb_state_array = m_sb_state_array + sb_id_begin * m_sb_state_size;
611
612 for (int32_t i = 0, id = sb_id_begin; i < m_sb_count; ++i) {
613 // Query state of the candidate superblock.
614 // Note that the state may change at any moment
615 // as concurrent allocations and deallocations occur.
616
617 const uint32_t full_state = *sb_state_array;
618 const uint32_t used = full_state & state_used_mask;
619 const uint32_t state = full_state & state_header_mask;
620
621 if (state == block_state) {
622 // Superblock is assigned to this block size
623
624 if (used < block_count) {
625 // There is room to allocate one block
626
627 sb_id = id;
628
629 // Is there room to allocate more than one block?
630
631 update_hint = used + 1 < block_count;
632
633 break;
634 }
635 } else if (0 == used) {
636 // Superblock is empty
637
638 if (-1 == sb_id_empty) {
639 // Superblock is not assigned to this block size
640 // and is the first empty superblock encountered.
641 // Save this id to use if a partfull superblock is not found.
642
643 sb_id_empty = id;
644 }
645 } else if ((-1 == sb_id_empty /* have not found an empty */) &&
646 (-1 == sb_id_large /* have not found a larger */) &&
647 (state < block_state /* a larger block */) &&
648 // is not full:
649 (used < (1u << (state >> state_shift)))) {
650 // First superblock encountered that is
651 // larger than this block size and
652 // has room for an allocation.
653 // Save this id to use of partfull or empty superblock not found
654 sb_id_large = id;
655 sb_state_large = state;
656 }
657
658 // Iterate around the superblock array:
659
660 if (++id < m_sb_count) {
661 sb_state_array += m_sb_state_size;
662 } else {
663 id = 0;
664 sb_state_array = m_sb_state_array;
665 }
666 }
667
668 // printf(" search m_sb_count(%d) sb_id(%d) sb_id_empty(%d)
669 // sb_id_large(%d)\n" , m_sb_count , sb_id , sb_id_empty , sb_id_large);
670
671 if (sb_id < 0) {
672 // Did not find a partfull superblock for this block size.
673
674 if (0 <= sb_id_empty) {
675 // Found first empty superblock following designated superblock
676 // Attempt to claim it for this block size.
677 // If the claim fails assume that another thread claimed it
678 // for this block size and try to use it anyway,
679 // but do not update hint.
680
681 sb_id = sb_id_empty;
682
683 sb_state_array = m_sb_state_array + (sb_id * m_sb_state_size);
684
685 // If successfully changed assignment of empty superblock 'sb_id'
686 // to this block_size then update the hint.
687
688 const uint32_t state_empty = state_header_mask & *sb_state_array;
689
690 // If this thread claims the empty block then update the hint
691 update_hint =
692 state_empty == Kokkos::atomic_compare_exchange(
693 sb_state_array, state_empty, block_state);
694 } else if (0 <= sb_id_large) {
695 // Found a larger superblock with space available
696
697 sb_id = sb_id_large;
698 sb_state = sb_state_large;
699
700 sb_state_array = m_sb_state_array + (sb_id * m_sb_state_size);
701 } else {
702 // Did not find a potentially usable superblock
703 --attempt_limit;
704 }
705 }
706
707 if (update_hint) {
708 Kokkos::atomic_compare_exchange(hint_sb_id_ptr, uint32_t(hint_sb_id),
709 uint32_t(sb_id));
710 }
711 } // end allocation attempt loop
712 //--------------------------------------------------------------------
713
714 return p;
715 }
716 // end allocate
717 //--------------------------------------------------------------------------
718
725 KOKKOS_INLINE_FUNCTION
726 void deallocate(void *p, size_t /* alloc_size */) const noexcept {
727 if (nullptr == p) return;
728
729 // Determine which superblock and block
730 const ptrdiff_t d =
731 static_cast<char *>(p) -
732 reinterpret_cast<char *>(m_sb_state_array + m_data_offset);
733
734 // Verify contained within the memory pool's superblocks:
735 const int ok_contains =
736 (0 <= d) && (size_t(d) < (size_t(m_sb_count) << m_sb_size_lg2));
737
738 int ok_block_aligned = 0;
739 int ok_dealloc_once = 0;
740
741 if (ok_contains) {
742 const int sb_id = d >> m_sb_size_lg2;
743
744 // State array for the superblock.
745 volatile uint32_t *const sb_state_array =
746 m_sb_state_array + (sb_id * m_sb_state_size);
747
748 const uint32_t block_state = (*sb_state_array) & state_header_mask;
749 const uint32_t block_size_lg2 =
750 m_sb_size_lg2 - (block_state >> state_shift);
751
752 ok_block_aligned = 0 == (d & ((1UL << block_size_lg2) - 1));
753
754 if (ok_block_aligned) {
755 // Map address to block's bit
756 // mask into superblock and then shift down for block index
757
758 const uint32_t bit =
759 (d & ((ptrdiff_t(1) << m_sb_size_lg2) - 1)) >> block_size_lg2;
760
761 const int result = CB::release(sb_state_array, bit, block_state);
762
763 ok_dealloc_once = 0 <= result;
764 }
765 }
766
767 if (!ok_contains || !ok_block_aligned || !ok_dealloc_once) {
768 Kokkos::abort("Kokkos MemoryPool::deallocate given erroneous pointer");
769 }
770 }
771 // end deallocate
772 //--------------------------------------------------------------------------
773
774 KOKKOS_INLINE_FUNCTION
775 int number_of_superblocks() const noexcept { return m_sb_count; }
776
777 KOKKOS_INLINE_FUNCTION
778 void superblock_state(int sb_id, int &block_size, int &block_count_capacity,
779 int &block_count_used) const noexcept {
780 block_size = 0;
781 block_count_capacity = 0;
782 block_count_used = 0;
783
784 bool can_access_state_array = []() {
785 KOKKOS_IF_ON_HOST(
786 (return SpaceAccessibility<DefaultHostExecutionSpace,
787 base_memory_space>::accessible;))
788 KOKKOS_IF_ON_DEVICE(
789 (return SpaceAccessibility<DefaultExecutionSpace,
790 base_memory_space>::accessible;))
791 }();
792
793 if (can_access_state_array) {
794 // Can access the state array
795
796 const uint32_t state =
797 ((uint32_t volatile *)m_sb_state_array)[sb_id * m_sb_state_size];
798
799 const uint32_t block_count_lg2 = state >> state_shift;
800 const uint32_t block_used = state & state_used_mask;
801
802 block_size = 1LU << (m_sb_size_lg2 - block_count_lg2);
803 block_count_capacity = 1LU << block_count_lg2;
804 block_count_used = block_used;
805 }
806 }
807};
808
809} // namespace Kokkos
810 // NOLINTEND(bugprone-implicit-widening-of-multiplication-result)
811
812#endif /* #ifndef KOKKOS_MEMORYPOOL_HPP */
Atomic functions.
Declaration of parallel operators.
A thread safe view to a bitset.
Memory management for host memory.
void * allocate(const ExecutionSpace &, const size_t arg_alloc_size) const
Allocate untracked memory in the space.
void deallocate(void *const arg_alloc_ptr, const size_t arg_alloc_size) const
Deallocate untracked memory in the space.
Access relationship between DstMemorySpace and SrcMemorySpace.