Kokkos Core Kernels Package Version of the Day
Loading...
Searching...
No Matches
KokkosExp_MDRangePolicy.hpp
1//@HEADER
2// ************************************************************************
3//
4// Kokkos v. 4.0
5// Copyright (2022) National Technology & Engineering
6// Solutions of Sandia, LLC (NTESS).
7//
8// Under the terms of Contract DE-NA0003525 with NTESS,
9// the U.S. Government retains certain rights in this software.
10//
11// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12// See https://kokkos.org/LICENSE for license information.
13// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14//
15//@HEADER
16
17#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18#include <Kokkos_Macros.hpp>
19static_assert(false,
20 "Including non-public Kokkos header files is not allowed.");
21#endif
22#ifndef KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
23#define KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
24
25#include <initializer_list>
26
27#include <Kokkos_Layout.hpp>
28#include <Kokkos_Rank.hpp>
29#include <Kokkos_Array.hpp>
30#include <impl/KokkosExp_Host_IterateTile.hpp>
31#include <Kokkos_ExecPolicy.hpp>
32#include <type_traits>
33#include <cmath>
34
35namespace Kokkos {
36
37// ------------------------------------------------------------------ //
38// Moved to Kokkos_Layout.hpp for more general accessibility
39/*
40enum class Iterate
41{
42 Default, // Default for the device
43 Left, // Left indices stride fastest
44 Right, // Right indices stride fastest
45};
46*/
47
48template <typename ExecSpace>
49struct default_outer_direction {
50 using type = Iterate;
51 static constexpr Iterate value = Iterate::Right;
52};
53
54template <typename ExecSpace>
55struct default_inner_direction {
56 using type = Iterate;
57 static constexpr Iterate value = Iterate::Right;
58};
59
60namespace Impl {
// NOTE the "is it below T{}" comparison is wrapped in a function to silence
// warnings about the pointless comparison of an unsigned integer with zero.
// Overload for types that are not signed: such a value can never be below
// T{}, so the answer is always false and the argument is not inspected.
template <class T>
constexpr auto is_less_than_value_initialized_variable(T)
    -> std::enable_if_t<!std::is_signed_v<T>, bool> {
  return false;
}
68
// Overload for signed types: returns true exactly when `arg` is smaller than
// a value-initialized T (i.e. smaller than zero).
template <class T>
constexpr auto is_less_than_value_initialized_variable(T arg)
    -> std::enable_if_t<std::is_signed_v<T>, bool> {
  constexpr T zero{};
  return arg < zero;
}
74
75// Checked narrowing conversion that calls abort if the cast changes the value
76template <class To, class From>
77constexpr To checked_narrow_cast(From arg, std::size_t idx) {
78 constexpr const bool is_different_signedness =
79 (std::is_signed_v<To> != std::is_signed_v<From>);
80 auto const ret = static_cast<To>(arg); // NOLINT(bugprone-signed-char-misuse)
81 if (static_cast<From>(ret) != arg ||
82 (is_different_signedness &&
83 is_less_than_value_initialized_variable(arg) !=
84 is_less_than_value_initialized_variable(ret))) {
85 auto msg =
86 "Kokkos::MDRangePolicy bound type error: an unsafe implicit conversion "
87 "is performed on a bound (" +
88 std::to_string(arg) + ") in dimension (" + std::to_string(idx) +
89 "), which may not preserve its original value.\n";
90 Kokkos::abort(msg.c_str());
91 }
92 return ret;
93}
94// NOTE prefer C array U[M] to std::initalizer_list<U> so that the number of
95// elements can be deduced (https://stackoverflow.com/q/40241370)
96// NOTE for some unfortunate reason the policy bounds are stored as signed
97// integer arrays (point_type which is Kokkos::Array<std::int64_t>) so we
98// specify the index type (actual policy index_type from the traits) and check
99// ahead of time that narrowing conversions will be safe.
100template <class IndexType, class Array, class U, std::size_t M>
101constexpr Array to_array_potentially_narrowing(const U (&init)[M]) {
102 using T = typename Array::value_type;
103 Array a{};
104 constexpr std::size_t N = a.size();
105 static_assert(M <= N);
106 auto* ptr = a.data();
107 // NOTE equivalent to
108 // std::transform(std::begin(init), std::end(init), a.data(),
109 // [](U x) { return static_cast<T>(x); });
110 // except that std::transform is not constexpr.
111 for (std::size_t i = 0; i < M; ++i) {
112 *ptr++ = checked_narrow_cast<T>(init[i], i);
113 (void)checked_narrow_cast<IndexType>(init[i], i); // see note above
114 }
115 return a;
116}
117
118// NOTE Making a copy even when std::is_same<Array, Kokkos::Array<U, M>>::value
119// is true to reduce code complexity. You may change this if you have a good
120// reason to. Intentionally not enabling std::array at this time but this may
121// change too.
122template <class IndexType, class NVCC_WONT_LET_ME_CALL_YOU_Array, class U,
123 std::size_t M>
124constexpr NVCC_WONT_LET_ME_CALL_YOU_Array to_array_potentially_narrowing(
125 Kokkos::Array<U, M> const& other) {
126 using T = typename NVCC_WONT_LET_ME_CALL_YOU_Array::value_type;
127 NVCC_WONT_LET_ME_CALL_YOU_Array a{};
128 constexpr std::size_t N = a.size();
129 static_assert(M <= N);
130 for (std::size_t i = 0; i < M; ++i) {
131 a[i] = checked_narrow_cast<T>(other[i], i);
132 (void)checked_narrow_cast<IndexType>(other[i], i); // see note above
133 }
134 return a;
135}
136
// Per-execution-space limits and defaults consulted when MDRangePolicy picks
// or validates tile sizes.
struct TileSizeProperties {
  // Upper limit for the product of all tile dimensions; exceeding it aborts.
  int max_threads;
  // Tile size for the last rank in the inner iteration direction when the user
  // gave none; 0 means "use the full extent of that dimension (at least 1)".
  int default_largest_tile_size;
  // Tile size for the remaining ranks when the user gave none.
  int default_tile_size;
  // Bound on the running tile-size product while defaults are being chosen;
  // once it would be reached, further default tile sizes fall back to 1.
  int max_total_tile_size;
};
143
144template <typename ExecutionSpace>
145TileSizeProperties get_tile_size_properties(const ExecutionSpace&) {
146 // Host settings
147 TileSizeProperties properties;
148 properties.max_threads = std::numeric_limits<int>::max();
149 properties.default_largest_tile_size = 0;
150 properties.default_tile_size = 2;
151 properties.max_total_tile_size = std::numeric_limits<int>::max();
152 return properties;
153}
154
155} // namespace Impl
156
157// multi-dimensional iteration pattern
158template <typename... Properties>
159struct MDRangePolicy;
160
161// Note: If MDRangePolicy has a primary template, implicit CTAD (deduction
162// guides) are generated -> MDRangePolicy<> by some compilers, which is
163// incorrect. By making it a template specialization instead, no implicit CTAD
164// is generated. This works because there has to be at least one property
165// specified (which is Rank<...>); otherwise, we'd get the static_assert
166// "Kokkos::Error: MD iteration pattern not defined". This template
167// specialization uses <P, Properties...> in all places for correctness.
168template <typename P, typename... Properties>
169struct MDRangePolicy<P, Properties...>
170 : public Kokkos::Impl::PolicyTraits<P, Properties...> {
171 using traits = Kokkos::Impl::PolicyTraits<P, Properties...>;
172 using range_policy = RangePolicy<P, Properties...>;
173
174 typename traits::execution_space m_space;
175
176 using impl_range_policy =
177 RangePolicy<typename traits::execution_space,
178 typename traits::schedule_type, typename traits::index_type>;
179
180 using execution_policy =
181 MDRangePolicy<P, Properties...>; // needed for is_execution_policy
182 // interrogation
183
184 template <class... OtherProperties>
185 friend struct MDRangePolicy;
186
187 static_assert(!std::is_void_v<typename traits::iteration_pattern>,
188 "Kokkos Error: MD iteration pattern not defined");
189
190 using iteration_pattern = typename traits::iteration_pattern;
191 using work_tag = typename traits::work_tag;
192 using launch_bounds = typename traits::launch_bounds;
193 using member_type = typename range_policy::member_type;
194
195 static constexpr int rank = iteration_pattern::rank;
196 static_assert(rank < 7, "Kokkos MDRangePolicy Error: Unsupported rank...");
197
198 using index_type = typename traits::index_type;
199 using array_index_type = std::int64_t;
200 using point_type = Kokkos::Array<array_index_type, rank>; // was index_type
// If point_type or tile_type is not templated on a signed integral type (i.e.
// if it is unsigned), then a user who passes in an initializer_list of
// runtime-determined, non-const values of signed integral type will receive a
// compiler error due to an invalid case for implicit conversion -
// "conversion from integer or unscoped enumeration type to integer type that
// cannot represent all values of the original, except where source is a
// constant expression whose value can be stored exactly in the target type"
// This would require the user to either pass a matching index_type parameter
// as template parameter to the MDRangePolicy or static_cast the individual
// values
212
213 point_type m_lower = {};
214 point_type m_upper = {};
215 tile_type m_tile = {};
216 point_type m_tile_end = {};
217 index_type m_num_tiles = 1;
218 index_type m_prod_tile_dims = 1;
219 bool m_tune_tile_size = false;
220
221 static constexpr auto outer_direction =
222 (iteration_pattern::outer_direction != Iterate::Default)
223 ? iteration_pattern::outer_direction
224 : default_outer_direction<typename traits::execution_space>::value;
225
226 static constexpr auto inner_direction =
227 iteration_pattern::inner_direction != Iterate::Default
228 ? iteration_pattern::inner_direction
229 : default_inner_direction<typename traits::execution_space>::value;
230
231 static constexpr auto Right = Iterate::Right;
232 static constexpr auto Left = Iterate::Left;
233
234 KOKKOS_INLINE_FUNCTION const typename traits::execution_space& space() const {
235 return m_space;
236 }
237
238 MDRangePolicy() = default;
239
240 template <typename LT, std::size_t LN, typename UT, std::size_t UN,
241 typename TT = array_index_type, std::size_t TN = rank,
242 typename = std::enable_if_t<std::is_integral_v<LT> &&
243 std::is_integral_v<UT> &&
244 std::is_integral_v<TT>>>
245 MDRangePolicy(const LT (&lower)[LN], const UT (&upper)[UN],
246 const TT (&tile)[TN] = {})
247 : MDRangePolicy(
248 Impl::to_array_potentially_narrowing<index_type, decltype(m_lower)>(
249 lower),
250 Impl::to_array_potentially_narrowing<index_type, decltype(m_upper)>(
251 upper),
252 Impl::to_array_potentially_narrowing<index_type, decltype(m_tile)>(
253 tile)) {
254 static_assert(
255 LN == rank && UN == rank && TN <= rank,
256 "MDRangePolicy: Constructor initializer lists have wrong size");
257 }
258
259 template <typename LT, std::size_t LN, typename UT, std::size_t UN,
260 typename TT = array_index_type, std::size_t TN = rank,
261 typename = std::enable_if_t<std::is_integral_v<LT> &&
262 std::is_integral_v<UT> &&
263 std::is_integral_v<TT>>>
264 MDRangePolicy(const typename traits::execution_space& work_space,
265 const LT (&lower)[LN], const UT (&upper)[UN],
266 const TT (&tile)[TN] = {})
267 : MDRangePolicy(
268 work_space,
269 Impl::to_array_potentially_narrowing<index_type, decltype(m_lower)>(
270 lower),
271 Impl::to_array_potentially_narrowing<index_type, decltype(m_upper)>(
272 upper),
273 Impl::to_array_potentially_narrowing<index_type, decltype(m_tile)>(
274 tile)) {
275 static_assert(
276 LN == rank && UN == rank && TN <= rank,
277 "MDRangePolicy: Constructor initializer lists have wrong size");
278 }
279
280 // NOTE: Keeping these two constructor despite the templated constructors
281 // from Kokkos arrays for backwards compability to allow construction from
282 // double-braced initializer lists.
283 MDRangePolicy(point_type const& lower, point_type const& upper,
284 tile_type const& tile = tile_type{})
285 : MDRangePolicy(typename traits::execution_space(), lower, upper, tile) {}
286
287 MDRangePolicy(const typename traits::execution_space& work_space,
288 point_type const& lower, point_type const& upper,
289 tile_type const& tile = tile_type{})
290 : m_space(work_space), m_lower(lower), m_upper(upper), m_tile(tile) {
291 init_helper(Impl::get_tile_size_properties(work_space));
292 }
293
294 template <typename T, std::size_t NT = rank,
295 typename = std::enable_if_t<std::is_integral_v<T>>>
296 MDRangePolicy(Kokkos::Array<T, rank> const& lower,
297 Kokkos::Array<T, rank> const& upper,
299 : MDRangePolicy(typename traits::execution_space(), lower, upper, tile) {}
300
301 template <typename T, std::size_t NT = rank,
302 typename = std::enable_if_t<std::is_integral_v<T>>>
303 MDRangePolicy(const typename traits::execution_space& work_space,
304 Kokkos::Array<T, rank> const& lower,
305 Kokkos::Array<T, rank> const& upper,
307 : MDRangePolicy(
308 work_space,
309 Impl::to_array_potentially_narrowing<index_type, decltype(m_lower)>(
310 lower),
311 Impl::to_array_potentially_narrowing<index_type, decltype(m_upper)>(
312 upper),
313 Impl::to_array_potentially_narrowing<index_type, decltype(m_tile)>(
314 tile)) {}
315
316 template <class... OtherProperties>
317 MDRangePolicy(const MDRangePolicy<OtherProperties...> p)
318 : traits(p), // base class may contain data such as desired occupancy
319 m_space(p.m_space),
320 m_lower(p.m_lower),
321 m_upper(p.m_upper),
322 m_tile(p.m_tile),
323 m_tile_end(p.m_tile_end),
324 m_num_tiles(p.m_num_tiles),
325 m_prod_tile_dims(p.m_prod_tile_dims),
326 m_tune_tile_size(p.m_tune_tile_size) {}
327
328 void impl_change_tile_size(const point_type& tile) {
329 m_tile = tile;
330 init_helper(Impl::get_tile_size_properties(m_space));
331 }
332 bool impl_tune_tile_size() const { return m_tune_tile_size; }
333
334 tile_type tile_size_recommended() const {
335 tile_type rec_tile_sizes = {};
336
337 for (std::size_t i = 0; i < rec_tile_sizes.size(); ++i) {
338 rec_tile_sizes[i] = tile_size_recommended(i);
339 }
340 return rec_tile_sizes;
341 }
342
343 int max_total_tile_size() const {
344 return Impl::get_tile_size_properties(m_space).max_total_tile_size;
345 }
346
347 private:
348 int tile_size_recommended(const int tile_rank) const {
349 auto properties = Impl::get_tile_size_properties(m_space);
350 int last_rank = (inner_direction == Iterate::Right) ? rank - 1 : 0;
351 int rank_acc =
352 (inner_direction == Iterate::Right) ? tile_rank + 1 : tile_rank - 1;
353 int rec_tile_size = (std::pow(properties.default_tile_size, rank_acc) <
354 properties.max_total_tile_size)
355 ? properties.default_tile_size
356 : 1;
357
358 if (tile_rank == last_rank) {
359 rec_tile_size = tile_size_last_rank(
360 properties, m_upper[last_rank] - m_lower[last_rank]);
361 }
362 return rec_tile_size;
363 }
364
365 int tile_size_last_rank(const Impl::TileSizeProperties properties,
366 const index_type length) const {
367 return properties.default_largest_tile_size == 0
368 ? std::max<int>(length, 1)
369 : properties.default_largest_tile_size;
370 }
371
372 void init_helper(Impl::TileSizeProperties properties) {
373 m_prod_tile_dims = 1;
374 int increment = 1;
375 int rank_start = 0;
376 int rank_end = rank;
377 if (inner_direction == Iterate::Right) {
378 increment = -1;
379 rank_start = rank - 1;
380 rank_end = -1;
381 }
382
383 for (int i = rank_start; i != rank_end; i += increment) {
384 const index_type length = m_upper[i] - m_lower[i];
385
386 if (m_upper[i] < m_lower[i]) {
387 std::string msg =
388 "Kokkos::MDRangePolicy bounds error: The lower bound (" +
389 std::to_string(m_lower[i]) + ") is greater than its upper bound (" +
390 std::to_string(m_upper[i]) + ") in dimension " + std::to_string(i) +
391 ".\n";
392#if !defined(KOKKOS_ENABLE_DEPRECATED_CODE_4)
393 Kokkos::abort(msg.c_str());
394#elif defined(KOKKOS_ENABLE_DEPRECATION_WARNINGS)
395 Kokkos::Impl::log_warning(msg);
396#endif
397 }
398
399 if (m_tile[i] <= 0) {
400 m_tune_tile_size = true;
401 if ((inner_direction == Iterate::Right && (i < rank - 1)) ||
402 (inner_direction == Iterate::Left && (i > 0))) {
403 if (m_prod_tile_dims * properties.default_tile_size <
404 static_cast<index_type>(properties.max_total_tile_size)) {
405 m_tile[i] = properties.default_tile_size;
406 } else {
407 m_tile[i] = 1;
408 }
409 } else {
410 m_tile[i] = tile_size_last_rank(properties, length);
411 }
412 }
413 m_tile_end[i] =
414 static_cast<index_type>((length + m_tile[i] - 1) / m_tile[i]);
415 m_num_tiles *= m_tile_end[i];
416 m_prod_tile_dims *= m_tile[i];
417 }
418 if (m_prod_tile_dims > static_cast<index_type>(properties.max_threads)) {
419 printf(" Product of tile dimensions exceed maximum limit: %d\n",
420 static_cast<int>(properties.max_threads));
421 Kokkos::abort(
422 "ExecSpace Error: MDRange tile dims exceed maximum number "
423 "of threads per block - choose smaller tile dims");
424 }
425 }
426};
427
428template <typename LT, size_t N, typename UT>
429MDRangePolicy(const LT (&)[N], const UT (&)[N]) -> MDRangePolicy<Rank<N>>;
430
431template <typename LT, size_t N, typename UT, typename TT, size_t TN>
432MDRangePolicy(const LT (&)[N], const UT (&)[N], const TT (&)[TN])
433 -> MDRangePolicy<Rank<N>>;
434
435template <typename LT, size_t N, typename UT>
436MDRangePolicy(DefaultExecutionSpace const&, const LT (&)[N], const UT (&)[N])
437 -> MDRangePolicy<Rank<N>>;
438
439template <typename LT, size_t N, typename UT, typename TT, size_t TN>
440MDRangePolicy(DefaultExecutionSpace const&, const LT (&)[N], const UT (&)[N],
441 const TT (&)[TN]) -> MDRangePolicy<Rank<N>>;
442
443template <typename ES, typename LT, size_t N, typename UT,
444 typename = std::enable_if_t<is_execution_space_v<ES>>>
445MDRangePolicy(ES const&, const LT (&)[N], const UT (&)[N])
446 -> MDRangePolicy<ES, Rank<N>>;
447
448template <typename ES, typename LT, size_t N, typename UT, typename TT,
449 size_t TN, typename = std::enable_if_t<is_execution_space_v<ES>>>
450MDRangePolicy(ES const&, const LT (&)[N], const UT (&)[N], const TT (&)[TN])
451 -> MDRangePolicy<ES, Rank<N>>;
452
453template <typename T, size_t N>
454MDRangePolicy(Array<T, N> const&, Array<T, N> const&) -> MDRangePolicy<Rank<N>>;
455
456template <typename T, size_t N, size_t NT>
457MDRangePolicy(Array<T, N> const&, Array<T, N> const&, Array<T, NT> const&)
458 -> MDRangePolicy<Rank<N>>;
459
460template <typename T, size_t N>
461MDRangePolicy(DefaultExecutionSpace const&, Array<T, N> const&,
462 Array<T, N> const&) -> MDRangePolicy<Rank<N>>;
463
464template <typename T, size_t N, size_t NT>
465MDRangePolicy(DefaultExecutionSpace const&, Array<T, N> const&,
466 Array<T, N> const&, Array<T, NT> const&)
467 -> MDRangePolicy<Rank<N>>;
468
469template <typename ES, typename T, size_t N,
470 typename = std::enable_if_t<is_execution_space_v<ES>>>
471MDRangePolicy(ES const&, Array<T, N> const&, Array<T, N> const&)
472 -> MDRangePolicy<ES, Rank<N>>;
473
474template <typename ES, typename T, size_t N, size_t NT,
475 typename = std::enable_if_t<is_execution_space_v<ES>>>
476MDRangePolicy(ES const&, Array<T, N> const&, Array<T, N> const&,
477 Array<T, NT> const&) -> MDRangePolicy<ES, Rank<N>>;
478
479} // namespace Kokkos
480
481#endif // KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
Declaration of various MemoryLayout options.
A thread safe view to a bitset.
KOKKOS_FORCEINLINE_FUNCTION unsigned size() const