20#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
21#include <Kokkos_Macros.hpp>
23 "Including non-public Kokkos header files is not allowed.");
25#ifndef KOKKOS_PARALLEL_HPP
26#define KOKKOS_PARALLEL_HPP
28#include <Kokkos_Core_fwd.hpp>
29#include <Kokkos_DetectionIdiom.hpp>
30#include <Kokkos_ExecPolicy.hpp>
31#include <Kokkos_View.hpp>
33#include <impl/Kokkos_Tools.hpp>
34#include <impl/Kokkos_Tools_Generic.hpp>
36#include <impl/Kokkos_Traits.hpp>
37#include <impl/Kokkos_FunctorAnalysis.hpp>
50using execution_space_t =
typename T::execution_space;
53using device_type_t =
typename T::device_type;
64template <
class Functor,
class Policy>
73 !is_detected<execution_space_t, Policy>::value ||
74 !is_detected<execution_space_t, Functor>::value ||
75 std::is_same_v<policy_execution_space, functor_execution_space>,
76 "A policy with an execution space and a functor with an execution space "
77 "are given but the execution space types do not match!");
78 static_assert(!is_detected<execution_space_t, Policy>::value ||
79 !is_detected<device_type_t, Functor>::value ||
82 "A policy with an execution space and a functor with a device "
83 "type are given but the execution space types do not match!");
84 static_assert(!is_detected<device_type_t, Functor>::value ||
85 !is_detected<execution_space_t, Functor>::value ||
88 "A functor with both an execution space and device type is "
89 "given but their execution space types do not match!");
94 is_detected<device_type_t, Functor>::value,
98 execution_space_t,
Policy>;
131 class ExecPolicy,
class FunctorType,
132 class Enable = std::enable_if_t<is_execution_policy<ExecPolicy>::value>>
139 Kokkos::Tools::Impl::begin_parallel_for(policy,
functor,
str,
kpID);
143 Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
/// Overload of parallel_for that takes an execution policy but no string
/// argument.
///
/// Forwards to Kokkos::parallel_for with an empty string literal ("") as the
/// first argument, followed by `policy` and `functor` unchanged.
/// NOTE(review): the empty string is presumably the kernel label -- confirm
/// against the overload that receives it.
///
/// \param policy   Execution policy, passed through as-is.
/// \param functor  Functor, passed through as-is.
///
/// The trailing unnamed pointer parameter (defaulted to nullptr) exists only
/// as an enable_if guard: its type is well-formed only when
/// is_execution_policy<ExecPolicy>::value is true, so the overload drops out
/// of overload resolution otherwise.
151template <
class ExecPolicy,
class FunctorType>
152inline void parallel_for(
153 const ExecPolicy& policy,
const FunctorType& functor,
154 std::enable_if_t<is_execution_policy<ExecPolicy>::value>* =
nullptr) {
155 Kokkos::parallel_for(
"", policy, functor);
/// Overload of parallel_for that takes a string and a work count instead of
/// an execution policy.
///
/// The execution space is taken from
/// Impl::FunctorPolicyExecutionSpace<FunctorType, void>::execution_space, a
/// RangePolicy over that space is constructed from (0, work_count), and the
/// call is forwarded to ::Kokkos::parallel_for(str, execution_policy, functor).
/// NOTE(review): (0, work_count) presumably denotes the half-open index range
/// [0, work_count) -- confirm against RangePolicy.
///
/// \param str         String forwarded unchanged as the first argument.
/// \param work_count  Second constructor argument of the RangePolicy.
/// \param functor     Functor, passed through as-is.
158template <
class FunctorType>
159inline void parallel_for(
const std::string& str,
const size_t work_count,
160 const FunctorType& functor) {
// Execution space is derived from the functor alone; `void` is passed where
// a policy type would otherwise go.
161 using execution_space =
162 typename Impl::FunctorPolicyExecutionSpace<FunctorType,
163 void>::execution_space;
164 using policy = RangePolicy<execution_space>;
// Build the policy from (0, work_count) and dispatch to the policy-taking
// overload with the caller's string.
166 policy execution_policy = policy(0, work_count);
167 ::Kokkos::parallel_for(str, execution_policy, functor);
/// Overload of parallel_for that takes only a work count and a functor.
///
/// Forwards to ::Kokkos::parallel_for with an empty string literal ("") as the
/// first argument, followed by `work_count` and `functor` unchanged.
///
/// \param work_count  Work count, passed through as-is.
/// \param functor     Functor, passed through as-is.
170template <
class FunctorType>
171inline void parallel_for(
const size_t work_count,
const FunctorType& functor) {
172 ::Kokkos::parallel_for(
"", work_count, functor);
177#include <Kokkos_Parallel_Reduce.hpp>
348template <
class ExecutionPolicy,
class FunctorType,
350 std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>>
356 Kokkos::Tools::Impl::begin_parallel_scan(policy,
functor,
str,
kpID);
360 Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
/// Overload of parallel_scan that takes an execution policy but no string
/// argument.
///
/// Forwards to ::Kokkos::parallel_scan with an empty string literal ("") as
/// the first argument, followed by `policy` and `functor` unchanged.
/// NOTE(review): the empty string is presumably the kernel label -- confirm
/// against the overload that receives it.
///
/// \param policy   Execution policy, passed through as-is.
/// \param functor  Functor, passed through as-is.
///
/// The trailing unnamed pointer parameter (defaulted to nullptr) exists only
/// as an enable_if guard: its type is well-formed only when
/// is_execution_policy<ExecutionPolicy>::value is true, so the overload drops
/// out of overload resolution otherwise.
369template <
class ExecutionPolicy,
class FunctorType>
370inline void parallel_scan(
371 const ExecutionPolicy& policy,
const FunctorType& functor,
372 std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>* =
nullptr) {
373 ::Kokkos::parallel_scan(
"", policy, functor);
376template <
class FunctorType>
377inline void parallel_scan(
const std::string& str,
const size_t work_count,
378 const FunctorType& functor) {
379 using execution_space =
381 void>::execution_space;
385 policy execution_policy(0, work_count);
/// Overload of parallel_scan that takes only a work count and a functor.
///
/// Forwards to ::Kokkos::parallel_scan with an empty string literal ("") as
/// the first argument, followed by `work_count` and `functor` unchanged.
///
/// \param work_count  Work count, passed through as-is.
/// \param functor     Functor, passed through as-is.
389template <
class FunctorType>
390inline void parallel_scan(
const size_t work_count,
const FunctorType& functor) {
391 ::Kokkos::parallel_scan(
"", work_count, functor);
394template <
class ExecutionPolicy,
class FunctorType,
class ReturnType,
396 std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>>
397inline void parallel_scan(
const std::string& str,
const ExecutionPolicy& policy,
398 const FunctorType& functor,
401 ExecutionPolicy inner_policy = policy;
402 Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID);
404 if constexpr (Kokkos::is_view<ReturnType>::value) {
406 Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
407 Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy,
408 typename ReturnType::value_type>>(
409 functor, inner_policy, return_value);
414 Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
415 Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy,
421 Kokkos::Tools::Impl::end_parallel_scan(inner_policy, functor, str, kpID);
423 if (!Kokkos::is_view<ReturnType>::value)
424 policy.space().fence(
425 "Kokkos::parallel_scan: fence due to result being a value, not a view");
428template <
class ExecutionPolicy,
class FunctorType,
class ReturnType>
430 const ExecutionPolicy& policy,
const FunctorType& functor,
432 std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>* =
nullptr) {
433 ::Kokkos::parallel_scan(
"", policy, functor, return_value);
436template <
class FunctorType,
class ReturnType>
437inline void parallel_scan(
const std::string& str,
const size_t work_count,
438 const FunctorType& functor,
440 using execution_space =
442 void>::execution_space;
446 policy execution_policy(0, work_count);
450template <
class FunctorType,
class ReturnType>
451inline void parallel_scan(
const size_t work_count,
const FunctorType& functor,
453 ::Kokkos::parallel_scan(
"", work_count, functor, return_value);
464template <
class FunctorType,
465 bool HasTeamShmemSize =
466 has_member_team_shmem_size<FunctorType>::value,
467 bool HasShmemSize = has_member_shmem_size<FunctorType>::value>
468struct FunctorTeamShmemSize {
469 KOKKOS_INLINE_FUNCTION
static size_t value(
const FunctorType&,
int) {
/// Partial specialization of FunctorTeamShmemSize for the <true, false> case.
///
/// Per the primary template's default arguments in this file, the two bools
/// are has_member_team_shmem_size<FunctorType>::value and
/// has_member_shmem_size<FunctorType>::value, so this specialization is the
/// one chosen when only the first of those is true.
///
/// value(f, team_size) returns f.team_shmem_size(team_size), converted to
/// size_t.
/// NOTE(review): presumably a per-team scratch/shared-memory size in bytes --
/// confirm against the functor contract.
474template <
class FunctorType>
475struct FunctorTeamShmemSize<FunctorType, true, false> {
476 static inline size_t value(
const FunctorType& f,
int team_size) {
477 return f.team_shmem_size(team_size);
/// Partial specialization of FunctorTeamShmemSize for the <false, true> case.
///
/// Per the primary template's default arguments in this file, the two bools
/// are has_member_team_shmem_size<FunctorType>::value and
/// has_member_shmem_size<FunctorType>::value, so this specialization is the
/// one chosen when only the second of those is true.
///
/// value(f, team_size) returns f.shmem_size(team_size), converted to size_t.
/// NOTE(review): presumably a per-team scratch/shared-memory size in bytes --
/// confirm against the functor contract.
481template <
class FunctorType>
482struct FunctorTeamShmemSize<FunctorType, false, true> {
483 static inline size_t value(
const FunctorType& f,
int team_size) {
484 return f.shmem_size(team_size);
487template <
class FunctorType>
488struct FunctorTeamShmemSize<FunctorType, true, true> {
489 static inline size_t value(
const FunctorType& ,
int ) {
491 "Functor with both team_shmem_size and shmem_size defined is "
void parallel_scan(const std::string &str, const ExecutionPolicy &policy, const FunctorType &functor)
A thread-safe view of a bitset.
Implementation of the ParallelFor operator that has a partial specialization for the device.
Implementation detail of parallel_scan.
Given a Functor and an Execution Policy, query an execution space.