Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_ExecutionSpaces.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_EXECUTIONSPACES_HPP
11#define TPETRA_DETAILS_EXECUTIONSPACES_HPP
12
13#include <iostream>
14#include <sstream>
15#include <vector>
16
17#include <Kokkos_Core.hpp>
18
19#include <Teuchos_RCP.hpp>
20
23
40#define TPETRA_DETAILS_SPACES_THROW(x) \
41 { \
42 std::stringstream ss; \
43 ss << __FILE__ << ":" << __LINE__ << ": " << x; \
44 throw std::runtime_error(ss.str()); \
45 }
46
47namespace Tpetra {
48namespace Details {
49namespace Spaces {
50
56enum class Priority {
57 low = 0,
58 medium = 1,
59 high = 2,
60 NUM_LEVELS = 3 // not to be used as a priority
61};
62
63#if defined(KOKKOS_ENABLE_CUDA)
64inline void success_or_throw(cudaError_t err, const char *file,
65 const int line) {
66 if (err != cudaSuccess) {
67 std::stringstream ss;
68 ss << file << ":" << line << ": ";
70 throw std::runtime_error(ss.str());
71 }
72}
73#define TPETRA_DETAILS_SPACES_CUDA_RUNTIME(x) \
74 Tpetra::Details::Spaces::success_or_throw((x), __FILE__, __LINE__)
75#endif // KOKKOS_ENABLE_CUDA
76
83void lazy_init();
84
85#if defined(KOKKOS_ENABLE_CUDA)
86struct CudaInfo {
87 bool initialized_;
88 int lowPrio_;
89 int mediumPrio_; // same as CUDA default
90 int highPrio_;
91 cudaEvent_t execSpaceWaitEvent_; // see exec_space_wait
92
93 CudaInfo();
94 ~CudaInfo() = default; // execSpaceWaitEvent_ cleaned up by CUDA deinit
95 CudaInfo(const CudaInfo &other) = delete;
96 CudaInfo(CudaInfo &&other) = delete;
97};
98extern CudaInfo cudaInfo;
99#endif // KOKKOS_ENABLE_CUDA
100
101// Tpetra's managed spaces
102#if defined(KOKKOS_ENABLE_CUDA)
103template <typename Space>
104using IsCuda = std::enable_if_t<std::is_same_v<Space, Kokkos::Cuda>, bool>;
105template <typename Space>
106using NotCuda = std::enable_if_t<!std::is_same_v<Space, Kokkos::Cuda>, bool>;
107template <typename S1, typename S2>
108using BothCuda = std::enable_if_t<
109 std::is_same_v<S1, Kokkos::Cuda> && std::is_same_v<S2, Kokkos::Cuda>, bool>;
110template <typename S1, typename S2>
111using NotBothCuda = std::enable_if_t<!std::is_same_v<S1, Kokkos::Cuda> ||
112 !std::is_same_v<S2, Kokkos::Cuda>,
113 bool>;
114#endif // KOKKOS_ENABLE_CUDA
115
116#if defined(KOKKOS_ENABLE_SERIAL)
118template <typename Space>
119using IsSerial = std::enable_if_t<std::is_same_v<Space, Kokkos::Serial>, bool>;
120#endif // KOKKOS_ENABLE_SERIAL
121
122#if defined(KOKKOS_ENABLE_OPENMP)
124template <typename Space>
125using IsOpenMP = std::enable_if_t<std::is_same_v<Space, Kokkos::OpenMP>, bool>;
126#endif // KOKKOS_ENABLE_OPENMP
127
128#if defined(KOKKOS_ENABLE_HIP)
130template <typename Space>
131using IsHIP = std::enable_if_t<std::is_same_v<Space, Kokkos::HIP>, bool>;
132#endif // KOKKOS_ENABLE_HIP
133
134#if defined(KOKKOS_ENABLE_SYCL)
136template <typename Space>
137using IsSYCL = std::enable_if_t<std::is_same_v<Space, Kokkos::Experimental::SYCL>, bool>;
138#endif // KOKKOS_ENABLE_SYCL
139
145template <typename ExecSpace, Priority priority = Priority::medium
146#if defined(KOKKOS_ENABLE_CUDA)
147 ,
148 NotCuda<ExecSpace> = true
149#endif // KOKKOS_ENABLE_CUDA
150 >
152 return ExecSpace();
153}
154
161#if defined(KOKKOS_ENABLE_CUDA)
162template <typename ExecSpace, Priority priority = Priority::medium,
163 IsCuda<ExecSpace> = true>
164Kokkos::Cuda make_instance() {
165 lazy_init(); // CUDA priorities
167 int prio;
168 switch (priority) {
169 case Priority::high:
170 prio = cudaInfo.highPrio_;
171 break;
172 case Priority::medium:
173 prio = cudaInfo.mediumPrio_;
174 break;
175 case Priority::low:
176 prio = cudaInfo.lowPrio_;
177 break;
178 default:
179 throw std::runtime_error("unexpected static Tpetra Space priority");
180 }
183 return Kokkos::Cuda(stream, true /*Kokkos will manage this stream*/);
184}
185#endif // KOKKOS_ENABLE_CUDA
186
192template <typename ExecSpace>
194 switch (prio) {
195 case Priority::high:
197 case Priority::medium:
199 case Priority::low:
201 default:
202 throw std::runtime_error("unexpected dynamic Tpetra Space priority");
203 }
204}
205
217template <typename ExecSpace>
219 public:
221 using rcp_type = Teuchos::RCP<const execution_space>;
222
229 template <Priority priority = Priority::medium>
230 rcp_type space_instance(int i = 0) {
232 "Tpetra::Details::Spaces::space_instance");
233
234 constexpr int p = static_cast<int>(priority);
235 static_assert(p < sizeof(instances) / sizeof(instances[0]),
236 "Spaces::Priority enum error");
237
238 if (i < 0) {
239 TPETRA_DETAILS_SPACES_THROW("requested instance id " << i << " (< 0)");
240 }
242 TPETRA_DETAILS_SPACES_THROW(
243 "requested instance id "
245 << ") set by TPETRA_SPACES_ID_WARN_LIMIT");
246 }
247
248 // make sure we can store an exec space at index i for priority
249 // not sure what happens in RCP(), so let's explicitly make it null
250 while (size_t(i) >= instances[p].size()) {
251 instances[p].push_back(Teuchos::ENull());
252 }
253
254 /* no exec space instance i of priority p exists.
255 It may have never existed, or all Tpetra objects referencing it have been
256 destructed.
257
258 Create a new RCP<ExecSpace> and internally store a weak
259 reference, so this space will be destructed when all strong references to
260 it are gone, but we can still refer to it as long as it lives to prevent
261 recreating
262 */
263 if (instances[p][i].is_null() || !instances[p][i].is_valid_ptr()) {
264 // create a strong RCP to a space
265 rcp_type r = Teuchos::RCP<const execution_space>(
267
268 // store a weak RCP to the space
269 instances[p][i] = r.create_weak();
270
271 return r; // allow strong rcp to escape so internal weak one does not
272 // immediately go away
273 }
274
275 auto r = instances[p][i].create_strong();
276 return r;
277 }
278
283 for (int i = 0; i < static_cast<int>(Spaces::Priority::NUM_LEVELS); ++i) {
284 for (const rcp_type &rcp : instances[i]) {
285 if (rcp.is_valid_ptr() && !rcp.is_null()) {
286 // avoid throwing in dtor
287 std::cerr << __FILE__ << ":" << __LINE__
288 << " execution space instance survived to "
289 "~InstanceLifetimeManager. strong_count() = "
290 << rcp.strong_count()
291 << ". Did a Tpetra object live past Kokkos::finalize()?"
292 << std::endl;
293 }
294 }
295 }
296 }
297
298 private:
299 // one vector of instances for each priority level
300 std::vector<rcp_type>
301 instances[static_cast<int>(Spaces::Priority::NUM_LEVELS)];
302};
303
304#if defined(KOKKOS_ENABLE_CUDA)
306#endif
307#if defined(KOKKOS_ENABLE_SERIAL)
309#endif
310#if defined(KOKKOS_ENABLE_OPENMP)
312#endif
313#if defined(KOKKOS_ENABLE_HIP)
315#endif
316#if defined(KOKKOS_ENABLE_SYCL)
318#endif
319
320#if defined(KOKKOS_ENABLE_CUDA)
321
325template <typename ExecSpace, Priority priority = Priority::medium,
326 IsCuda<ExecSpace> = true>
327Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
328 return cudaSpaces.space_instance<priority>(i);
329}
330#endif
331
332#if defined(KOKKOS_ENABLE_SERIAL)
336template <typename ExecSpace, Priority priority = Priority::medium,
337 IsSerial<ExecSpace> = true>
338Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
339 return serialSpaces.space_instance<priority>(i);
340}
341#endif
342
343#if defined(KOKKOS_ENABLE_OPENMP)
347template <typename ExecSpace, Priority priority = Priority::medium,
348 IsOpenMP<ExecSpace> = true>
349Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
350 return openMPSpaces.space_instance<priority>(i);
351}
352#endif
353
354#if defined(KOKKOS_ENABLE_HIP)
357template <typename ExecSpace, Priority priority = Priority::medium,
358 IsHIP<ExecSpace> = true>
359Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
360 return HIPSpaces.space_instance<priority>(i);
361}
362#endif
363#if defined(KOKKOS_ENABLE_SYCL)
367template <typename ExecSpace, Priority priority = Priority::medium,
368 IsSYCL<ExecSpace> = true>
369Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
370 return SYCLSpaces.space_instance<priority>(i);
371}
372#endif
373
379template <typename ExecSpace>
380Teuchos::RCP<const ExecSpace> space_instance(const Priority &priority,
381 int i = 0) {
382 switch (priority) {
383 case Priority::high:
385 case Priority::medium:
387 case Priority::low:
389 default:
390 throw std::runtime_error(
391 "unexpected dynamic Tpetra Space priority in space_instance");
392 }
393}
394
408template <typename S1, typename S2
409#if defined(KOKKOS_ENABLE_CUDA)
410 ,
412#endif
413 >
414void exec_space_wait(const char *msg, const S1 &waitee, const S2 & /*waiter*/) {
416 "Tpetra::Details::Spaces::exec_space_wait");
417 lazy_init();
418 waitee.fence(msg);
419}
420
421#if defined(KOKKOS_ENABLE_CUDA)
422template <typename S1, typename S2, BothCuda<S1, S2> = true>
423void exec_space_wait(const char *msg, const S1 &waitee, const S2 &waiter) {
425 "Tpetra::Details::Spaces::exec_space_wait");
426 lazy_init();
427
428 // if they are the same instance, no sync needed
429 if (waitee.impl_instance_id() !=
430 waiter
431 .impl_instance_id()) { // TODO: use instance operator== once available
432 /* cudaStreamWaitEvent is not affected by later calls to cudaEventRecord,
433 even if it overwrites the state of a shared event this means we only need
434 one event even if many exec_space_waits are in flight at the same time
435 */
437 cudaEventRecord(cudaInfo.execSpaceWaitEvent_, waitee.cuda_stream()));
439 waiter.cuda_stream(), cudaInfo.execSpaceWaitEvent_, 0 /*flags*/));
440 }
441}
442#endif
443
444template <typename S1, typename S2>
445void exec_space_wait(const S1 &waitee, const S2 &waiter) {
447 "Tpetra::Details::Spaces::exec_space_wait");
448 lazy_init();
449 exec_space_wait("anonymous", waitee, waiter);
450}
451
452template <typename ExecutionSpace>
453constexpr KOKKOS_INLINE_FUNCTION bool is_gpu_exec_space() {
454 return false;
455}
456
457#if defined(KOKKOS_ENABLE_CUDA)
458template <>
459constexpr KOKKOS_INLINE_FUNCTION bool is_gpu_exec_space<Kokkos::Cuda>() {
460 return true;
461}
462#endif
463
464#if defined(KOKKOS_ENABLE_HIP)
465template <>
466constexpr KOKKOS_INLINE_FUNCTION bool
467is_gpu_exec_space<Kokkos::HIP>() {
468 return true;
469}
470#endif
471
472#if defined(KOKKOS_ENABLE_SYCL)
473template <>
474constexpr KOKKOS_INLINE_FUNCTION bool
475is_gpu_exec_space<Kokkos::Experimental::SYCL>() {
476 return true;
477}
478#endif
479
480} // namespace Spaces
481} // namespace Details
482} // namespace Tpetra
483
484#undef TPETRA_DETAILS_SPACES_THROW
485
486#endif // TPETRA_DETAILS_EXECUTIONSPACES_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
void exec_space_wait(const char *msg, const S1 &waitee, const S2 &)
cause future work submitted to waiter to wait for the current work in waitee to finish
ExecSpace make_instance()
Construct a Kokkos execution space instance with the following priority.
Teuchos::RCP< const ExecSpace > space_instance(const Priority &priority, int i=0)
get a strong Teuchos::RCP to Tpetra-managed Kokkos execution space instance i
Priority
Priority interface for Tpetra's managed execution spaces.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Struct that holds views of the contents of a CrsMatrix.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
Provides reusable Kokkos execution space instances.
~InstanceLifetimeManager()
Issue a warning if any Tpetra-managed execution space instances survive to the end of static lifetime...
rcp_type space_instance(int i=0)
Retrieve a strong Teuchos::RCP<const ExecSpace> to instance i
Implementation details of Tpetra.
Namespace Tpetra contains the class and methods constituting the Tpetra library.