Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_ExecutionSpaces.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_EXECUTIONSPACES_HPP
11#define TPETRA_DETAILS_EXECUTIONSPACES_HPP
12
13#include <iostream>
14#include <sstream>
15#include <vector>
16
17#include <Kokkos_Core.hpp>
18
19#include <Teuchos_RCP.hpp>
20
23
/// Throw std::runtime_error whose message is "<file>:<line>: " followed by
/// the stream expression `x` (anything that may follow `<<`, for example
/// `"id " << i`).
///
/// The body is wrapped in do { } while (0) so that the macro together with
/// its trailing semicolon forms exactly one statement; a bare `{ ... }`
/// followed by `;` breaks an unbraced `if (...) MACRO(...); else ...`.
/// The local stream has a macro-specific name so that `x` may mention a
/// caller variable that happens to be called `ss`.
#define TPETRA_DETAILS_SPACES_THROW(x)                                 \
  do {                                                                 \
    std::ostringstream tpetra_details_spaces_throw_ss_;                \
    tpetra_details_spaces_throw_ss_ << __FILE__ << ":" << __LINE__     \
                                    << ": " << x;                      \
    throw std::runtime_error(tpetra_details_spaces_throw_ss_.str());   \
  } while (0)
46
47namespace Tpetra {
48namespace Details {
49namespace Spaces {
50
/// Priority requested for a Tpetra-managed execution space instance.
/// The enumerators are consecutive integers starting at zero; elsewhere in
/// this file they are cast to int and used as array indices.
enum class Priority {
  low,        // == 0
  medium,     // == 1
  high,       // == 2
  NUM_LEVELS  // == 3; count of the levels above, not to be used as a priority
};
62
63#if defined(KOKKOS_ENABLE_CUDA)
64inline void success_or_throw(cudaError_t err, const char *file,
65 const int line) {
66 if (err != cudaSuccess) {
67 std::stringstream ss;
68 ss << file << ":" << line << ": ";
70 throw std::runtime_error(ss.str());
71 }
72}
73#define TPETRA_DETAILS_SPACES_CUDA_RUNTIME(x) \
74 Tpetra::Details::Spaces::success_or_throw((x), __FILE__, __LINE__)
75#endif // KOKKOS_ENABLE_CUDA
76
/// Setup hook for this namespace (declaration only; the definition is not in
/// this header). The CUDA make_instance() calls it before reading the
/// priorities stored in cudaInfo, and every exec_space_wait() overload calls
/// it on entry.
/// NOTE(review): presumably idempotent and responsible for filling in
/// cudaInfo -- confirm against the definition.
void lazy_init();
84
85#if defined(KOKKOS_ENABLE_CUDA)
86struct CudaInfo {
87 bool initialized_;
88 int lowPrio_;
89 int mediumPrio_; // same as CUDA default
90 int highPrio_;
91 cudaEvent_t execSpaceWaitEvent_; // see exec_space_wait
92
93 CudaInfo();
94 ~CudaInfo() = default; // execSpaceWaitEvent_ cleaned up by CUDA deinit
95 CudaInfo(const CudaInfo &other) = delete;
96 CudaInfo(CudaInfo &&other) = delete;
97};
98extern CudaInfo cudaInfo;
99#endif // KOKKOS_ENABLE_CUDA
100
101// Tpetra's managed spaces
102#if defined(KOKKOS_ENABLE_CUDA)
103template <typename Space>
104using IsCuda = std::enable_if_t<std::is_same_v<Space, Kokkos::Cuda>, bool>;
105template <typename Space>
106using NotCuda = std::enable_if_t<!std::is_same_v<Space, Kokkos::Cuda>, bool>;
107template <typename S1, typename S2>
108using BothCuda = std::enable_if_t<
109 std::is_same_v<S1, Kokkos::Cuda> && std::is_same_v<S2, Kokkos::Cuda>, bool>;
110template <typename S1, typename S2>
111using NotBothCuda = std::enable_if_t<!std::is_same_v<S1, Kokkos::Cuda> ||
112 !std::is_same_v<S2, Kokkos::Cuda>,
113 bool>;
114#endif // KOKKOS_ENABLE_CUDA
115
116#if defined(KOKKOS_ENABLE_SERIAL)
118template <typename Space>
119using IsSerial = std::enable_if_t<std::is_same_v<Space, Kokkos::Serial>, bool>;
120#endif // KOKKOS_ENABLE_SERIAL
121
122#if defined(KOKKOS_ENABLE_OPENMP)
124template <typename Space>
125using IsOpenMP = std::enable_if_t<std::is_same_v<Space, Kokkos::OpenMP>, bool>;
126#endif // KOKKOS_ENABLE_OPENMP
127
128#if defined(KOKKOS_ENABLE_HIP)
130template <typename Space>
131using IsHIP = std::enable_if_t<std::is_same_v<Space, Kokkos::HIP>, bool>;
132#endif // KOKKOS_ENABLE_HIP
133
134#if defined(KOKKOS_ENABLE_SYCL)
136template <typename Space>
137using IsSYCL = std::enable_if_t<std::is_same_v<Space, Kokkos::Experimental::SYCL>, bool>;
138#endif // KOKKOS_ENABLE_SYCL
139
145template <typename ExecSpace, Priority priority = Priority::medium
146#if defined(KOKKOS_ENABLE_CUDA)
147 ,
148 NotCuda<ExecSpace> = true
149#endif // KOKKOS_ENABLE_CUDA
150 >
152 return ExecSpace();
153}
154
161#if defined(KOKKOS_ENABLE_CUDA)
162template <typename ExecSpace, Priority priority = Priority::medium,
163 IsCuda<ExecSpace> = true>
164Kokkos::Cuda make_instance() {
165 lazy_init(); // CUDA priorities
167 int prio;
168 switch (priority) {
169 case Priority::high:
170 prio = cudaInfo.highPrio_;
171 break;
172 case Priority::medium:
173 prio = cudaInfo.mediumPrio_;
174 break;
175 case Priority::low:
176 prio = cudaInfo.lowPrio_;
177 break;
178 default:
179 throw std::runtime_error("unexpected static Tpetra Space priority");
180 }
183
184 Kokkos::push_finalize_hook([=] {
185 if (stream != nullptr) {
188 }
189 });
190
191 return Kokkos::Cuda(stream);
192}
193#endif // KOKKOS_ENABLE_CUDA
194
200template <typename ExecSpace>
202 switch (prio) {
203 case Priority::high:
205 case Priority::medium:
207 case Priority::low:
209 default:
210 throw std::runtime_error("unexpected dynamic Tpetra Space priority");
211 }
212}
213
225template <typename ExecSpace>
227 public:
229 using rcp_type = Teuchos::RCP<const execution_space>;
230
237 template <Priority priority = Priority::medium>
238 rcp_type space_instance(int i = 0) {
240 "Tpetra::Details::Spaces::space_instance");
241
242 constexpr int p = static_cast<int>(priority);
243 static_assert(p < sizeof(instances) / sizeof(instances[0]),
244 "Spaces::Priority enum error");
245
246 if (i < 0) {
247 TPETRA_DETAILS_SPACES_THROW("requested instance id " << i << " (< 0)");
248 }
250 TPETRA_DETAILS_SPACES_THROW(
251 "requested instance id "
253 << ") set by TPETRA_SPACES_ID_WARN_LIMIT");
254 }
255
256 // make sure we can store an exec space at index i for priority
257 // not sure what happens in RCP(), so let's explicitly make it null
258 while (size_t(i) >= instances[p].size()) {
259 instances[p].push_back(Teuchos::ENull());
260 }
261
262 /* no exec space instance i of priority p exists.
263 It may have never existed, or all Tpetra objects referencing it have been
264 destructed.
265
266 Create a new RCP<ExecSpace> and internally store a weak
267 reference, so this space will be destructed when all strong references to
268 it are gone, but we can still refer to it as long as it lives to prevent
269 recreating
270 */
271 if (instances[p][i].is_null() || !instances[p][i].is_valid_ptr()) {
272 // create a strong RCP to a space
273 rcp_type r = Teuchos::RCP<const execution_space>(
275
276 // store a weak RCP to the space
277 instances[p][i] = r.create_weak();
278
279 return r; // allow strong rcp to escape so internal weak one does not
280 // immediately go away
281 }
282
283 auto r = instances[p][i].create_strong();
284 return r;
285 }
286
291 for (int i = 0; i < static_cast<int>(Spaces::Priority::NUM_LEVELS); ++i) {
292 for (const rcp_type &rcp : instances[i]) {
293 if (rcp.is_valid_ptr() && !rcp.is_null()) {
294 // avoid throwing in dtor
295 std::cerr << __FILE__ << ":" << __LINE__
296 << " execution space instance survived to "
297 "~InstanceLifetimeManager. strong_count() = "
298 << rcp.strong_count()
299 << ". Did a Tpetra object live past Kokkos::finalize()?"
300 << std::endl;
301 }
302 }
303 }
304 }
305
306 private:
307 // one vector of instances for each priority level
308 std::vector<rcp_type>
309 instances[static_cast<int>(Spaces::Priority::NUM_LEVELS)];
310};
311
// One manager object per enabled backend. These are the objects the
// space_instance() overloads further down forward to; they are only declared
// here and must be defined in exactly one translation unit.
#if defined(KOKKOS_ENABLE_CUDA)
extern InstanceLifetimeManager<Kokkos::Cuda> cudaSpaces;
#endif
#if defined(KOKKOS_ENABLE_SERIAL)
extern InstanceLifetimeManager<Kokkos::Serial> serialSpaces;
#endif
#if defined(KOKKOS_ENABLE_OPENMP)
extern InstanceLifetimeManager<Kokkos::OpenMP> openMPSpaces;
#endif
#if defined(KOKKOS_ENABLE_HIP)
extern InstanceLifetimeManager<Kokkos::HIP> HIPSpaces;
#endif
#if defined(KOKKOS_ENABLE_SYCL)
extern InstanceLifetimeManager<Kokkos::Experimental::SYCL> SYCLSpaces;
#endif
327
328#if defined(KOKKOS_ENABLE_CUDA)
329
333template <typename ExecSpace, Priority priority = Priority::medium,
334 IsCuda<ExecSpace> = true>
335Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
336 return cudaSpaces.space_instance<priority>(i);
337}
338#endif
339
340#if defined(KOKKOS_ENABLE_SERIAL)
344template <typename ExecSpace, Priority priority = Priority::medium,
345 IsSerial<ExecSpace> = true>
346Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
347 return serialSpaces.space_instance<priority>(i);
348}
349#endif
350
351#if defined(KOKKOS_ENABLE_OPENMP)
355template <typename ExecSpace, Priority priority = Priority::medium,
356 IsOpenMP<ExecSpace> = true>
357Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
358 return openMPSpaces.space_instance<priority>(i);
359}
360#endif
361
362#if defined(KOKKOS_ENABLE_HIP)
365template <typename ExecSpace, Priority priority = Priority::medium,
366 IsHIP<ExecSpace> = true>
367Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
368 return HIPSpaces.space_instance<priority>(i);
369}
370#endif
371#if defined(KOKKOS_ENABLE_SYCL)
375template <typename ExecSpace, Priority priority = Priority::medium,
376 IsSYCL<ExecSpace> = true>
377Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
378 return SYCLSpaces.space_instance<priority>(i);
379}
380#endif
381
388template <typename ExecSpace>
389Teuchos::RCP<const ExecSpace> space_instance(const Priority &priority,
390 int i = 0) {
391 switch (priority) {
392 case Priority::high:
394 case Priority::medium:
396 case Priority::low:
398 default:
399 throw std::runtime_error(
400 "unexpected dynamic Tpetra Space priority in space_instance");
401 }
402}
403
417template <typename S1, typename S2
418#if defined(KOKKOS_ENABLE_CUDA)
419 ,
421#endif
422 >
423void exec_space_wait(const char *msg, const S1 &waitee, const S2 & /*waiter*/) {
425 "Tpetra::Details::Spaces::exec_space_wait");
426 lazy_init();
427 waitee.fence(msg);
428}
429
430#if defined(KOKKOS_ENABLE_CUDA)
431template <typename S1, typename S2, BothCuda<S1, S2> = true>
432void exec_space_wait(const char *msg, const S1 &waitee, const S2 &waiter) {
434 "Tpetra::Details::Spaces::exec_space_wait");
435 lazy_init();
436
437 // if they are the same instance, no sync needed
438 if (waitee.impl_instance_id() !=
439 waiter
440 .impl_instance_id()) { // TODO: use instance operator== once available
441 /* cudaStreamWaitEvent is not affected by later calls to cudaEventRecord,
442 even if it overwrites the state of a shared event this means we only need
443 one event even if many exec_space_waits are in flight at the same time
444 */
446 cudaEventRecord(cudaInfo.execSpaceWaitEvent_, waitee.cuda_stream()));
448 waiter.cuda_stream(), cudaInfo.execSpaceWaitEvent_, 0 /*flags*/));
449 }
450}
451#endif
452
453template <typename S1, typename S2>
454void exec_space_wait(const S1 &waitee, const S2 &waiter) {
456 "Tpetra::Details::Spaces::exec_space_wait");
457 lazy_init();
458 exec_space_wait("anonymous", waitee, waiter);
459}
460
461template <typename ExecutionSpace>
462constexpr KOKKOS_INLINE_FUNCTION bool is_gpu_exec_space() {
463 return false;
464}
465
466#if defined(KOKKOS_ENABLE_CUDA)
467template <>
468constexpr KOKKOS_INLINE_FUNCTION bool is_gpu_exec_space<Kokkos::Cuda>() {
469 return true;
470}
471#endif
472
473#if defined(KOKKOS_ENABLE_HIP)
474template <>
475constexpr KOKKOS_INLINE_FUNCTION bool
476is_gpu_exec_space<Kokkos::HIP>() {
477 return true;
478}
479#endif
480
481#if defined(KOKKOS_ENABLE_SYCL)
482template <>
483constexpr KOKKOS_INLINE_FUNCTION bool
484is_gpu_exec_space<Kokkos::Experimental::SYCL>() {
485 return true;
486}
487#endif
488
489} // namespace Spaces
490} // namespace Details
491} // namespace Tpetra
492
493#undef TPETRA_DETAILS_SPACES_THROW
494
495#endif // TPETRA_DETAILS_EXECUTIONSPACES_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
void exec_space_wait(const char *msg, const S1 &waitee, const S2 &)
cause future work submitted to waiter to wait for the current work in waitee to finish
ExecSpace make_instance()
Construct a Kokkos execution space instance with the following priority.
Teuchos::RCP< const ExecSpace > space_instance(const Priority &priority, int i=0)
get a strong Teuchos::RCP to Tpetra-managed Kokkos execution space instance
Priority
Priority interface for Tpetra's managed execution spaces.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Struct that holds views of the contents of a CrsMatrix.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
Provides reusable Kokkos execution space instances.
~InstanceLifetimeManager()
Issue a warning if any Tpetra-managed execution space instances survive to the end of static lifetime...
rcp_type space_instance(int i=0)
Retrieve a strong Teuchos::RCP<const ExecSpace> to instance i
Implementation details of Tpetra.
Namespace Tpetra contains the class and methods constituting the Tpetra library.