Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_KokkosTeuchosTimerInjection.cpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
#include "TpetraCore_config.h"
#include "Tpetra_Details_Behavior.hpp"
#include "Kokkos_Core.hpp"
#include "Teuchos_TimeMonitor.hpp"
#include "Teuchos_Time.hpp"
#include "Teuchos_RCP.hpp"
#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
#include "Teuchos_StackedTimer.hpp"
#endif
#include <cstdint>
#include <cstring>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
22
24
25namespace {
26// Get a useful label from the deviceId
27// NOTE: Relevant code is in: kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
28std::string deviceIdToString(const uint32_t deviceId) {
29 using namespace Kokkos::Tools::Experimental;
30 std::string device_label("(");
31 ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
32 if (eid.type == DeviceType::Serial)
33 device_label += "Serial";
34 else if (eid.type == DeviceType::OpenMP)
35 device_label += "OpenMP";
36 else if (eid.type == DeviceType::Cuda)
37 device_label += "Cuda";
38 else if (eid.type == DeviceType::HIP)
39 device_label += "HIP";
40 else if (eid.type == DeviceType::OpenMPTarget)
41 device_label += "OpenMPTarget";
42 else if (eid.type == DeviceType::HPX)
43 device_label += "HPX";
44 else if (eid.type == DeviceType::Threads)
45 device_label += "Threads";
46 else if (eid.type == DeviceType::SYCL)
47 device_label += "SYCL";
48 else if (eid.type == DeviceType::OpenACC)
49 device_label += "OpenACC";
50 else if (eid.type == DeviceType::Unknown)
51 device_label += "Unknown";
52 else
53 device_label += "Unknown to Tpetra";
54 if (eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
55 device_label += " All Instances)";
56 else if (eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization))
57 device_label += " DeepCopyResource)";
58 else
59 device_label += " Instance " + std::to_string(eid.instance_id) + ")";
60
61 return device_label;
62}
63
// Print a banner to std::cout explaining that a TimeMonitor timer was stopped
// while one of its nested sub-timers was still running, which the
// StackedTimer does not permit. Called by the end-callbacks in this file
// after StackedTimer::stop() throws.
void overlappingWarning() {
  // Written straight to std::cout; std::endl appends the final newline and
  // flushes so the warning is visible immediately.
  std::cout << "\n*********************************************************************\n"
               "WARNING: Overlapping timers detected!\n"
               "A TimeMonitor timer was stopped before a nested subtimer was\n"
               "stopped. This is not allowed by the StackedTimer. This corner case\n"
               "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
               "assigned to a new timer. To disable this warning, either fix the\n"
               "ordering of timer creation and destruction or disable the StackedTimer\n"
            << std::endl;
}
75
76} // namespace
77
78namespace Tpetra {
79namespace Details {
80
81namespace DeepCopyTimerInjection {
82Teuchos::RCP<Teuchos::Time> timer_;
83std::string label_;
84bool initialized_ = false;
85
86void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr,
87 Kokkos::Tools::SpaceHandle src_handle, const char* src_name, const void* src_ptr,
88 uint64_t size) {
89 // In verbose mode, we add the src/dst names as well
90 std::string extra_label;
92 extra_label = std::string(" {") + src_name + "=>" + dst_name + "}";
94 extra_label = std::string(" {") + src_name + "=>" + dst_name + "," + std::to_string(size) + "}";
95 }
96
97 if (timer_ != Teuchos::null)
98 std::cout << "WARNING: Kokkos::deep_copy() started within another Kokkos::deep_copy(). Timers will be in error" << std::endl;
99
100 // If the src_name is "Scalar" or "(none)" then we're doing a "Fill" style copy from host to devices, which we want to record separately.
101 if (!strcmp(src_name, "Scalar") || !strcmp(src_name, "(none)"))
102 label_ = std::string("Kokkos::deep_copy_scalar [") + src_handle.name + "=>" + dst_handle.name + "]" + extra_label;
103 // If the size is under 65 bytes, we're going to flag this as "small" to make it easier to watch the big stuff
104 else if (size <= 64)
105 label_ = std::string("Kokkos::deep_copy_small [") + src_handle.name + "=>" + dst_handle.name + "]" + extra_label;
106 else
107 label_ = std::string("Kokkos::deep_copy [") + src_handle.name + "=>" + dst_handle.name + "]" + extra_label;
108
109#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
110 if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
111 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
112 stackedTimer->start(label_);
113 } else
114#endif
115 {
116 timer_ = Teuchos::TimeMonitor::getNewTimer(label_);
117 timer_->start();
118 timer_->incrementNumCalls();
119 }
120}
121
122void kokkosp_end_deep_copy() {
123#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
124 if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
125 try {
126 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
127 stackedTimer->stop(label_);
128 return;
129 } catch (std::runtime_error&) {
130 overlappingWarning();
131 Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
132 }
133 } else
134#endif
135 {
136 if (timer_ != Teuchos::null) {
137 timer_->stop();
138 }
139 timer_ = Teuchos::null;
140 }
141}
142
143} // namespace DeepCopyTimerInjection
144
145void AddKokkosDeepCopyToTimeMonitor(bool force) {
146 if (!DeepCopyTimerInjection::initialized_) {
148 Kokkos::Tools::Experimental::set_begin_deep_copy_callback(DeepCopyTimerInjection::kokkosp_begin_deep_copy);
149 Kokkos::Tools::Experimental::set_end_deep_copy_callback(DeepCopyTimerInjection::kokkosp_end_deep_copy);
150 DeepCopyTimerInjection::initialized_ = true;
151 }
152 }
153}
154
155namespace FenceTimerInjection {
156Teuchos::RCP<Teuchos::Time> timer_;
157bool initialized_ = false;
158uint64_t active_handle;
159std::string label_ = "";
160
161void kokkosp_begin_fence(const char* name, const uint32_t deviceId,
162 uint64_t* handle) {
163 // Nested fences are not allowed
164 if (!label_.empty())
165 return;
166
167 active_handle = (active_handle + 1) % 1024;
168 *handle = active_handle;
169
170 std::string device_label = deviceIdToString(deviceId);
171
172 label_ = std::string("Kokkos::fence ") + name + " " + device_label;
173
174#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
175 if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
176 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
177 stackedTimer->start(label_);
178 } else
179#endif
180 {
181 timer_ = Teuchos::TimeMonitor::getNewTimer(label_);
182 timer_->start();
183 timer_->incrementNumCalls();
184 }
185}
186
187void kokkosp_end_fence(const uint64_t handle) {
188 if (handle == active_handle) {
189#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
190 if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
191 try {
192 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
193 stackedTimer->stop(label_);
194 label_ = "";
195 return;
196 } catch (std::runtime_error&) {
197 overlappingWarning();
198 Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
199 }
200 } else
201#endif
202 {
203 if (timer_ != Teuchos::null) {
204 timer_->stop();
205 }
206 timer_ = Teuchos::null;
207 }
208 }
209 // Else: We've nested our fences, and we need to ignore the inner fences
210}
211
212} // namespace FenceTimerInjection
213
214void AddKokkosFenceToTimeMonitor(bool force) {
215 if (!FenceTimerInjection::initialized_) {
217 Kokkos::Tools::Experimental::set_begin_fence_callback(FenceTimerInjection::kokkosp_begin_fence);
218 Kokkos::Tools::Experimental::set_end_fence_callback(FenceTimerInjection::kokkosp_end_fence);
219 FenceTimerInjection::initialized_ = true;
220 }
221 }
222}
223
224namespace FunctionsTimerInjection {
225Teuchos::RCP<Teuchos::Time> timer_;
226bool initialized_ = false;
227std::string label_;
228
229void kokkosp_begin_kernel(const char* kernelName, const char* kernelPrefix, const uint32_t devID,
230 uint64_t* kernelID) {
231 // Nested fences are not allowed
232 if (timer_ != Teuchos::null)
233 return;
234 std::string device_label = deviceIdToString(devID);
235
236 label_ = std::string("Kokkos::") + kernelName + " " + kernelPrefix + " " + device_label;
237
238#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
239 if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
240 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
241 stackedTimer->start(label_);
242 } else
243#endif
244 {
245 timer_ = Teuchos::TimeMonitor::getNewTimer(label_);
246 timer_->start();
247 timer_->incrementNumCalls();
248 }
249}
250
251void kokkosp_begin_for(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
252 kokkosp_begin_kernel("parallel_for", kernelPrefix, devID, kernelID);
253}
254
255void kokkosp_begin_scan(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
256 kokkosp_begin_kernel("parallel_scan", kernelPrefix, devID, kernelID);
257}
258
259void kokkosp_begin_reduce(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
260 kokkosp_begin_kernel("parallel_reduce", kernelPrefix, devID, kernelID);
261}
262
263void kokkosp_end_kernel(const uint64_t handle) {
264#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
265 if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
266 try {
267 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
268 stackedTimer->stop(label_);
269 return;
270 } catch (std::runtime_error&) {
271 overlappingWarning();
272 Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
273 }
274 } else
275#endif
276 {
277 if (timer_ != Teuchos::null) {
278 timer_->stop();
279 }
280 timer_ = Teuchos::null;
281 }
282}
283} // namespace FunctionsTimerInjection
284
285void AddKokkosFunctionsToTimeMonitor(bool force) {
286 if (!FunctionsTimerInjection::initialized_) {
288 Kokkos::Tools::Experimental::set_begin_parallel_for_callback(FunctionsTimerInjection::kokkosp_begin_for);
289 Kokkos::Tools::Experimental::set_begin_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_begin_reduce);
290 Kokkos::Tools::Experimental::set_begin_parallel_scan_callback(FunctionsTimerInjection::kokkosp_begin_scan);
291
292 // The end-call is generic, even though the start-call is not.
293 Kokkos::Tools::Experimental::set_end_parallel_for_callback(FunctionsTimerInjection::kokkosp_end_kernel);
294 Kokkos::Tools::Experimental::set_end_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_end_kernel);
295 Kokkos::Tools::Experimental::set_end_parallel_scan_callback(FunctionsTimerInjection::kokkosp_end_kernel);
296 FunctionsTimerInjection::initialized_ = true;
297 }
298 }
299}
300
301} // namespace Details
302} // namespace Tpetra
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of functions that use Kokkos' profiling library to add deep copies between memory spaces,...
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size....
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination....
Implementation details of Tpetra.
Namespace Tpetra contains the class and methods constituting the Tpetra library.