Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_KokkosCounter.cpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
11#include "TpetraCore_config.h"
12#include "Kokkos_Core.hpp"
13#include "Teuchos_TestForException.hpp"
14#include <cstring>
15#include <string>
16
17namespace Tpetra {
18namespace Details {
19
20/***************************** Deep Copy *****************************/
21namespace DeepCopyCounterDetails {
22// Static variables
// Not read or written anywhere else in the visible part of this file.
23bool is_initialized = true;
// Deep copies observed by kokkosp_begin_deep_copy() whose destination and
// source space names compare equal.
24size_t count_same = 0;
// Deep copies observed by kokkosp_begin_deep_copy() whose destination and
// source space names differ.
25size_t count_different = 0;
// Gate for kokkosp_begin_deep_copy(): while false, the callback leaves both
// counters untouched.
26bool count_active = false;
27
28void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, const char *dst_name, const void *dst_ptr,
29 Kokkos::Tools::SpaceHandle src_handle, const char *src_name, const void *src_ptr,
30 uint64_t size) {
31 if (count_active) {
32 if (strcmp(dst_handle.name, src_handle.name))
33 count_different++;
34 else
35 count_same++;
36 }
37}
38
39} // namespace DeepCopyCounterDetails
40
42 DeepCopyCounterDetails::count_active = true;
43 Kokkos::Tools::Experimental::set_begin_deep_copy_callback(DeepCopyCounterDetails::kokkosp_begin_deep_copy);
44}
45
47 DeepCopyCounterDetails::count_same = 0;
48 DeepCopyCounterDetails::count_different = 0;
49}
50
52 DeepCopyCounterDetails::count_active = false;
53}
54
56 return DeepCopyCounterDetails::count_same;
57}
58
60 return DeepCopyCounterDetails::count_different;
61}
62
63/***************************** Fence *****************************/
64
65namespace FenceCounterDetails {
66
67// Static variables
// Set to true at the end of initialize().
68bool is_initialized = false;
// Gate for kokkosp_begin_fence(): while false, the callback records nothing.
69bool count_active = false;
// Per-device-type fence counts, indexed by (int)eid.type in
// kokkosp_begin_fence(). This one receives fences whose instance_id is NOT
// the GlobalDeviceSynchronization value.
70std::vector<size_t> count_instance;
// Same indexing as count_instance; receives fences whose instance_id IS the
// GlobalDeviceSynchronization value.
71std::vector<size_t> count_global;
// Length of both count vectors; initialize() sets it to
// (int)DeviceType::Unknown.
72int num_devices = 0;
73
// Fence callback. While count_active is set, each call increments one bin,
// selected by device type, in either count_global or count_instance.
// `name` and `handle` are not used in the visible body.
//
// NOTE(review): `eid` is used below but never declared in the visible text;
// the listing jumps from line 77 to 79, so its declaration was presumably on
// the dropped line (presumably derived from `deviceId`) -- confirm against
// the real source before editing this function.
74void kokkosp_begin_fence(const char *name, const uint32_t deviceId,
75 uint64_t *handle) {
76 if (count_active) {
77 using namespace Kokkos::Tools::Experimental;
79
80 // Figure out what count bin to stick this in
// NOTE(review): idx is not range-checked. initialize() sizes both vectors to
// (int)DeviceType::Unknown, so a type equal to Unknown would index one past
// the end -- confirm whether that value can reach this callback.
81 int idx = (int)eid.type;
// A fence whose instance_id equals the GlobalDeviceSynchronization value is
// counted as global; every other fence is counted as per-instance.
82 if (eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
83 count_global[idx]++;
84 else
85 count_instance[idx]++;
86 }
87}
88
89std::string get_label(int i) {
90 using namespace Kokkos::Tools::Experimental;
91 DeviceType i_type = devicetype_from_uint32t(i);
92 std::string device_label;
93 if (i_type == DeviceType::Serial)
94 device_label = "Serial";
95 else if (i_type == DeviceType::OpenMP)
96 device_label = "OpenMP";
97 else if (i_type == DeviceType::Cuda)
98 device_label = "Cuda";
99 else if (i_type == DeviceType::HIP)
100 device_label = "HIP";
101 else if (i_type == DeviceType::OpenMPTarget)
102 device_label = "OpenMPTarget";
103 else if (i_type == DeviceType::HPX)
104 device_label = "HPX";
105 else if (i_type == DeviceType::Threads)
106 device_label = "Threats";
107 else if (i_type == DeviceType::SYCL)
108 device_label = "SYCL";
109 else if (i_type == DeviceType::OpenACC)
110 device_label = "OpenACC";
111 else if (i_type == DeviceType::Unknown)
112 device_label = "Unknown";
113
114 return device_label;
115}
116
117void initialize() {
118 using namespace Kokkos::Tools::Experimental;
119 num_devices = (int)DeviceType::Unknown;
120 count_instance.resize(num_devices);
121 count_instance.assign(num_devices, 0);
122 count_global.resize(num_devices);
123 count_global.assign(num_devices, 0);
124 is_initialized = true;
125}
126
127} // namespace FenceCounterDetails
128
130 if (!FenceCounterDetails::is_initialized)
131 FenceCounterDetails::initialize();
132 FenceCounterDetails::count_active = true;
133 Kokkos::Tools::Experimental::set_begin_fence_callback(FenceCounterDetails::kokkosp_begin_fence);
134}
135
137 FenceCounterDetails::count_instance.assign(FenceCounterDetails::num_devices, 0);
138 FenceCounterDetails::count_global.assign(FenceCounterDetails::num_devices, 0);
139}
140
142 FenceCounterDetails::count_active = false;
143}
144
145size_t FenceCounter::get_count_global(const std::string &device) {
146 using namespace Kokkos::Tools::Experimental;
147 for (int i = 0; i < FenceCounterDetails::num_devices; i++) {
148 std::string device_label = FenceCounterDetails::get_label(i);
149
150 if (device == device_label)
151 return FenceCounterDetails::count_global[i];
152 }
153
154 // Haven't found a device by this name
155 TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, std::string("Error: ") + device + std::string(" is not a device known to Tpetra"));
156}
157
158size_t FenceCounter::get_count_instance(const std::string &device) {
159 using namespace Kokkos::Tools::Experimental;
160 for (int i = 0; i < FenceCounterDetails::num_devices; i++) {
161 std::string device_label = FenceCounterDetails::get_label(i);
162
163 if (device == device_label)
164 return FenceCounterDetails::count_instance[i];
165 }
166
167 // Haven't found a device by this name
168 TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, std::string("Error: ") + device + std::string(" is not a device known to Tpetra"));
169}
170
171namespace KokkosRegionCounterDetails {
// Labels of every region pushed so far, in arrival order.
std::vector<std::string> regions;

// Push-region callback: appends a copy of `label` to `regions`.
void push_region_callback(const char *label) {
  regions.emplace_back(label);
}
175static_assert(std::is_same_v<decltype(&push_region_callback),
177 "Unexpected Kokkos profiling interface API. This is an internal "
178 "Tpetra developer error, please report this.");
179
180} // namespace KokkosRegionCounterDetails
181
183 Kokkos::Tools::Experimental::set_push_region_callback(
184 KokkosRegionCounterDetails::push_region_callback);
185}
186
188 KokkosRegionCounterDetails::regions.clear();
189}
190
192 Kokkos::Tools::Experimental::set_push_region_callback(nullptr);
193}
194
195size_t
197 size_t count = 0;
198 for (const auto &region : KokkosRegionCounterDetails::regions) {
199 count += (region.find(needle) != std::string::npos);
200 }
201 return count;
202}
203
204void KokkosRegionCounter::dump_regions(Teuchos::FancyOStream &os) {
205 for (const auto &region : KokkosRegionCounterDetails::regions) {
206 os << region << "\n";
207 }
208}
209
211 for (const auto &region : KokkosRegionCounterDetails::regions) {
212 os << region << "\n";
213 }
214}
215
216} // namespace Details
217} // namespace Tpetra
Declaration of various tools for counting Kokkos calls of various types using the Kokkos Profiling Li...
Struct that holds views of the contents of a CrsMatrix.
Implementation details of Tpetra.
size_t get_count_different_space()
Query the deep_copy counter for copies between different spaces.
size_t get_count_same_space()
Query the deep_copy counter for copies in the same space.
void stop()
Stop the deep_copy counter.
void start()
Start the deep_copy counter.
void reset()
Reset the deep_copy counter.
size_t get_count_instance(const std::string &device)
Query the fence counter for the given device, for an exec_space_instance.fence() call.
size_t get_count_global(const std::string &device)
Query the fence counter for the given device, for a Kokkos::fence() call.
void dump_regions(std::ostream &os)
Print all observed region labels, separated by newline.
size_t get_count_region_contains(const std::string &substr)
How many regions containing substr have been seen.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void initialize(int *argc, char ***argv)
Initialize Tpetra.