Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_Behavior.cpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#include "Teuchos_EnvVariables.hpp"
11#include "Teuchos_OrdinalTraits.hpp"
12#include "Teuchos_TestForException.hpp"
13#include "TpetraCore_config.h"
15#include "KokkosKernels_config.h" // for TPL enable macros
16#include <array>
17#include <functional>
18#include <map>
19
54// environ should be available on posix platforms
55#if not(defined(WIN) && (_MSC_VER >= 1900))
56// needs to be in the global namespace
57extern char **environ;
58#endif
59
60namespace Tpetra {
61namespace Details {
62
63namespace BehaviorDetails {
64
// Prefix checked against environment variable names when looking for
// unrecognized Tpetra variables.
constexpr std::string_view RESERVED_PREFIX{"TPETRA_"};

// Names of the environment variables read by this file.  Each constant is
// passed verbatim to the Teuchos environment-variable helpers.

// MPI / device behavior
constexpr std::string_view ASSUME_GPU_AWARE_MPI{"TPETRA_ASSUME_GPU_AWARE_MPI"};
constexpr std::string_view CUDA_LAUNCH_BLOCKING{"CUDA_LAUNCH_BLOCKING"};
constexpr std::string_view MM_TAFC_OptimizationCoreCount{
    "MM_TAFC_OptimizationCoreCount"};

// Numeric thresholds and sizes
constexpr std::string_view VERBOSE_PRINT_COUNT_THRESHOLD{
    "TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD"};
constexpr std::string_view ROW_IMBALANCE_THRESHOLD{
    "TPETRA_ROW_IMBALANCE_THRESHOLD"};
constexpr std::string_view MULTIVECTOR_USE_MERGE_PATH{
    "TPETRA_MULTIVECTOR_USE_MERGE_PATH"};
constexpr std::string_view VECTOR_DEVICE_THRESHOLD{
    "TPETRA_VECTOR_DEVICE_THRESHOLD"};
constexpr std::string_view HIERARCHICAL_UNPACK_BATCH_SIZE{
    "TPETRA_HIERARCHICAL_UNPACK_BATCH_SIZE"};
constexpr std::string_view HIERARCHICAL_UNPACK_TEAM_SIZE{
    "TPETRA_HIERARCHICAL_UNPACK_TEAM_SIZE"};

// Profiling regions
constexpr std::string_view USE_TEUCHOS_TIMERS{"TPETRA_USE_TEUCHOS_TIMERS"};
constexpr std::string_view USE_KOKKOS_PROFILING{"TPETRA_USE_KOKKOS_PROFILING"};

// Debug / verbose / timing switches
constexpr std::string_view DEBUG{"TPETRA_DEBUG"};
constexpr std::string_view VERBOSE{"TPETRA_VERBOSE"};
constexpr std::string_view TIMING{"TPETRA_TIMING"};

// Algorithm selection
constexpr std::string_view HIERARCHICAL_UNPACK{"TPETRA_HIERARCHICAL_UNPACK"};
constexpr std::string_view SKIP_COPY_AND_PERMUTE{
    "TPETRA_SKIP_COPY_AND_PERMUTE"};
constexpr std::string_view FUSED_RESIDUAL{"TPETRA_FUSED_RESIDUAL"};
constexpr std::string_view OVERLAP{"TPETRA_OVERLAP"};
constexpr std::string_view DEFAULT_SEND_TYPE{"TPETRA_DEFAULT_SEND_TYPE"};
constexpr std::string_view GRANULAR_TRANSFERS{"TPETRA_GRANULAR_TRANSFERS"};
constexpr std::string_view SPACES_ID_WARN_LIMIT{"TPETRA_SPACES_ID_WARN_LIMIT"};

// Kokkos timer switches
constexpr std::string_view TIME_KOKKOS_DEEP_COPY{
    "TPETRA_TIME_KOKKOS_DEEP_COPY"};
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE1{
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE1"};
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE2{
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE2"};
constexpr std::string_view TIME_KOKKOS_FENCE{"TPETRA_TIME_KOKKOS_FENCE"};
constexpr std::string_view TIME_KOKKOS_FUNCTIONS{
    "TPETRA_TIME_KOKKOS_FUNCTIONS"};
109
110// construct an std::array of string_view with any number of provided
111// string_views
// Pack any number of arguments into a std::array<std::string_view, N>, where
// N is the argument count.  Each argument must be implicitly convertible to
// std::string_view.
template <typename... Views>
constexpr auto make_array(Views &&...views)
    -> std::array<std::string_view, sizeof...(Views)> {
  using result_type = std::array<std::string_view, sizeof...(Views)>;
  return result_type{std::forward<Views>(views)...};
}
117
118constexpr const auto RECOGNIZED_VARS = make_array(
119 ASSUME_GPU_AWARE_MPI, CUDA_LAUNCH_BLOCKING, MM_TAFC_OptimizationCoreCount,
120 VERBOSE_PRINT_COUNT_THRESHOLD, ROW_IMBALANCE_THRESHOLD,
121 MULTIVECTOR_USE_MERGE_PATH, VECTOR_DEVICE_THRESHOLD,
122 HIERARCHICAL_UNPACK_BATCH_SIZE, HIERARCHICAL_UNPACK_TEAM_SIZE,
123 USE_TEUCHOS_TIMERS, USE_KOKKOS_PROFILING, DEBUG, VERBOSE, TIMING,
124 HIERARCHICAL_UNPACK, SKIP_COPY_AND_PERMUTE, FUSED_RESIDUAL, OVERLAP,
125 DEFAULT_SEND_TYPE, GRANULAR_TRANSFERS,
126 SPACES_ID_WARN_LIMIT, TIME_KOKKOS_DEEP_COPY, TIME_KOKKOS_DEEP_COPY_VERBOSE1,
127 TIME_KOKKOS_DEEP_COPY_VERBOSE2, TIME_KOKKOS_FENCE, TIME_KOKKOS_FUNCTIONS);
128
// Two-level string -> string -> bool lookup table.
// NOTE(review): no reader or writer is visible in this part of the file;
// presumably it caches per-name settings -- confirm before relying on it.
std::map<std::string, std::map<std::string, bool>> namedVariableMap_;

// Programmatic kill switches: while set, the verbose/timing queries return
// false without consulting the environment.
bool verboseDisabled_{false};
bool timingDisabled_{false};
132} // namespace BehaviorDetails
133
134namespace { // (anonymous)
135
// Split `s` on the separator character `sep` and call `f` once for every
// non-empty token, in left-to-right order.
//
//   s   - text to split; only viewed, never modified.
//   f   - callback receiving each token as a std::string.  Taken by const
//         reference so the std::function is not copied on each call.
//   sep - separator character (defaults to ',').
//
// Empty tokens (leading, trailing, or between adjacent separators) are
// skipped, so f is never invoked with an empty string.
void split(const std::string_view s,
           const std::function<void(const std::string &)> &f,
           const char sep = ',') {
  using size_type = std::string_view::size_type;

  const size_type length = s.length();
  size_type token_begin = 0;

  // `<=` (not `<`) so that the position just past a trailing separator is
  // still visited; it yields an empty token, which is skipped below.
  while (token_begin <= length) {
    size_type token_end = s.find(sep, token_begin);
    if (token_end == std::string_view::npos) {
      token_end = length;  // last token runs to the end of the input
    }
    if (token_end != token_begin) {
      f(std::string(s.substr(token_begin, token_end - token_begin)));
    }
    token_begin = token_end + 1;
  }
}
154
// Compile-time default for debug mode: true exactly when the build defines
// HAVE_TPETRA_DEBUG.
constexpr bool debugDefault() {
#if defined(HAVE_TPETRA_DEBUG)
  constexpr bool enabledByBuild = true;
#else
  constexpr bool enabledByBuild = false;
#endif
  return enabledByBuild;
}
162
// Compile-time default for verbose mode: always off.
constexpr bool verboseDefault() { return false; }
166
// Compile-time default for timing mode: always off.
constexpr bool timingDefault() { return false; }
170
// Compile-time default for the "assume MPI is GPU-aware" setting: true
// exactly when the build defines the TPETRA_ASSUME_GPU_AWARE_MPI macro.
constexpr bool assumeMpiIsGPUAwareDefault() {
#if defined(TPETRA_ASSUME_GPU_AWARE_MPI)
  constexpr bool assumedByBuild = true;
#else
  constexpr bool assumedByBuild = false;
#endif
  return assumedByBuild;
}
178
// Compile-time default for the CUDA_LAUNCH_BLOCKING setting: always off.
constexpr bool cudaLaunchBlockingDefault() { return false; }
182
// Compile-time default for hierarchical unpacking: always on.
constexpr bool hierarchicalUnpackDefault() { return true; }
186
187} // namespace
188
190 static bool once = false;
191
192 if (!once) {
193 const char prefix[] = "Tpetra::Details::Behavior: ";
194 char **env;
195#if defined(WIN) && (_MSC_VER >= 1900)
196 env = *__p__environ();
197#else
198 env = environ; // defined at the top of this file as extern char **environ;
199#endif
200 for (; *env; ++env) {
201 std::string name;
202 std::string value;
203 const std::string_view ev(*env);
204
205 // split name=value on the first =, everything before = is name
206 split(
207 ev,
208 [&](const std::string &s) {
209 if (name.empty()) {
210 name = s;
211 } else {
212 value = s;
213 }
214 },
215 '=');
216
217 if (name.size() >= BehaviorDetails::RESERVED_PREFIX.size() &&
218 name.substr(0, BehaviorDetails::RESERVED_PREFIX.size()) ==
219 BehaviorDetails::RESERVED_PREFIX) {
220 const auto it = std::find(BehaviorDetails::RECOGNIZED_VARS.begin(),
221 BehaviorDetails::RECOGNIZED_VARS.end(), name);
223 it == BehaviorDetails::RECOGNIZED_VARS.end(), std::out_of_range,
224 prefix << "Environment "
225 "variable \""
226 << name << "\" (prefixed with \""
227 << BehaviorDetails::RESERVED_PREFIX
228 << "\") is not a recognized Tpetra variable.");
229 }
230 }
231
232 once = true;
233 }
234}
235
237 constexpr bool defaultValue = debugDefault();
238
239 static bool value_ = defaultValue;
240 static bool initialized_ = false;
241 return Teuchos::idempotentlyGetEnvironmentVariable(
242 value_, initialized_, BehaviorDetails::DEBUG, defaultValue);
243}
244
246 if (BehaviorDetails::verboseDisabled_)
247 return false;
248
249 constexpr bool defaultValue = verboseDefault();
250
251 static bool value_ = defaultValue;
252 static bool initialized_ = false;
253 return Teuchos::idempotentlyGetEnvironmentVariable(
254 value_, initialized_, BehaviorDetails::VERBOSE, defaultValue);
255}
256
258 if (BehaviorDetails::timingDisabled_)
259 return false;
260
261 constexpr bool defaultValue = timingDefault();
262
263 static bool value_ = defaultValue;
264 static bool initialized_ = false;
265 return Teuchos::idempotentlyGetEnvironmentVariable(
266 value_, initialized_, BehaviorDetails::TIMING, defaultValue);
267}
268
270 constexpr bool defaultValue = assumeMpiIsGPUAwareDefault();
271
272 static bool value_ = defaultValue;
273 static bool initialized_ = false;
274 return Teuchos::idempotentlyGetEnvironmentVariable(
275 value_, initialized_, BehaviorDetails::ASSUME_GPU_AWARE_MPI,
277}
278
280 constexpr bool defaultValue = cudaLaunchBlockingDefault();
281
282 static bool value_ = defaultValue;
283 static bool initialized_ = false;
284 return Teuchos::idempotentlyGetEnvironmentVariable(
285 value_, initialized_, BehaviorDetails::CUDA_LAUNCH_BLOCKING,
287}
288
290 constexpr int _default = 3000;
291 static int value_ = _default;
292 static bool initialized_ = false;
293 return Teuchos::idempotentlyGetEnvironmentVariable(
294 value_, initialized_, BehaviorDetails::MM_TAFC_OptimizationCoreCount,
295 _default);
296}
297
299 constexpr size_t defaultValue(200);
300
301 static size_t value_ = defaultValue;
302 static bool initialized_ = false;
303 return Teuchos::idempotentlyGetEnvironmentVariable(
304 value_, initialized_, BehaviorDetails::VERBOSE_PRINT_COUNT_THRESHOLD,
306}
307
309 constexpr size_t defaultValue(256);
310
311 static size_t value_ = defaultValue;
312 static bool initialized_ = false;
313 return Teuchos::idempotentlyGetEnvironmentVariable(
314 value_, initialized_, BehaviorDetails::ROW_IMBALANCE_THRESHOLD,
316}
317
319 constexpr bool defaultValue = false;
320
321 static bool value_ = defaultValue;
322 static bool initialized_ = false;
323 return Teuchos::idempotentlyGetEnvironmentVariable(
324 value_, initialized_, BehaviorDetails::MULTIVECTOR_USE_MERGE_PATH,
326}
327
329 constexpr size_t defaultValue(22000);
330
331 static size_t value_ = defaultValue;
332 static bool initialized_ = false;
333 return Teuchos::idempotentlyGetEnvironmentVariable(
334 value_, initialized_, BehaviorDetails::VECTOR_DEVICE_THRESHOLD,
336}
337
339#ifdef HAVE_TPETRA_INST_CUDA
340 constexpr size_t defaultValue(16);
341#else
342 constexpr size_t defaultValue(256);
343#endif
344
345 static size_t value_ = defaultValue;
346 static bool initialized_ = false;
347 return Teuchos::idempotentlyGetEnvironmentVariable(
348 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_BATCH_SIZE,
350}
351
353#ifdef HAVE_TPETRA_INST_CUDA
354 const size_t defaultValue(16);
355#else
356 const size_t defaultValue(Teuchos::OrdinalTraits<size_t>::invalid());
357#endif
358
359 static size_t value_ = defaultValue;
360 static bool initialized_ = false;
361 return Teuchos::idempotentlyGetEnvironmentVariable(
362 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_TEAM_SIZE,
364}
365
367 constexpr bool defaultValue(false);
368
369 static bool value_ = defaultValue;
370 static bool initialized_ = false;
371 return Teuchos::idempotentlyGetEnvironmentVariable(
372 value_, initialized_, BehaviorDetails::USE_TEUCHOS_TIMERS, defaultValue);
373}
374
376 constexpr bool defaultValue(false);
377
378 static bool value_ = defaultValue;
379 static bool initialized_ = false;
380 return Teuchos::idempotentlyGetEnvironmentVariable(
381 value_, initialized_, BehaviorDetails::USE_KOKKOS_PROFILING,
383}
384
385bool Behavior::debug(const char name[]) {
386 constexpr bool defaultValue = false;
387
388 static bool initialized_ = false;
389 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
390 name, initialized_, BehaviorDetails::DEBUG.data(), defaultValue);
391}
392
393bool Behavior::verbose(const char name[]) {
394 if (BehaviorDetails::verboseDisabled_)
395 return false;
396
397 constexpr bool defaultValue = false;
398
399 static bool initialized_ = false;
400 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
401 name, initialized_, BehaviorDetails::VERBOSE.data(), defaultValue);
402}
403
405 BehaviorDetails::verboseDisabled_ = false;
406}
407
409 BehaviorDetails::verboseDisabled_ = true;
410}
411
412bool Behavior::timing(const char name[]) {
413 if (BehaviorDetails::timingDisabled_)
414 return false;
415
416 constexpr bool defaultValue = false;
417
418 static bool initialized_ = false;
419 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
420 name, initialized_, BehaviorDetails::TIMING.data(), defaultValue);
421}
422
423void Behavior::enable_timing() { BehaviorDetails::timingDisabled_ = false; }
424
425void Behavior::disable_timing() { BehaviorDetails::timingDisabled_ = true; }
426
428 constexpr bool defaultValue = hierarchicalUnpackDefault();
429
430 static bool value_ = defaultValue;
431 static bool initialized_ = false;
432 return Teuchos::idempotentlyGetEnvironmentVariable(
433 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK, defaultValue);
434}
435
437 constexpr bool defaultValue(false);
438
439 static bool value_ = defaultValue;
440 static bool initialized_ = false;
441 return Teuchos::idempotentlyGetEnvironmentVariable(
442 value_, initialized_, BehaviorDetails::SKIP_COPY_AND_PERMUTE,
444}
445
447#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || \
448 defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || \
449 defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
450 constexpr bool defaultValue(false);
451#else
452 constexpr bool defaultValue(true);
453#endif
454
455 static bool value_ = defaultValue;
456 static bool initialized_ = false;
457 return Teuchos::idempotentlyGetEnvironmentVariable(
458 value_, initialized_, BehaviorDetails::FUSED_RESIDUAL, defaultValue);
459}
460
462 constexpr bool defaultValue(false);
463
464 static bool value_ = defaultValue;
465 static bool initialized_ = false;
466 return Teuchos::idempotentlyGetEnvironmentVariable(
467 value_, initialized_, BehaviorDetails::OVERLAP, defaultValue);
468}
469
471 const std::string defaultValue("Send");
472
473 static std::string value_ = defaultValue;
474 static bool initialized_ = false;
475 return Teuchos::idempotentlyGetEnvironmentVariable(
476 value_, initialized_, BehaviorDetails::DEFAULT_SEND_TYPE, defaultValue);
477}
478
480 constexpr bool defaultValue(false);
481
482 static bool value_ = defaultValue;
483 static bool initialized_ = false;
484 return Teuchos::idempotentlyGetEnvironmentVariable(
485 value_, initialized_, BehaviorDetails::GRANULAR_TRANSFERS, defaultValue);
486}
487
489 constexpr size_t defaultValue(16);
490
491 static size_t value_ = defaultValue;
492 static bool initialized_ = false;
493 return Teuchos::idempotentlyGetEnvironmentVariable(
494 value_, initialized_, BehaviorDetails::SPACES_ID_WARN_LIMIT,
496}
497
499 constexpr bool defaultValue(false);
500
501 static bool value_ = defaultValue;
502 static bool initialized_ = false;
503 return Teuchos::idempotentlyGetEnvironmentVariable(
504 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY,
506}
507
509 constexpr bool defaultValue(false);
510
511 static bool value_ = defaultValue;
512 static bool initialized_ = false;
513 return Teuchos::idempotentlyGetEnvironmentVariable(
514 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE1,
516}
517
519 constexpr bool defaultValue(false);
520
521 static bool value_ = defaultValue;
522 static bool initialized_ = false;
523 return Teuchos::idempotentlyGetEnvironmentVariable(
524 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE2,
526}
527
529 constexpr bool defaultValue(false);
530
531 static bool value_ = defaultValue;
532 static bool initialized_ = false;
533 return Teuchos::idempotentlyGetEnvironmentVariable(
534 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FENCE, defaultValue);
535}
536
538 constexpr bool defaultValue(false);
539
540 static bool value_ = defaultValue;
541 static bool initialized_ = false;
542 return Teuchos::idempotentlyGetEnvironmentVariable(
543 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FUNCTIONS,
545}
546
547} // namespace Details
548} // namespace Tpetra
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Struct that holds views of the contents of a CrsMatrix.
static bool timing()
Whether Tpetra is in timing mode.
static void enable_verbose_behavior()
Enable verbose mode, programmatically.
static void disable_timing()
Disable timing, programmatically.
static bool cudaLaunchBlocking()
Whether the CUDA_LAUNCH_BLOCKING environment variable has been set.
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size....
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
static bool hierarchicalUnpack()
Unpack rows of a matrix using hierarchical unpacking.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
static bool debug()
Whether Tpetra is in debug mode.
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
static bool enableGranularTransfers()
Speed up transfers by overlapping computation and communication.
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
static void enable_timing()
Enable timing, programmatically.
static bool profilingRegionUseTeuchosTimers()
Use Teuchos::Timer in Tpetra::ProfilingRegion.
static bool profilingRegionUseKokkosProfiling()
Use Kokkos::Profiling in Tpetra::ProfilingRegion.
static bool verbose()
Whether Tpetra is in verbose mode.
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
static bool useMergePathMultiVector()
Whether to use the cuSPARSE merge path algorithm to perform sparse matrix-multivector products,...
static size_t multivectorKernelLocationThreshold()
the threshold for transitioning from device to host
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination....
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static void disable_verbose_behavior()
Disable verbose mode, programmatically.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row....
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
static std::string defaultSendType()
Default send type.
Implementation details of Tpetra.
Namespace Tpetra contains the class and methods constituting the Tpetra library.