Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_Behavior.cpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#include "Teuchos_EnvVariables.hpp"
11#include "Teuchos_OrdinalTraits.hpp"
12#include "Teuchos_TestForException.hpp"
13#include "TpetraCore_config.h"
15#include "KokkosKernels_config.h" // for TPL enable macros
16#include <array>
17#include <functional>
18#include <map>
19
54// environ should be available on posix platforms
55#if not(defined(WIN) && (_MSC_VER >= 1900))
56// needs to be in the global namespace
57extern char **environ;
58#endif
59
60namespace Tpetra {
61namespace Details {
62
63namespace BehaviorDetails {
64
// Prefix that marks an environment variable as belonging to Tpetra.  The
// environment check in this file compares every variable whose name starts
// with this prefix against RECOGNIZED_VARS.
constexpr std::string_view RESERVED_PREFIX = "TPETRA_";

// Names of the environment variables read by the accessors in this file.
// Each constant holds the exact spelling that is looked up in the
// environment.  Note that CUDA_LAUNCH_BLOCKING and
// MM_TAFC_OptimizationCoreCount do not carry the reserved prefix.
constexpr std::string_view ASSUME_GPU_AWARE_MPI =
    "TPETRA_ASSUME_GPU_AWARE_MPI";
constexpr std::string_view CUDA_LAUNCH_BLOCKING = "CUDA_LAUNCH_BLOCKING";
constexpr std::string_view MM_TAFC_OptimizationCoreCount =
    "MM_TAFC_OptimizationCoreCount";
constexpr std::string_view VERBOSE_PRINT_COUNT_THRESHOLD =
    "TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD";
constexpr std::string_view ROW_IMBALANCE_THRESHOLD =
    "TPETRA_ROW_IMBALANCE_THRESHOLD";
constexpr std::string_view MULTIVECTOR_USE_MERGE_PATH =
    "TPETRA_MULTIVECTOR_USE_MERGE_PATH";
constexpr std::string_view VECTOR_DEVICE_THRESHOLD =
    "TPETRA_VECTOR_DEVICE_THRESHOLD";
constexpr std::string_view HIERARCHICAL_UNPACK_BATCH_SIZE =
    "TPETRA_HIERARCHICAL_UNPACK_BATCH_SIZE";
constexpr std::string_view HIERARCHICAL_UNPACK_TEAM_SIZE =
    "TPETRA_HIERARCHICAL_UNPACK_TEAM_SIZE";
constexpr std::string_view USE_TEUCHOS_TIMERS = "TPETRA_USE_TEUCHOS_TIMERS";
constexpr std::string_view USE_KOKKOS_PROFILING =
    "TPETRA_USE_KOKKOS_PROFILING";
constexpr std::string_view DEBUG   = "TPETRA_DEBUG";
constexpr std::string_view VERBOSE = "TPETRA_VERBOSE";
constexpr std::string_view TIMING  = "TPETRA_TIMING";
constexpr std::string_view HIERARCHICAL_UNPACK = "TPETRA_HIERARCHICAL_UNPACK";
constexpr std::string_view SKIP_COPY_AND_PERMUTE =
    "TPETRA_SKIP_COPY_AND_PERMUTE";
constexpr std::string_view FUSED_RESIDUAL     = "TPETRA_FUSED_RESIDUAL";
constexpr std::string_view OVERLAP            = "TPETRA_OVERLAP";
constexpr std::string_view DEFAULT_SEND_TYPE  = "TPETRA_DEFAULT_SEND_TYPE";
constexpr std::string_view GRANULAR_TRANSFERS = "TPETRA_GRANULAR_TRANSFERS";
constexpr std::string_view SPACES_ID_WARN_LIMIT =
    "TPETRA_SPACES_ID_WARN_LIMIT";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY =
    "TPETRA_TIME_KOKKOS_DEEP_COPY";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE1 =
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE1";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE2 =
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE2";
constexpr std::string_view TIME_KOKKOS_FENCE = "TPETRA_TIME_KOKKOS_FENCE";
constexpr std::string_view TIME_KOKKOS_FUNCTIONS =
    "TPETRA_TIME_KOKKOS_FUNCTIONS";
constexpr std::string_view USE_NEW_COPY_AND_PERMUTE =
    "TPETRA_USE_NEW_COPY_AND_PERMUTE";
110
111// construct an std::array of string_view with any number of provided
112// string_views
// Build a std::array of std::string_view whose length equals the number of
// arguments.  Each argument is perfectly forwarded into the array's
// initializer, so every argument must be convertible to std::string_view.
template <typename... Elems>
constexpr std::array<std::string_view, sizeof...(Elems)>
make_array(Elems &&...elems) {
  using Result = std::array<std::string_view, sizeof...(Elems)>;
  return Result{std::forward<Elems>(elems)...};
}
118
119constexpr const auto RECOGNIZED_VARS = make_array(
120 ASSUME_GPU_AWARE_MPI, CUDA_LAUNCH_BLOCKING, MM_TAFC_OptimizationCoreCount,
121 VERBOSE_PRINT_COUNT_THRESHOLD, ROW_IMBALANCE_THRESHOLD,
122 MULTIVECTOR_USE_MERGE_PATH, VECTOR_DEVICE_THRESHOLD,
123 HIERARCHICAL_UNPACK_BATCH_SIZE, HIERARCHICAL_UNPACK_TEAM_SIZE,
124 USE_TEUCHOS_TIMERS, USE_KOKKOS_PROFILING, DEBUG, VERBOSE, TIMING,
125 HIERARCHICAL_UNPACK, SKIP_COPY_AND_PERMUTE, FUSED_RESIDUAL, OVERLAP,
126 DEFAULT_SEND_TYPE, GRANULAR_TRANSFERS,
127 SPACES_ID_WARN_LIMIT, TIME_KOKKOS_DEEP_COPY, TIME_KOKKOS_DEEP_COPY_VERBOSE1,
128 TIME_KOKKOS_DEEP_COPY_VERBOSE2, TIME_KOKKOS_FENCE, TIME_KOKKOS_FUNCTIONS,
129 USE_NEW_COPY_AND_PERMUTE);
130
// NOTE(review): not referenced in the visible part of this file; presumably
// a cache for per-name variable lookups -- confirm before relying on it.
std::map<std::string, std::map<std::string, bool>> namedVariableMap_;
// While true, Behavior::verbose(name) returns false without consulting the
// environment.
bool verboseDisabled_ = false;
// While true, Behavior::timing(name) returns false without consulting the
// environment.  Set by Behavior::disable_timing() and cleared by
// Behavior::enable_timing().
bool timingDisabled_ = false;
134} // namespace BehaviorDetails
135
136namespace { // (anonymous)
137
// Tokenize `s` on the single character `sep` and invoke `f` once per
// non-empty token, in order of appearance.
//
//   s   - text to split; it is only read, never modified.
//   f   - callback that receives each token as a std::string.
//   sep - separator character (defaults to ',').
//
// Empty tokens (leading, trailing or consecutive separators, or an empty
// input) are skipped, so `f` is never called with an empty string.
void split(const std::string_view s,
           std::function<void(const std::string &)> f,
           const char sep = ',') {
  using size_type = std::string_view::size_type;

  size_type start = 0;
  // `start == s.size()` is still visited so that the token ending at the end
  // of the input is handled by the same code path as the others.
  while (start <= s.size()) {
    size_type stop = s.find(sep, start);
    if (stop == std::string_view::npos) {
      stop = s.size();
    }
    if (stop != start) {
      f(std::string(s.substr(start, stop - start)));
    }
    start = stop + 1;
  }
}
156
// Compile-time default for debug mode: true exactly when the build defines
// HAVE_TPETRA_DEBUG, false otherwise.
constexpr bool debugDefault() {
#if defined(HAVE_TPETRA_DEBUG)
  return true;
#else
  return false;
#endif  // HAVE_TPETRA_DEBUG
}
164
// Compile-time default for verbose mode; always false.
constexpr bool verboseDefault() {
  return false;
}
168
// Compile-time default for timing mode; always false.
constexpr bool timingDefault() {
  return false;
}
172
// Compile-time default for assuming GPU-aware MPI: true exactly when the
// build defines the TPETRA_ASSUME_GPU_AWARE_MPI macro, false otherwise.
constexpr bool assumeMpiIsGPUAwareDefault() {
#if defined(TPETRA_ASSUME_GPU_AWARE_MPI)
  return true;
#else
  return false;
#endif  // TPETRA_ASSUME_GPU_AWARE_MPI
}
180
// Compile-time default for the CUDA_LAUNCH_BLOCKING setting; always false.
constexpr bool cudaLaunchBlockingDefault() {
  return false;
}
184
// Compile-time default for hierarchical unpacking; always true.
constexpr bool hierarchicalUnpackDefault() {
  return true;
}
188
189} // namespace
190
192 static bool once = false;
193
194 if (!once) {
195 const char prefix[] = "Tpetra::Details::Behavior: ";
196 char **env;
197#if defined(WIN) && (_MSC_VER >= 1900)
198 env = *__p__environ();
199#else
200 env = environ; // defined at the top of this file as extern char **environ;
201#endif
202 for (; *env; ++env) {
203 std::string name;
204 std::string value;
205 const std::string_view ev(*env);
206
207 // split name=value on the first =, everything before = is name
208 split(
209 ev,
210 [&](const std::string &s) {
211 if (name.empty()) {
212 name = s;
213 } else {
214 value = s;
215 }
216 },
217 '=');
218
219 if (name.size() >= BehaviorDetails::RESERVED_PREFIX.size() &&
220 name.substr(0, BehaviorDetails::RESERVED_PREFIX.size()) ==
221 BehaviorDetails::RESERVED_PREFIX) {
222 const auto it = std::find(BehaviorDetails::RECOGNIZED_VARS.begin(),
223 BehaviorDetails::RECOGNIZED_VARS.end(), name);
225 it == BehaviorDetails::RECOGNIZED_VARS.end(), std::out_of_range,
226 prefix << "Environment "
227 "variable \""
228 << name << "\" (prefixed with \""
229 << BehaviorDetails::RESERVED_PREFIX
230 << "\") is not a recognized Tpetra variable.");
231 }
232 }
233
234 once = true;
235 }
236}
237
239 constexpr bool defaultValue = debugDefault();
240
241 static bool value_ = defaultValue;
242 static bool initialized_ = false;
243 return Teuchos::idempotentlyGetEnvironmentVariable(
244 value_, initialized_, BehaviorDetails::DEBUG, defaultValue);
245}
246
248 if (BehaviorDetails::verboseDisabled_)
249 return false;
250
251 constexpr bool defaultValue = verboseDefault();
252
253 static bool value_ = defaultValue;
254 static bool initialized_ = false;
255 return Teuchos::idempotentlyGetEnvironmentVariable(
256 value_, initialized_, BehaviorDetails::VERBOSE, defaultValue);
257}
258
260 if (BehaviorDetails::timingDisabled_)
261 return false;
262
263 constexpr bool defaultValue = timingDefault();
264
265 static bool value_ = defaultValue;
266 static bool initialized_ = false;
267 return Teuchos::idempotentlyGetEnvironmentVariable(
268 value_, initialized_, BehaviorDetails::TIMING, defaultValue);
269}
270
272 constexpr bool defaultValue = assumeMpiIsGPUAwareDefault();
273
274 static bool value_ = defaultValue;
275 static bool initialized_ = false;
276 return Teuchos::idempotentlyGetEnvironmentVariable(
277 value_, initialized_, BehaviorDetails::ASSUME_GPU_AWARE_MPI,
279}
280
282 constexpr bool defaultValue = cudaLaunchBlockingDefault();
283
284 static bool value_ = defaultValue;
285 static bool initialized_ = false;
286 return Teuchos::idempotentlyGetEnvironmentVariable(
287 value_, initialized_, BehaviorDetails::CUDA_LAUNCH_BLOCKING,
289}
290
292 constexpr int _default = 3000;
293 static int value_ = _default;
294 static bool initialized_ = false;
295 return Teuchos::idempotentlyGetEnvironmentVariable(
296 value_, initialized_, BehaviorDetails::MM_TAFC_OptimizationCoreCount,
297 _default);
298}
299
301 constexpr size_t defaultValue(200);
302
303 static size_t value_ = defaultValue;
304 static bool initialized_ = false;
305 return Teuchos::idempotentlyGetEnvironmentVariable(
306 value_, initialized_, BehaviorDetails::VERBOSE_PRINT_COUNT_THRESHOLD,
308}
309
311 constexpr size_t defaultValue(256);
312
313 static size_t value_ = defaultValue;
314 static bool initialized_ = false;
315 return Teuchos::idempotentlyGetEnvironmentVariable(
316 value_, initialized_, BehaviorDetails::ROW_IMBALANCE_THRESHOLD,
318}
319
321 constexpr bool defaultValue = false;
322
323 static bool value_ = defaultValue;
324 static bool initialized_ = false;
325 return Teuchos::idempotentlyGetEnvironmentVariable(
326 value_, initialized_, BehaviorDetails::MULTIVECTOR_USE_MERGE_PATH,
328}
329
331 constexpr size_t defaultValue(22000);
332
333 static size_t value_ = defaultValue;
334 static bool initialized_ = false;
335 return Teuchos::idempotentlyGetEnvironmentVariable(
336 value_, initialized_, BehaviorDetails::VECTOR_DEVICE_THRESHOLD,
338}
339
341#ifdef HAVE_TPETRA_INST_CUDA
342 constexpr size_t defaultValue(16);
343#else
344 constexpr size_t defaultValue(256);
345#endif
346
347 static size_t value_ = defaultValue;
348 static bool initialized_ = false;
349 return Teuchos::idempotentlyGetEnvironmentVariable(
350 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_BATCH_SIZE,
352}
353
355#ifdef HAVE_TPETRA_INST_CUDA
356 const size_t defaultValue(16);
357#else
358 const size_t defaultValue(Teuchos::OrdinalTraits<size_t>::invalid());
359#endif
360
361 static size_t value_ = defaultValue;
362 static bool initialized_ = false;
363 return Teuchos::idempotentlyGetEnvironmentVariable(
364 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_TEAM_SIZE,
366}
367
369 constexpr bool defaultValue(false);
370
371 static bool value_ = defaultValue;
372 static bool initialized_ = false;
373 return Teuchos::idempotentlyGetEnvironmentVariable(
374 value_, initialized_, BehaviorDetails::USE_TEUCHOS_TIMERS, defaultValue);
375}
376
378 constexpr bool defaultValue(false);
379
380 static bool value_ = defaultValue;
381 static bool initialized_ = false;
382 return Teuchos::idempotentlyGetEnvironmentVariable(
383 value_, initialized_, BehaviorDetails::USE_KOKKOS_PROFILING,
385}
386
387bool Behavior::debug(const char name[]) {
388 constexpr bool defaultValue = false;
389
390 static bool initialized_ = false;
391 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
392 name, initialized_, BehaviorDetails::DEBUG.data(), defaultValue);
393}
394
395bool Behavior::verbose(const char name[]) {
396 if (BehaviorDetails::verboseDisabled_)
397 return false;
398
399 constexpr bool defaultValue = false;
400
401 static bool initialized_ = false;
402 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
403 name, initialized_, BehaviorDetails::VERBOSE.data(), defaultValue);
404}
405
407 BehaviorDetails::verboseDisabled_ = false;
408}
409
411 BehaviorDetails::verboseDisabled_ = true;
412}
413
414bool Behavior::timing(const char name[]) {
415 if (BehaviorDetails::timingDisabled_)
416 return false;
417
418 constexpr bool defaultValue = false;
419
420 static bool initialized_ = false;
421 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
422 name, initialized_, BehaviorDetails::TIMING.data(), defaultValue);
423}
424
425void Behavior::enable_timing() { BehaviorDetails::timingDisabled_ = false; }
426
427void Behavior::disable_timing() { BehaviorDetails::timingDisabled_ = true; }
428
430 constexpr bool defaultValue = hierarchicalUnpackDefault();
431
432 static bool value_ = defaultValue;
433 static bool initialized_ = false;
434 return Teuchos::idempotentlyGetEnvironmentVariable(
435 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK, defaultValue);
436}
437
439 constexpr bool defaultValue(false);
440
441 static bool value_ = defaultValue;
442 static bool initialized_ = false;
443 return Teuchos::idempotentlyGetEnvironmentVariable(
444 value_, initialized_, BehaviorDetails::SKIP_COPY_AND_PERMUTE,
446}
447
449#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || \
450 defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || \
451 defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
452 constexpr bool defaultValue(false);
453#else
454 constexpr bool defaultValue(true);
455#endif
456
457 static bool value_ = defaultValue;
458 static bool initialized_ = false;
459 return Teuchos::idempotentlyGetEnvironmentVariable(
460 value_, initialized_, BehaviorDetails::FUSED_RESIDUAL, defaultValue);
461}
462
464 constexpr bool defaultValue(false);
465
466 static bool value_ = defaultValue;
467 static bool initialized_ = false;
468 return Teuchos::idempotentlyGetEnvironmentVariable(
469 value_, initialized_, BehaviorDetails::OVERLAP, defaultValue);
470}
471
473 const std::string defaultValue("Send");
474
475 static std::string value_ = defaultValue;
476 static bool initialized_ = false;
477 return Teuchos::idempotentlyGetEnvironmentVariable(
478 value_, initialized_, BehaviorDetails::DEFAULT_SEND_TYPE, defaultValue);
479}
480
482 constexpr bool defaultValue(false);
483
484 static bool value_ = defaultValue;
485 static bool initialized_ = false;
486 return Teuchos::idempotentlyGetEnvironmentVariable(
487 value_, initialized_, BehaviorDetails::GRANULAR_TRANSFERS, defaultValue);
488}
489
491 constexpr size_t defaultValue(16);
492
493 static size_t value_ = defaultValue;
494 static bool initialized_ = false;
495 return Teuchos::idempotentlyGetEnvironmentVariable(
496 value_, initialized_, BehaviorDetails::SPACES_ID_WARN_LIMIT,
498}
499
501 constexpr bool defaultValue(false);
502
503 static bool value_ = defaultValue;
504 static bool initialized_ = false;
505 return Teuchos::idempotentlyGetEnvironmentVariable(
506 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY,
508}
509
511 constexpr bool defaultValue(false);
512
513 static bool value_ = defaultValue;
514 static bool initialized_ = false;
515 return Teuchos::idempotentlyGetEnvironmentVariable(
516 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE1,
518}
519
521 constexpr bool defaultValue(false);
522
523 static bool value_ = defaultValue;
524 static bool initialized_ = false;
525 return Teuchos::idempotentlyGetEnvironmentVariable(
526 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE2,
528}
529
531 constexpr bool defaultValue(false);
532
533 static bool value_ = defaultValue;
534 static bool initialized_ = false;
535 return Teuchos::idempotentlyGetEnvironmentVariable(
536 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FENCE, defaultValue);
537}
538
540 constexpr bool defaultValue(false);
541
542 static bool value_ = defaultValue;
543 static bool initialized_ = false;
544 return Teuchos::idempotentlyGetEnvironmentVariable(
545 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FUNCTIONS,
547}
548
550 constexpr bool defaultValue(false);
551
552 static bool value_ = defaultValue;
553 static bool initialized_ = false;
554 return Teuchos::idempotentlyGetEnvironmentVariable(
555 value_, initialized_, BehaviorDetails::USE_NEW_COPY_AND_PERMUTE,
557}
558
559} // namespace Details
560} // namespace Tpetra
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Struct that holds views of the contents of a CrsMatrix.
static bool useNewCopyAndPermute()
Use new implementation of copyAndPermute.
static bool timing()
Whether Tpetra is in timing mode.
static void enable_verbose_behavior()
Enable verbose mode, programmatically.
static void disable_timing()
Disable timing, programmatically.
static bool cudaLaunchBlocking()
Whether the CUDA_LAUNCH_BLOCKING environment variable has been set.
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size....
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
static bool hierarchicalUnpack()
Unpack rows of a matrix using hierarchical unpacking.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
static bool debug()
Whether Tpetra is in debug mode.
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
static bool enableGranularTransfers()
Speed up transfers by overlapping computation and communication.
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
static void enable_timing()
Enable timing, programmatically.
static bool profilingRegionUseTeuchosTimers()
Use Teuchos::Timer in Tpetra::ProfilingRegion.
static bool profilingRegionUseKokkosProfiling()
Use Kokkos::Profiling in Tpetra::ProfilingRegion.
static bool verbose()
Whether Tpetra is in verbose mode.
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
static bool useMergePathMultiVector()
Whether to use the cuSPARSE merge path algorithm to perform sparse matrix-multivector products,...
static size_t multivectorKernelLocationThreshold()
The threshold for transitioning from device to host.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination....
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static void disable_verbose_behavior()
Disable verbose mode, programmatically.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row....
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
static std::string defaultSendType()
Default send type.
Implementation details of Tpetra.
Namespace Tpetra contains the class and methods constituting the Tpetra library.