10#include "Teuchos_EnvVariables.hpp"
11#include "Teuchos_OrdinalTraits.hpp"
12#include "Teuchos_TestForException.hpp"
13#include "TpetraCore_config.h"
15#include "KokkosKernels_config.h"
55#if not(defined(WIN) && (_MSC_VER >= 1900))
63namespace BehaviorDetails {
// Names of the environment variables consulted by Tpetra::Details::Behavior.
//
// Each constant is the exact, case-sensitive name of one variable. They are
// `constexpr std::string_view`s (implicitly const), so they can be compared
// and collected into a compile-time table without any dynamic initialization.

/// Prefix reserved for Tpetra's own variables. A name that starts with this
/// prefix but is not one of the recognized names is reported as an error
/// ("... is not a recognized Tpetra variable.").
constexpr std::string_view RESERVED_PREFIX = "TPETRA_";

constexpr std::string_view ASSUME_GPU_AWARE_MPI = "TPETRA_ASSUME_GPU_AWARE_MPI";

// The next two names do not carry the reserved prefix, so the reserved-prefix
// check never applies to them.
constexpr std::string_view CUDA_LAUNCH_BLOCKING = "CUDA_LAUNCH_BLOCKING";
constexpr std::string_view MM_TAFC_OptimizationCoreCount =
    "MM_TAFC_OptimizationCoreCount";

// Thresholds and sizes.
constexpr std::string_view VERBOSE_PRINT_COUNT_THRESHOLD =
    "TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD";
constexpr std::string_view ROW_IMBALANCE_THRESHOLD =
    "TPETRA_ROW_IMBALANCE_THRESHOLD";
constexpr std::string_view MULTIVECTOR_USE_MERGE_PATH =
    "TPETRA_MULTIVECTOR_USE_MERGE_PATH";
constexpr std::string_view VECTOR_DEVICE_THRESHOLD =
    "TPETRA_VECTOR_DEVICE_THRESHOLD";
constexpr std::string_view HIERARCHICAL_UNPACK_BATCH_SIZE =
    "TPETRA_HIERARCHICAL_UNPACK_BATCH_SIZE";
constexpr std::string_view HIERARCHICAL_UNPACK_TEAM_SIZE =
    "TPETRA_HIERARCHICAL_UNPACK_TEAM_SIZE";

// Profiling-region back ends.
constexpr std::string_view USE_TEUCHOS_TIMERS = "TPETRA_USE_TEUCHOS_TIMERS";
constexpr std::string_view USE_KOKKOS_PROFILING = "TPETRA_USE_KOKKOS_PROFILING";

// Global modes.
constexpr std::string_view DEBUG = "TPETRA_DEBUG";
constexpr std::string_view VERBOSE = "TPETRA_VERBOSE";
constexpr std::string_view TIMING = "TPETRA_TIMING";

// Algorithm and communication switches.
constexpr std::string_view HIERARCHICAL_UNPACK = "TPETRA_HIERARCHICAL_UNPACK";
constexpr std::string_view SKIP_COPY_AND_PERMUTE =
    "TPETRA_SKIP_COPY_AND_PERMUTE";
constexpr std::string_view FUSED_RESIDUAL = "TPETRA_FUSED_RESIDUAL";
constexpr std::string_view OVERLAP = "TPETRA_OVERLAP";
constexpr std::string_view DEFAULT_SEND_TYPE = "TPETRA_DEFAULT_SEND_TYPE";
constexpr std::string_view GRANULAR_TRANSFERS = "TPETRA_GRANULAR_TRANSFERS";
constexpr std::string_view SPACES_ID_WARN_LIMIT = "TPETRA_SPACES_ID_WARN_LIMIT";

// Timers around Kokkos calls.
constexpr std::string_view TIME_KOKKOS_DEEP_COPY =
    "TPETRA_TIME_KOKKOS_DEEP_COPY";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE1 =
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE1";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE2 =
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE2";
constexpr std::string_view TIME_KOKKOS_FENCE = "TPETRA_TIME_KOKKOS_FENCE";
constexpr std::string_view TIME_KOKKOS_FUNCTIONS =
    "TPETRA_TIME_KOKKOS_FUNCTIONS";

constexpr std::string_view USE_NEW_COPY_AND_PERMUTE =
    "TPETRA_USE_NEW_COPY_AND_PERMUTE";
/// Build a `std::array<std::string_view, N>` from the argument list, where N
/// is the number of arguments.
///
/// @tparam Elems  Argument types; each must be convertible to
///                `std::string_view`.
/// @param elems   Values to store, in order.
/// @return        An array holding one `std::string_view` per argument.
///
/// Being `constexpr`, this can initialize a compile-time table without
/// spelling out the element count by hand.
template <typename... Elems>
constexpr std::array<std::string_view, sizeof...(Elems)>
make_array(Elems &&...elems) {
  return {std::forward<Elems>(elems)...};
}
119constexpr const auto RECOGNIZED_VARS = make_array(
120 ASSUME_GPU_AWARE_MPI, CUDA_LAUNCH_BLOCKING, MM_TAFC_OptimizationCoreCount,
121 VERBOSE_PRINT_COUNT_THRESHOLD, ROW_IMBALANCE_THRESHOLD,
122 MULTIVECTOR_USE_MERGE_PATH, VECTOR_DEVICE_THRESHOLD,
123 HIERARCHICAL_UNPACK_BATCH_SIZE, HIERARCHICAL_UNPACK_TEAM_SIZE,
124 USE_TEUCHOS_TIMERS, USE_KOKKOS_PROFILING, DEBUG, VERBOSE, TIMING,
125 HIERARCHICAL_UNPACK, SKIP_COPY_AND_PERMUTE, FUSED_RESIDUAL, OVERLAP,
126 DEFAULT_SEND_TYPE, GRANULAR_TRANSFERS,
127 SPACES_ID_WARN_LIMIT, TIME_KOKKOS_DEEP_COPY, TIME_KOKKOS_DEEP_COPY_VERBOSE1,
128 TIME_KOKKOS_DEEP_COPY_VERBOSE2, TIME_KOKKOS_FENCE, TIME_KOKKOS_FUNCTIONS,
129 USE_NEW_COPY_AND_PERMUTE);
// Mutable, file-level state shared by the Behavior accessors.

/// Two-level map from string to string to bool.
/// NOTE(review): presumably the cache behind the "named" boolean variables
/// (outer key = environment variable, inner key = name listed in its value);
/// its use is not visible in this part of the file, so confirm against the
/// callers.
std::map<std::string, std::map<std::string, bool>> namedVariableMap_;

/// Programmatic override for verbose mode. Starts out false; it is set and
/// cleared by direct assignment and tested by the verbose accessors before
/// they consult the environment.
bool verboseDisabled_ = false;

/// Programmatic override for timing mode. Starts out false and is tested by
/// the timing accessors before they consult the environment.
bool timingDisabled_ = false;
// Tokenize `s` on the single-character separator `sep` (default ','),
// scanning from left to right.
//
// Parameters:
//   s   - text to tokenize (viewed, not copied).
//   f   - callback taking one token as a const std::string&.
//   sep - separator character.
//
// NOTE(review): several interior lines of this function are absent from this
// listing (the numbering jumps 145 -> 148 and 149 -> 152, and the end of the
// body is not shown). The handling of the "separator not found" case and the
// call to `f` are therefore not visible; presumably each non-empty token is
// handed to `f` -- confirm against the full source.
138void split(
const std::string_view s,
139 std::function<
void(
const std::string &)> f,
140 const char sep =
',') {
141 typedef std::string::size_type size_type;
142 size_type cur_pos, last_pos = 0, length = s.length();
// Runs while last_pos <= length (written as `< length + 1`).
143 while (last_pos < length + 1) {
// Position of the next separator at or after last_pos, or npos if none.
144 cur_pos = s.find_first_of(sep, last_pos);
145 if (cur_pos == std::string::npos) {
// Empty fields are skipped: a token is built only when the separator is not
// sitting exactly at last_pos.
148 if (cur_pos != last_pos) {
// Copy the characters in [last_pos, cur_pos) into an owning std::string.
// The cast binds to cur_pos alone, which is already a size_type.
149 auto token = std::string(s.data() + last_pos, (size_type)cur_pos - last_pos);
// Resume scanning just past the separator.
152 last_pos = cur_pos + 1;
157constexpr bool debugDefault() {
158#ifdef HAVE_TPETRA_DEBUG
165constexpr bool verboseDefault() {
169constexpr bool timingDefault() {
173constexpr bool assumeMpiIsGPUAwareDefault() {
174#ifdef TPETRA_ASSUME_GPU_AWARE_MPI
181constexpr bool cudaLaunchBlockingDefault() {
185constexpr bool hierarchicalUnpackDefault() {
192 static bool once =
false;
195 const char prefix[] =
"Tpetra::Details::Behavior: ";
197#if defined(WIN) && (_MSC_VER >= 1900)
205 const std::string_view
ev(*
env);
210 [&](
const std::string &
s) {
219 if (name.size() >= BehaviorDetails::RESERVED_PREFIX.size() &&
220 name.substr(0, BehaviorDetails::RESERVED_PREFIX.size()) ==
221 BehaviorDetails::RESERVED_PREFIX) {
222 const auto it = std::find(BehaviorDetails::RECOGNIZED_VARS.
begin(),
223 BehaviorDetails::RECOGNIZED_VARS.
end(), name);
225 it == BehaviorDetails::RECOGNIZED_VARS.
end(), std::out_of_range,
228 << name <<
"\" (prefixed with \""
229 << BehaviorDetails::RESERVED_PREFIX
230 <<
"\") is not a recognized Tpetra variable.");
242 static bool initialized_ =
false;
243 return Teuchos::idempotentlyGetEnvironmentVariable(
248 if (BehaviorDetails::verboseDisabled_)
254 static bool initialized_ =
false;
255 return Teuchos::idempotentlyGetEnvironmentVariable(
260 if (BehaviorDetails::timingDisabled_)
266 static bool initialized_ =
false;
267 return Teuchos::idempotentlyGetEnvironmentVariable(
275 static bool initialized_ =
false;
276 return Teuchos::idempotentlyGetEnvironmentVariable(
277 value_, initialized_, BehaviorDetails::ASSUME_GPU_AWARE_MPI,
285 static bool initialized_ =
false;
286 return Teuchos::idempotentlyGetEnvironmentVariable(
287 value_, initialized_, BehaviorDetails::CUDA_LAUNCH_BLOCKING,
294 static bool initialized_ =
false;
295 return Teuchos::idempotentlyGetEnvironmentVariable(
296 value_, initialized_, BehaviorDetails::MM_TAFC_OptimizationCoreCount,
304 static bool initialized_ =
false;
305 return Teuchos::idempotentlyGetEnvironmentVariable(
306 value_, initialized_, BehaviorDetails::VERBOSE_PRINT_COUNT_THRESHOLD,
314 static bool initialized_ =
false;
315 return Teuchos::idempotentlyGetEnvironmentVariable(
316 value_, initialized_, BehaviorDetails::ROW_IMBALANCE_THRESHOLD,
324 static bool initialized_ =
false;
325 return Teuchos::idempotentlyGetEnvironmentVariable(
326 value_, initialized_, BehaviorDetails::MULTIVECTOR_USE_MERGE_PATH,
334 static bool initialized_ =
false;
335 return Teuchos::idempotentlyGetEnvironmentVariable(
336 value_, initialized_, BehaviorDetails::VECTOR_DEVICE_THRESHOLD,
341#ifdef HAVE_TPETRA_INST_CUDA
348 static bool initialized_ =
false;
349 return Teuchos::idempotentlyGetEnvironmentVariable(
350 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_BATCH_SIZE,
355#ifdef HAVE_TPETRA_INST_CUDA
358 const size_t defaultValue(Teuchos::OrdinalTraits<size_t>::invalid());
362 static bool initialized_ =
false;
363 return Teuchos::idempotentlyGetEnvironmentVariable(
364 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_TEAM_SIZE,
372 static bool initialized_ =
false;
373 return Teuchos::idempotentlyGetEnvironmentVariable(
381 static bool initialized_ =
false;
382 return Teuchos::idempotentlyGetEnvironmentVariable(
383 value_, initialized_, BehaviorDetails::USE_KOKKOS_PROFILING,
390 static bool initialized_ =
false;
391 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
396 if (BehaviorDetails::verboseDisabled_)
401 static bool initialized_ =
false;
402 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
407 BehaviorDetails::verboseDisabled_ =
false;
411 BehaviorDetails::verboseDisabled_ =
true;
415 if (BehaviorDetails::timingDisabled_)
420 static bool initialized_ =
false;
421 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
433 static bool initialized_ =
false;
434 return Teuchos::idempotentlyGetEnvironmentVariable(
442 static bool initialized_ =
false;
443 return Teuchos::idempotentlyGetEnvironmentVariable(
444 value_, initialized_, BehaviorDetails::SKIP_COPY_AND_PERMUTE,
449#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || \
450 defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || \
451 defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
458 static bool initialized_ =
false;
459 return Teuchos::idempotentlyGetEnvironmentVariable(
467 static bool initialized_ =
false;
468 return Teuchos::idempotentlyGetEnvironmentVariable(
476 static bool initialized_ =
false;
477 return Teuchos::idempotentlyGetEnvironmentVariable(
485 static bool initialized_ =
false;
486 return Teuchos::idempotentlyGetEnvironmentVariable(
494 static bool initialized_ =
false;
495 return Teuchos::idempotentlyGetEnvironmentVariable(
496 value_, initialized_, BehaviorDetails::SPACES_ID_WARN_LIMIT,
504 static bool initialized_ =
false;
505 return Teuchos::idempotentlyGetEnvironmentVariable(
506 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY,
514 static bool initialized_ =
false;
515 return Teuchos::idempotentlyGetEnvironmentVariable(
516 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE1,
524 static bool initialized_ =
false;
525 return Teuchos::idempotentlyGetEnvironmentVariable(
526 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE2,
534 static bool initialized_ =
false;
535 return Teuchos::idempotentlyGetEnvironmentVariable(
543 static bool initialized_ =
false;
544 return Teuchos::idempotentlyGetEnvironmentVariable(
545 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FUNCTIONS,
553 static bool initialized_ =
false;
554 return Teuchos::idempotentlyGetEnvironmentVariable(
555 value_, initialized_, BehaviorDetails::USE_NEW_COPY_AND_PERMUTE,
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Struct that holds views of the contents of a CrsMatrix.
static bool useNewCopyAndPermute()
Use new implementation of copyAndPermute.
static bool timing()
Whether Tpetra is in timing mode.
static void enable_verbose_behavior()
Enable verbose mode, programmatically.
static void disable_timing()
Disable timing, programmatically.
static bool cudaLaunchBlocking()
Whether the CUDA_LAUNCH_BLOCKING environment variable has been set.
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size....
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
static bool hierarchicalUnpack()
Unpack rows of a matrix using hierarchical unpacking.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
static bool debug()
Whether Tpetra is in debug mode.
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
static bool enableGranularTransfers()
Speed up transfers by overlapping computation and communication.
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
static void enable_timing()
Enable timing, programmatically.
static bool profilingRegionUseTeuchosTimers()
Use Teuchos::Timer in Tpetra::ProfilingRegion.
static bool profilingRegionUseKokkosProfiling()
Use Kokkos::Profiling in Tpetra::ProfilingRegion.
static bool verbose()
Whether Tpetra is in verbose mode.
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
static bool useMergePathMultiVector()
Whether to use the cuSPARSE merge path algorithm to perform sparse matrix-multivector products,...
static size_t multivectorKernelLocationThreshold()
The threshold for transitioning from device to host.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination....
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static void disable_verbose_behavior()
Disable verbose mode, programmatically.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row....
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
static std::string defaultSendType()
Default send type.
Implementation details of Tpetra.
Namespace Tpetra contains the class and methods constituting the Tpetra library.