10#include "Teuchos_EnvVariables.hpp"
11#include "Teuchos_OrdinalTraits.hpp"
12#include "Teuchos_TestForException.hpp"
13#include "TpetraCore_config.h"
15#include "KokkosKernels_config.h"
55#if not(defined(WIN) && (_MSC_VER >= 1900))
63namespace BehaviorDetails {
// Names of the environment variables consulted by Tpetra::Details::Behavior.
// Each constant holds the exact variable name that is handed to the Teuchos
// environment-variable getters elsewhere in this file.

/// Prefix reserved for Tpetra variables; names that start with it are
/// checked against RECOGNIZED_VARS.
constexpr std::string_view RESERVED_PREFIX = "TPETRA_";

constexpr std::string_view ASSUME_GPU_AWARE_MPI =
    "TPETRA_ASSUME_GPU_AWARE_MPI";
// The next two names do not carry the reserved "TPETRA_" prefix.
constexpr std::string_view CUDA_LAUNCH_BLOCKING = "CUDA_LAUNCH_BLOCKING";
constexpr std::string_view MM_TAFC_OptimizationCoreCount =
    "MM_TAFC_OptimizationCoreCount";
constexpr std::string_view VERBOSE_PRINT_COUNT_THRESHOLD =
    "TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD";
constexpr std::string_view ROW_IMBALANCE_THRESHOLD =
    "TPETRA_ROW_IMBALANCE_THRESHOLD";
constexpr std::string_view MULTIVECTOR_USE_MERGE_PATH =
    "TPETRA_MULTIVECTOR_USE_MERGE_PATH";
constexpr std::string_view VECTOR_DEVICE_THRESHOLD =
    "TPETRA_VECTOR_DEVICE_THRESHOLD";
constexpr std::string_view HIERARCHICAL_UNPACK_BATCH_SIZE =
    "TPETRA_HIERARCHICAL_UNPACK_BATCH_SIZE";
constexpr std::string_view HIERARCHICAL_UNPACK_TEAM_SIZE =
    "TPETRA_HIERARCHICAL_UNPACK_TEAM_SIZE";
constexpr std::string_view USE_TEUCHOS_TIMERS = "TPETRA_USE_TEUCHOS_TIMERS";
constexpr std::string_view USE_KOKKOS_PROFILING =
    "TPETRA_USE_KOKKOS_PROFILING";
constexpr std::string_view DEBUG = "TPETRA_DEBUG";
constexpr std::string_view VERBOSE = "TPETRA_VERBOSE";
constexpr std::string_view TIMING = "TPETRA_TIMING";
constexpr std::string_view HIERARCHICAL_UNPACK = "TPETRA_HIERARCHICAL_UNPACK";
constexpr std::string_view SKIP_COPY_AND_PERMUTE =
    "TPETRA_SKIP_COPY_AND_PERMUTE";
constexpr std::string_view FUSED_RESIDUAL = "TPETRA_FUSED_RESIDUAL";
constexpr std::string_view OVERLAP = "TPETRA_OVERLAP";
constexpr std::string_view DEFAULT_SEND_TYPE = "TPETRA_DEFAULT_SEND_TYPE";
constexpr std::string_view GRANULAR_TRANSFERS = "TPETRA_GRANULAR_TRANSFERS";
constexpr std::string_view SPACES_ID_WARN_LIMIT =
    "TPETRA_SPACES_ID_WARN_LIMIT";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY =
    "TPETRA_TIME_KOKKOS_DEEP_COPY";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE1 =
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE1";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE2 =
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE2";
constexpr std::string_view TIME_KOKKOS_FENCE = "TPETRA_TIME_KOKKOS_FENCE";
constexpr std::string_view TIME_KOKKOS_FUNCTIONS =
    "TPETRA_TIME_KOKKOS_FUNCTIONS";
/// Build a std::array<std::string_view, N> from the given arguments, where
/// N is the number of arguments.
///
/// Lets the element count be deduced from the call site so a list of names
/// can be written without repeating its length.
///
/// \param elems Values used, in order, to initialize the array elements;
///   each must be convertible to std::string_view.
/// \return The array holding one string view per argument.
template <typename... Elems>
constexpr std::array<std::string_view, sizeof...(Elems)>
make_array(Elems &&...elems) {
  return {std::forward<Elems>(elems)...};
}
118constexpr const auto RECOGNIZED_VARS = make_array(
119 ASSUME_GPU_AWARE_MPI, CUDA_LAUNCH_BLOCKING, MM_TAFC_OptimizationCoreCount,
120 VERBOSE_PRINT_COUNT_THRESHOLD, ROW_IMBALANCE_THRESHOLD,
121 MULTIVECTOR_USE_MERGE_PATH, VECTOR_DEVICE_THRESHOLD,
122 HIERARCHICAL_UNPACK_BATCH_SIZE, HIERARCHICAL_UNPACK_TEAM_SIZE,
123 USE_TEUCHOS_TIMERS, USE_KOKKOS_PROFILING, DEBUG, VERBOSE, TIMING,
124 HIERARCHICAL_UNPACK, SKIP_COPY_AND_PERMUTE, FUSED_RESIDUAL, OVERLAP,
125 DEFAULT_SEND_TYPE, GRANULAR_TRANSFERS,
126 SPACES_ID_WARN_LIMIT, TIME_KOKKOS_DEEP_COPY, TIME_KOKKOS_DEEP_COPY_VERBOSE1,
127 TIME_KOKKOS_DEEP_COPY_VERBOSE2, TIME_KOKKOS_FENCE, TIME_KOKKOS_FUNCTIONS);
// Mutable file-level state shared by the Behavior accessors.

/// Two-level map from string to string to bool.
/// NOTE(review): presumably keyed by environment-variable name, then by the
/// named item the caller asks about — confirm against the
/// idempotentlyGetNamedEnvironmentVariableAsBool call sites.
std::map<std::string, std::map<std::string, bool>> namedVariableMap_;

/// Checked before the verbose setting is consulted; starts out false and is
/// assigned both true and false elsewhere in this file.
bool verboseDisabled_ = false;

/// Checked before the timing setting is consulted; starts out false.
bool timingDisabled_ = false;
136void split(
const std::string_view s,
137 std::function<
void(
const std::string &)> f,
138 const char sep =
',') {
139 typedef std::string::size_type size_type;
140 size_type cur_pos, last_pos = 0, length = s.length();
141 while (last_pos < length + 1) {
142 cur_pos = s.find_first_of(sep, last_pos);
143 if (cur_pos == std::string::npos) {
146 if (cur_pos != last_pos) {
147 auto token = std::string(s.data() + last_pos, (size_type)cur_pos - last_pos);
150 last_pos = cur_pos + 1;
155constexpr bool debugDefault() {
156#ifdef HAVE_TPETRA_DEBUG
163constexpr bool verboseDefault() {
167constexpr bool timingDefault() {
171constexpr bool assumeMpiIsGPUAwareDefault() {
172#ifdef TPETRA_ASSUME_GPU_AWARE_MPI
179constexpr bool cudaLaunchBlockingDefault() {
183constexpr bool hierarchicalUnpackDefault() {
190 static bool once =
false;
193 const char prefix[] =
"Tpetra::Details::Behavior: ";
195#if defined(WIN) && (_MSC_VER >= 1900)
203 const std::string_view
ev(*
env);
208 [&](
const std::string &
s) {
217 if (name.size() >= BehaviorDetails::RESERVED_PREFIX.size() &&
218 name.substr(0, BehaviorDetails::RESERVED_PREFIX.size()) ==
219 BehaviorDetails::RESERVED_PREFIX) {
220 const auto it = std::find(BehaviorDetails::RECOGNIZED_VARS.
begin(),
221 BehaviorDetails::RECOGNIZED_VARS.
end(), name);
223 it == BehaviorDetails::RECOGNIZED_VARS.
end(), std::out_of_range,
226 << name <<
"\" (prefixed with \""
227 << BehaviorDetails::RESERVED_PREFIX
228 <<
"\") is not a recognized Tpetra variable.");
240 static bool initialized_ =
false;
241 return Teuchos::idempotentlyGetEnvironmentVariable(
246 if (BehaviorDetails::verboseDisabled_)
252 static bool initialized_ =
false;
253 return Teuchos::idempotentlyGetEnvironmentVariable(
258 if (BehaviorDetails::timingDisabled_)
264 static bool initialized_ =
false;
265 return Teuchos::idempotentlyGetEnvironmentVariable(
273 static bool initialized_ =
false;
274 return Teuchos::idempotentlyGetEnvironmentVariable(
275 value_, initialized_, BehaviorDetails::ASSUME_GPU_AWARE_MPI,
283 static bool initialized_ =
false;
284 return Teuchos::idempotentlyGetEnvironmentVariable(
285 value_, initialized_, BehaviorDetails::CUDA_LAUNCH_BLOCKING,
292 static bool initialized_ =
false;
293 return Teuchos::idempotentlyGetEnvironmentVariable(
294 value_, initialized_, BehaviorDetails::MM_TAFC_OptimizationCoreCount,
302 static bool initialized_ =
false;
303 return Teuchos::idempotentlyGetEnvironmentVariable(
304 value_, initialized_, BehaviorDetails::VERBOSE_PRINT_COUNT_THRESHOLD,
312 static bool initialized_ =
false;
313 return Teuchos::idempotentlyGetEnvironmentVariable(
314 value_, initialized_, BehaviorDetails::ROW_IMBALANCE_THRESHOLD,
322 static bool initialized_ =
false;
323 return Teuchos::idempotentlyGetEnvironmentVariable(
324 value_, initialized_, BehaviorDetails::MULTIVECTOR_USE_MERGE_PATH,
332 static bool initialized_ =
false;
333 return Teuchos::idempotentlyGetEnvironmentVariable(
334 value_, initialized_, BehaviorDetails::VECTOR_DEVICE_THRESHOLD,
339#ifdef HAVE_TPETRA_INST_CUDA
346 static bool initialized_ =
false;
347 return Teuchos::idempotentlyGetEnvironmentVariable(
348 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_BATCH_SIZE,
353#ifdef HAVE_TPETRA_INST_CUDA
356 const size_t defaultValue(Teuchos::OrdinalTraits<size_t>::invalid());
360 static bool initialized_ =
false;
361 return Teuchos::idempotentlyGetEnvironmentVariable(
362 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_TEAM_SIZE,
370 static bool initialized_ =
false;
371 return Teuchos::idempotentlyGetEnvironmentVariable(
379 static bool initialized_ =
false;
380 return Teuchos::idempotentlyGetEnvironmentVariable(
381 value_, initialized_, BehaviorDetails::USE_KOKKOS_PROFILING,
388 static bool initialized_ =
false;
389 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
394 if (BehaviorDetails::verboseDisabled_)
399 static bool initialized_ =
false;
400 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
405 BehaviorDetails::verboseDisabled_ =
false;
409 BehaviorDetails::verboseDisabled_ =
true;
413 if (BehaviorDetails::timingDisabled_)
418 static bool initialized_ =
false;
419 return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
431 static bool initialized_ =
false;
432 return Teuchos::idempotentlyGetEnvironmentVariable(
440 static bool initialized_ =
false;
441 return Teuchos::idempotentlyGetEnvironmentVariable(
442 value_, initialized_, BehaviorDetails::SKIP_COPY_AND_PERMUTE,
447#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || \
448 defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || \
449 defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
456 static bool initialized_ =
false;
457 return Teuchos::idempotentlyGetEnvironmentVariable(
465 static bool initialized_ =
false;
466 return Teuchos::idempotentlyGetEnvironmentVariable(
474 static bool initialized_ =
false;
475 return Teuchos::idempotentlyGetEnvironmentVariable(
483 static bool initialized_ =
false;
484 return Teuchos::idempotentlyGetEnvironmentVariable(
492 static bool initialized_ =
false;
493 return Teuchos::idempotentlyGetEnvironmentVariable(
494 value_, initialized_, BehaviorDetails::SPACES_ID_WARN_LIMIT,
502 static bool initialized_ =
false;
503 return Teuchos::idempotentlyGetEnvironmentVariable(
504 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY,
512 static bool initialized_ =
false;
513 return Teuchos::idempotentlyGetEnvironmentVariable(
514 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE1,
522 static bool initialized_ =
false;
523 return Teuchos::idempotentlyGetEnvironmentVariable(
524 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE2,
532 static bool initialized_ =
false;
533 return Teuchos::idempotentlyGetEnvironmentVariable(
541 static bool initialized_ =
false;
542 return Teuchos::idempotentlyGetEnvironmentVariable(
543 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FUNCTIONS,
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Struct that holds views of the contents of a CrsMatrix.
static bool timing()
Whether Tpetra is in timing mode.
static void enable_verbose_behavior()
Enable verbose mode, programmatically.
static void disable_timing()
Disable timing, programmatically.
static bool cudaLaunchBlocking()
Whether the CUDA_LAUNCH_BLOCKING environment variable has been set.
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size....
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
static bool hierarchicalUnpack()
Unpack rows of a matrix using hierarchical unpacking.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is GPU aware.
static bool debug()
Whether Tpetra is in debug mode.
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
static bool enableGranularTransfers()
Speed up transfers by overlapping computation and communication.
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
static void enable_timing()
Enable timing, programmatically.
static bool profilingRegionUseTeuchosTimers()
Use Teuchos::Timer in Tpetra::ProfilingRegion.
static bool profilingRegionUseKokkosProfiling()
Use Kokkos::Profiling in Tpetra::ProfilingRegion.
static bool verbose()
Whether Tpetra is in verbose mode.
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
static bool useMergePathMultiVector()
Whether to use the cuSPARSE merge path algorithm to perform sparse matrix-multivector products,...
static size_t multivectorKernelLocationThreshold()
The threshold for transitioning from device to host.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination....
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static void disable_verbose_behavior()
Disable verbose mode, programmatically.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row....
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
static std::string defaultSendType()
Default send type.
Implementation details of Tpetra.
Namespace Tpetra contains the class and methods constituting the Tpetra library.