10#include "Teuchos_EnvVariables.hpp" 
   11#include "Teuchos_OrdinalTraits.hpp" 
   12#include "Teuchos_TestForException.hpp" 
   13#include "TpetraCore_config.h" 
   15#include "KokkosKernels_config.h"   
   55#if not(defined(WIN) && (_MSC_VER >= 1900)) 
   63namespace BehaviorDetails {
 
   65constexpr const std::string_view RESERVED_PREFIX = 
"TPETRA_";
 
   66constexpr const std::string_view ASSUME_GPU_AWARE_MPI =
 
   67    "TPETRA_ASSUME_GPU_AWARE_MPI";
 
   68constexpr const std::string_view CUDA_LAUNCH_BLOCKING = 
"CUDA_LAUNCH_BLOCKING";
 
   69constexpr const std::string_view MM_TAFC_OptimizationCoreCount =
 
   70    "MM_TAFC_OptimizationCoreCount";
 
   71constexpr const std::string_view VERBOSE_PRINT_COUNT_THRESHOLD =
 
   72    "TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD";
 
   73constexpr const std::string_view ROW_IMBALANCE_THRESHOLD =
 
   74    "TPETRA_ROW_IMBALANCE_THRESHOLD";
 
   75constexpr const std::string_view MULTIVECTOR_USE_MERGE_PATH =
 
   76    "TPETRA_MULTIVECTOR_USE_MERGE_PATH";
 
   77constexpr const std::string_view VECTOR_DEVICE_THRESHOLD =
 
   78    "TPETRA_VECTOR_DEVICE_THRESHOLD";
 
   79constexpr const std::string_view HIERARCHICAL_UNPACK_BATCH_SIZE =
 
   80    "TPETRA_HIERARCHICAL_UNPACK_BATCH_SIZE";
 
   81constexpr const std::string_view HIERARCHICAL_UNPACK_TEAM_SIZE =
 
   82    "TPETRA_HIERARCHICAL_UNPACK_TEAM_SIZE";
 
   83constexpr const std::string_view USE_TEUCHOS_TIMERS =
 
   84    "TPETRA_USE_TEUCHOS_TIMERS";
 
   85constexpr const std::string_view USE_KOKKOS_PROFILING =
 
   86    "TPETRA_USE_KOKKOS_PROFILING";
 
   87constexpr const std::string_view DEBUG   = 
"TPETRA_DEBUG";
 
   88constexpr const std::string_view VERBOSE = 
"TPETRA_VERBOSE";
 
   89constexpr const std::string_view TIMING  = 
"TPETRA_TIMING";
 
   90constexpr const std::string_view HIERARCHICAL_UNPACK =
 
   91    "TPETRA_HIERARCHICAL_UNPACK";
 
   92constexpr const std::string_view SKIP_COPY_AND_PERMUTE =
 
   93    "TPETRA_SKIP_COPY_AND_PERMUTE";
 
   94constexpr const std::string_view FUSED_RESIDUAL     = 
"TPETRA_FUSED_RESIDUAL";
 
   95constexpr const std::string_view OVERLAP            = 
"TPETRA_OVERLAP";
 
   96constexpr const std::string_view DEFAULT_SEND_TYPE  = 
"TPETRA_DEFAULT_SEND_TYPE";
 
   97constexpr const std::string_view GRANULAR_TRANSFERS = 
"TPETRA_GRANULAR_TRANSFERS";
 
   98constexpr const std::string_view SPACES_ID_WARN_LIMIT =
 
   99    "TPETRA_SPACES_ID_WARN_LIMIT";
 
  100constexpr const std::string_view TIME_KOKKOS_DEEP_COPY =
 
  101    "TPETRA_TIME_KOKKOS_DEEP_COPY";
 
  102constexpr const std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE1 =
 
  103    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE1";
 
  104constexpr const std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE2 =
 
  105    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE2";
 
  106constexpr const std::string_view TIME_KOKKOS_FENCE = 
"TPETRA_TIME_KOKKOS_FENCE";
 
  107constexpr const std::string_view TIME_KOKKOS_FUNCTIONS =
 
  108    "TPETRA_TIME_KOKKOS_FUNCTIONS";
 
  112template <
typename... Elems>
 
  113constexpr std::array<std::string_view, 
sizeof...(Elems)>
 
  114make_array(Elems &&...elems) {
 
  115  return {std::forward<Elems>(elems)...};
 
  118constexpr const auto RECOGNIZED_VARS = make_array(
 
  119    ASSUME_GPU_AWARE_MPI, CUDA_LAUNCH_BLOCKING, MM_TAFC_OptimizationCoreCount,
 
  120    VERBOSE_PRINT_COUNT_THRESHOLD, ROW_IMBALANCE_THRESHOLD,
 
  121    MULTIVECTOR_USE_MERGE_PATH, VECTOR_DEVICE_THRESHOLD,
 
  122    HIERARCHICAL_UNPACK_BATCH_SIZE, HIERARCHICAL_UNPACK_TEAM_SIZE,
 
  123    USE_TEUCHOS_TIMERS, USE_KOKKOS_PROFILING, DEBUG, VERBOSE, TIMING,
 
  124    HIERARCHICAL_UNPACK, SKIP_COPY_AND_PERMUTE, FUSED_RESIDUAL, OVERLAP,
 
  125    DEFAULT_SEND_TYPE, GRANULAR_TRANSFERS,
 
  126    SPACES_ID_WARN_LIMIT, TIME_KOKKOS_DEEP_COPY, TIME_KOKKOS_DEEP_COPY_VERBOSE1,
 
  127    TIME_KOKKOS_DEEP_COPY_VERBOSE2, TIME_KOKKOS_FENCE, TIME_KOKKOS_FUNCTIONS);
 
  129std::map<std::string, std::map<std::string, bool> > namedVariableMap_;
 
  130bool verboseDisabled_ = 
false;
 
  131bool timingDisabled_  = 
false;
 
  136void split(
const std::string_view s,
 
  137           std::function<
void(
const std::string &)> f,
 
  138           const char sep = 
',') {
 
  139  typedef std::string::size_type size_type;
 
  140  size_type cur_pos, last_pos = 0, length = s.length();
 
  141  while (last_pos < length + 1) {
 
  142    cur_pos = s.find_first_of(sep, last_pos);
 
  143    if (cur_pos == std::string::npos) {
 
  146    if (cur_pos != last_pos) {
 
  147      auto token = std::string(s.data() + last_pos, (size_type)cur_pos - last_pos);
 
  150    last_pos = cur_pos + 1;
 
  155constexpr bool debugDefault() {
 
  156#ifdef HAVE_TPETRA_DEBUG 
  163constexpr bool verboseDefault() {
 
  167constexpr bool timingDefault() {
 
  171constexpr bool assumeMpiIsGPUAwareDefault() {
 
  172#ifdef TPETRA_ASSUME_GPU_AWARE_MPI 
  179constexpr bool cudaLaunchBlockingDefault() {
 
  183constexpr bool hierarchicalUnpackDefault() {
 
  190  static bool once = 
false;
 
  193    const char prefix[] = 
"Tpetra::Details::Behavior: ";
 
  195#if defined(WIN) && (_MSC_VER >= 1900) 
  203      const std::string_view 
ev(*
env);
 
  208          [&](
const std::string &
s) {
 
  217      if (name.size() >= BehaviorDetails::RESERVED_PREFIX.size() &&
 
  218          name.substr(0, BehaviorDetails::RESERVED_PREFIX.size()) ==
 
  219              BehaviorDetails::RESERVED_PREFIX) {
 
  220        const auto it = std::find(BehaviorDetails::RECOGNIZED_VARS.
begin(),
 
  221                                  BehaviorDetails::RECOGNIZED_VARS.
end(), name);
 
  223            it == BehaviorDetails::RECOGNIZED_VARS.
end(), std::out_of_range,
 
  226                   << name << 
"\" (prefixed with \"" 
  227                   << BehaviorDetails::RESERVED_PREFIX
 
  228                   << 
"\") is not a recognized Tpetra variable.");
 
 
  240  static bool initialized_ = 
false;
 
  241  return Teuchos::idempotentlyGetEnvironmentVariable(
 
 
  246  if (BehaviorDetails::verboseDisabled_)
 
  252  static bool initialized_ = 
false;
 
  253  return Teuchos::idempotentlyGetEnvironmentVariable(
 
 
  258  if (BehaviorDetails::timingDisabled_)
 
  264  static bool initialized_ = 
false;
 
  265  return Teuchos::idempotentlyGetEnvironmentVariable(
 
 
  273  static bool initialized_ = 
false;
 
  274  return Teuchos::idempotentlyGetEnvironmentVariable(
 
  275      value_, initialized_, BehaviorDetails::ASSUME_GPU_AWARE_MPI,
 
 
  283  static bool initialized_ = 
false;
 
  284  return Teuchos::idempotentlyGetEnvironmentVariable(
 
  285      value_, initialized_, BehaviorDetails::CUDA_LAUNCH_BLOCKING,
 
 
  292  static bool initialized_ = 
false;
 
  293  return Teuchos::idempotentlyGetEnvironmentVariable(
 
  294      value_, initialized_, BehaviorDetails::MM_TAFC_OptimizationCoreCount,
 
 
  302  static bool initialized_ = 
false;
 
  303  return Teuchos::idempotentlyGetEnvironmentVariable(
 
  304      value_, initialized_, BehaviorDetails::VERBOSE_PRINT_COUNT_THRESHOLD,
 
 
  312  static bool initialized_ = 
false;
 
  313  return Teuchos::idempotentlyGetEnvironmentVariable(
 
  314      value_, initialized_, BehaviorDetails::ROW_IMBALANCE_THRESHOLD,
 
 
  322  static bool initialized_ = 
false;
 
  323  return Teuchos::idempotentlyGetEnvironmentVariable(
 
  324      value_, initialized_, BehaviorDetails::MULTIVECTOR_USE_MERGE_PATH,
 
 
  332  static bool initialized_ = 
false;
 
  333  return Teuchos::idempotentlyGetEnvironmentVariable(
 
  334      value_, initialized_, BehaviorDetails::VECTOR_DEVICE_THRESHOLD,
 
 
  339#ifdef HAVE_TPETRA_INST_CUDA 
  346  static bool initialized_ = 
false;
 
  347  return Teuchos::idempotentlyGetEnvironmentVariable(
 
  348      value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_BATCH_SIZE,
 
 
  353#ifdef HAVE_TPETRA_INST_CUDA 
  356  const size_t defaultValue(Teuchos::OrdinalTraits<size_t>::invalid());
 
  360  static bool initialized_ = 
false;
 
  361  return Teuchos::idempotentlyGetEnvironmentVariable(
 
  362      value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_TEAM_SIZE,
 
 
  370  static bool initialized_ = 
false;
 
  371  return Teuchos::idempotentlyGetEnvironmentVariable(
 
 
  379  static bool initialized_ = 
false;
 
  380  return Teuchos::idempotentlyGetEnvironmentVariable(
 
  381      value_, initialized_, BehaviorDetails::USE_KOKKOS_PROFILING,
 
 
  388  static bool initialized_ = 
false;
 
  389  return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
 
 
  394  if (BehaviorDetails::verboseDisabled_)
 
  399  static bool initialized_ = 
false;
 
  400  return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
 
 
  405  BehaviorDetails::verboseDisabled_ = 
false;
 
 
  409  BehaviorDetails::verboseDisabled_ = 
true;
 
 
  413  if (BehaviorDetails::timingDisabled_)
 
  418  static bool initialized_ = 
false;
 
  419  return Teuchos::idempotentlyGetNamedEnvironmentVariableAsBool(
 
 
  431  static bool initialized_ = 
false;
 
  432  return Teuchos::idempotentlyGetEnvironmentVariable(
 
 
  440  static bool initialized_ = 
false;
 
  441  return Teuchos::idempotentlyGetEnvironmentVariable(
 
  442      value_, initialized_, BehaviorDetails::SKIP_COPY_AND_PERMUTE,
 
 
  447#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) ||  \ 
  448    defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || \ 
  449    defined(KOKKOSKERNELS_ENABLE_TPL_MKL) 
  456  static bool initialized_ = 
false;
 
  457  return Teuchos::idempotentlyGetEnvironmentVariable(
 
 
  465  static bool initialized_ = 
false;
 
  466  return Teuchos::idempotentlyGetEnvironmentVariable(
 
 
  474  static bool initialized_  = 
false;
 
  475  return Teuchos::idempotentlyGetEnvironmentVariable(
 
 
  483  static bool initialized_ = 
false;
 
  484  return Teuchos::idempotentlyGetEnvironmentVariable(
 
 
  492  static bool initialized_ = 
false;
 
  493  return Teuchos::idempotentlyGetEnvironmentVariable(
 
  494      value_, initialized_, BehaviorDetails::SPACES_ID_WARN_LIMIT,
 
 
  502  static bool initialized_ = 
false;
 
  503  return Teuchos::idempotentlyGetEnvironmentVariable(
 
  504      value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY,
 
 
  512  static bool initialized_ = 
false;
 
  513  return Teuchos::idempotentlyGetEnvironmentVariable(
 
  514      value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE1,
 
 
  522  static bool initialized_ = 
false;
 
  523  return Teuchos::idempotentlyGetEnvironmentVariable(
 
  524      value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE2,
 
 
  532  static bool initialized_ = 
false;
 
  533  return Teuchos::idempotentlyGetEnvironmentVariable(
 
 
  541  static bool initialized_ = 
false;
 
  542  return Teuchos::idempotentlyGetEnvironmentVariable(
 
  543      value_, initialized_, BehaviorDetails::TIME_KOKKOS_FUNCTIONS,
 
 
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
 
Struct that holds views of the contents of a CrsMatrix.
 
static bool timing()
Whether Tpetra is in timing mode.
 
static void enable_verbose_behavior()
Enable verbose mode, programmatically.
 
static void disable_timing()
Disable timing, programmatically.
 
static bool cudaLaunchBlocking()
Whether the CUDA_LAUNCH_BLOCKING environment variable has been set.
 
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size....
 
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
 
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
 
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
 
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
 
static bool hierarchicalUnpack()
Unpack rows of a matrix using hierarchical unpacking.
 
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
 
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA-aware.
 
static bool debug()
Whether Tpetra is in debug mode.
 
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
 
static bool enableGranularTransfers()
Speed up transfers by overlapping computation and communication.
 
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
 
static void enable_timing()
Enable timing, programmatically.
 
static bool profilingRegionUseTeuchosTimers()
Use Teuchos::Timer in Tpetra::ProfilingRegion.
 
static bool profilingRegionUseKokkosProfiling()
Use Kokkos::Profiling in Tpetra::ProfilingRegion.
 
static bool verbose()
Whether Tpetra is in verbose mode.
 
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
 
static bool useMergePathMultiVector()
Whether to use the cuSPARSE merge path algorithm to perform sparse matrix-multivector products,...
 
static size_t multivectorKernelLocationThreshold()
The threshold for transitioning from device to host.
 
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
 
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination....
 
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
 
static void disable_verbose_behavior()
Disable verbose mode, programmatically.
 
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row....
 
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
 
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
 
static std::string defaultSendType()
Default send type.
 
Implementation details of Tpetra.
 
Namespace Tpetra contains the class and methods constituting the Tpetra library.