10#ifndef TPETRA_MATRIXMATRIX_DEF_HPP 
   11#define TPETRA_MATRIXMATRIX_DEF_HPP 
   13#include "KokkosSparse_Utils.hpp" 
   14#include "Tpetra_ConfigDefs.hpp" 
   16#include "Teuchos_VerboseObject.hpp" 
   17#include "Teuchos_Array.hpp" 
   19#include "Tpetra_CrsMatrix.hpp" 
   20#include "Tpetra_BlockCrsMatrix.hpp" 
   22#include "Tpetra_RowMatrixTransposer.hpp" 
   25#include "Tpetra_Details_makeColMap.hpp" 
   26#include "Tpetra_ConfigDefs.hpp" 
   27#include "Tpetra_Map.hpp" 
   28#include "Tpetra_Export.hpp" 
   35#include "Teuchos_FancyOStream.hpp" 
   37#include "TpetraExt_MatrixMatrix_ExtraKernels_def.hpp" 
   40#include "KokkosSparse_spgemm.hpp" 
   41#include "KokkosSparse_spadd.hpp" 
   42#include "Kokkos_Bitset.hpp" 
   54#include "TpetraExt_MatrixMatrix_OpenMP.hpp" 
   55#include "TpetraExt_MatrixMatrix_Cuda.hpp" 
   56#include "TpetraExt_MatrixMatrix_HIP.hpp" 
   57#include "TpetraExt_MatrixMatrix_SYCL.hpp" 
   61namespace MatrixMatrix {
 
   69template <
class Scalar,
 
   80    const std::string& label,
 
   81    const Teuchos::RCP<Teuchos::ParameterList>& 
params) {
 
   97  const std::string 
prefix = 
"TpetraExt::MatrixMatrix::Multiply(): ";
 
  122  const bool newFlag = !
C.getGraph()->isLocallyIndexed() && !
C.getGraph()->isGloballyIndexed();
 
  128#ifdef USE_OLD_TRANSPOSE   
  132  using Teuchos::ParameterList;
 
  158                             prefix << 
"ERROR, inner dimensions of op(A) and op(B) " 
  159                                       "must match for matrix-matrix product. op(A) is " 
  167                             prefix << 
"ERROR, dimensions of result C must " 
  168                                       "match dimensions of op(A) * op(B). C has " 
  169                                    << 
C.getGlobalNumRows()
 
  170                                    << 
" rows, should have at least " << 
Aouter << std::endl);
 
  178  if (!
C.isFillActive()) 
C.resumeFill();
 
  220    MMdetails::mult_AT_B_newmatrix(
A, 
B, 
C, label, 
params);
 
  246    C.fillComplete(
Bprime->getDomainMap(), 
Aprime->getRangeMap());
 
 
  264    const std::string& label) {
 
  276  std::string 
prefix = std::string(
"TpetraExt ") + label + std::string(
": ");
 
  292                             prefix << 
"ERROR, inner dimensions of op(A) and op(B) " 
  293                                       "must match for matrix-matrix product. op(A) is " 
  300  const LO blocksize = 
A->getBlockSize();
 
  302                             prefix << 
"ERROR, Blocksizes do not match. A.blocksize = " << blocksize << 
", B.blocksize = " << 
B->getBlockSize());
 
  322  MMdetails::import_and_extract_views(*
B, 
targetMap_B, 
Bview, 
A->getGraph()->getImporter(),
 
  323                                      A->getGraph()->getImporter().is_null());
 
 
  339            const std::string& label,
 
  340            const Teuchos::RCP<Teuchos::ParameterList>& 
params) {
 
  354  const std::string 
prefix = 
"TpetraExt::MatrixMatrix::Jacobi(): ";
 
  373                             prefix << 
"ERROR, inner dimensions of op(A) and op(B) " 
  374                                       "must match for matrix-matrix product. op(A) is " 
  382                             prefix << 
"ERROR, dimensions of result C must " 
  383                                       "match dimensions of op(A) * op(B). C has " 
  384                                    << 
C.getGlobalNumRows()
 
  385                                    << 
" rows, should have at least " << 
Aouter << std::endl);
 
  413      importParams1->set(
"compute global constants", 
params->get(
"compute global constants: temporaries", 
false));
 
  415      auto slist                      = 
params->sublist(
"matrixmatrix: kernel params", 
false);
 
  419      bool isMM              = 
slist.get(
"isMatrixMatrix_TransferAndFillComplete", 
false);
 
  423      ip1slist.set(
"isMatrixMatrix_TransferAndFillComplete", 
isMM);
 
  441      importParams2->set(
"compute global constants", 
params->get(
"compute global constants: temporaries", 
false));
 
  443      auto slist                      = 
params->sublist(
"matrixmatrix: kernel params", 
false);
 
  445      bool isMM                       = 
slist.get(
"isMatrixMatrix_TransferAndFillComplete", 
false);
 
  451      ip2slist.set(
"isMatrixMatrix_TransferAndFillComplete", 
isMM);
 
  464  bool newFlag = !
C.getGraph()->isLocallyIndexed() && !
C.getGraph()->isGloballyIndexed();
 
  479      typedef Teuchos::ScalarTraits<Scalar> STS;
 
  480      typename STS::magnitudeType 
threshold = 
params->get(
"remove zeros threshold", STS::magnitude(STS::zero()));
 
 
  496  using Teuchos::Array;
 
  506  const std::string 
prefix = 
"TpetraExt::MatrixMatrix::Add(): ";
 
  509                             prefix << 
"ERROR, input matrix A.isFillComplete() is false; it is required to be true. " 
  510                                       "(Result matrix B is not required to be isFillComplete()).");
 
  512                             prefix << 
"ERROR, input matrix B must not be fill complete!");
 
  514                             prefix << 
"ERROR, input matrix B must not have static graph!");
 
  516                             prefix << 
"ERROR, input matrix B must not be locally indexed!");
 
  518  using Teuchos::ParameterList;
 
  531  typename crs_matrix_type::nonconst_global_inds_host_view_type 
a_inds(
"a_inds", 
A.getLocalMaxNumRowEntries());
 
  532  typename crs_matrix_type::nonconst_values_host_view_type 
a_vals(
"a_vals", 
A.getLocalMaxNumRowEntries());
 
  535  if (
scalarB != Teuchos::ScalarTraits<SC>::one())
 
  539  if (
scalarA != Teuchos::ScalarTraits<SC>::zero()) {
 
  541      row = 
B.getRowMap()->getGlobalElement(
i);
 
  544      if (
scalarA != Teuchos::ScalarTraits<SC>::one()) {
 
 
  557Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
 
  566    const Teuchos::RCP<Teuchos::ParameterList>& 
params) {
 
  567  using Teuchos::ParameterList;
 
  570  using Teuchos::rcpFromRef;
 
  574        params->isParameter(
"Call fillComplete") && !
params->get<
bool>(
"Call fillComplete"),
 
  575        std::invalid_argument,
 
  576        "Tpetra::MatrixMatrix::add(): this version of add() always calls fillComplete\n" 
  577        "on the result, but you explicitly set 'Call fillComplete' = false in the parameter list. Don't set this explicitly.");
 
  578    params->set(
"Call fillComplete", 
true);
 
  589                             "TpetraExt::MatrixMatrix::add(): A and B must both be fill complete.");
 
 
  600template <
class LO, 
class GO, 
class LOView, 
class GOView, 
class LocalMap>
 
  601struct ConvertGlobalToLocalFunctor {
 
  607  KOKKOS_FUNCTION 
void operator()(
const GO i)
 const {
 
  608    lids(i) = localColMap.getLocalElement(gids(i));
 
  613  const LocalMap localColMap;
 
  616template <
class Scalar,
 
  629         const Teuchos::RCP<Teuchos::ParameterList>& 
params) {
 
  632  using Teuchos::rcp_dynamic_cast;
 
  633  using Teuchos::rcp_implicit_cast;
 
  634  using Teuchos::rcpFromRef;
 
  635  using Teuchos::TimeMonitor;
 
  646  using exec_space       = 
typename crs_graph_type::execution_space;
 
  647  using AddKern          = MMdetails::AddKernels<SC, LO, GO, NO>;
 
  648  const char* 
prefix_mmm = 
"TpetraExt::MatrixMatrix::add: ";
 
  649  constexpr bool debug   = 
false;
 
  654    std::ostringstream 
os;
 
  655    os << 
"Proc " << 
A.getMap()->getComm()->getRank() << 
": " 
  656       << 
"TpetraExt::MatrixMatrix::add" << std::endl;
 
  657    std::cerr << 
os.str();
 
  661                             prefix_mmm << 
"C must be a 'new' matrix (neither locally nor globally indexed).");
 
  663                             prefix_mmm << 
"A and B must both be fill complete.");
 
  664#ifdef HAVE_TPETRA_DEBUG 
  666  if (
A.isFillComplete() && 
B.isFillComplete()) {
 
  669         !
A.getDomainMap()->locallySameAs(*
B.getDomainMap())) ||
 
  671         !
A.getDomainMap()->isSameAs(*
B.getRangeMap())) ||
 
  673         !
A.getRangeMap()->isSameAs(*
B.getDomainMap()));
 
  675                               prefix_mmm << 
"The domain Maps of Op(A) and Op(B) are not the same.");
 
  679         !
A.getRangeMap()->isSameAs(*
B.getRangeMap())) ||
 
  681         !
A.getRangeMap()->isSameAs(*
B.getDomainMap())) ||
 
  683         !
A.getDomainMap()->isSameAs(*
B.getRangeMap()));
 
  685                               prefix_mmm << 
"The range Maps of Op(A) and Op(B) are not the same.");
 
  689  using Teuchos::ParameterList;
 
  697#ifdef HAVE_TPETRA_DEBUG 
  700                                           "Please report this bug to the Tpetra developers.");
 
  707      std::ostringstream 
os;
 
  708      os << 
"Proc " << 
A.getMap()->getComm()->getRank() << 
": " 
  709         << 
"Form explicit xpose of B" << std::endl;
 
  710      std::cerr << 
os.str();
 
  715#ifdef HAVE_TPETRA_DEBUG 
  717                             prefix_mmm << 
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
 
  719      !
Aprime->isFillComplete() || !
Bprime->isFillComplete(), std::invalid_argument,
 
  720      prefix_mmm << 
"Aprime and Bprime must both be fill complete.  " 
  721                    "Please report this bug to the Tpetra developers.");
 
  733  typedef typename AddKern::values_array values_array;
 
  734  typedef typename AddKern::row_ptrs_array row_ptrs_array;
 
  735  typedef typename AddKern::col_inds_array col_inds_array;
 
  745  if (!(
Aprime->getRowMap()->isSameAs(*(
Bprime->getRowMap())))) {
 
  747    auto import = rcp(new import_type(Aprime->getRowMap(), Bprime->getRowMap()));
 
  759  if (Teuchos::nonnull(
params) && 
params->isParameter(
"Call fillComplete")) {
 
  771    rowptrs = row_ptrs_array(
"C rowptrs", 0);
 
  776    using global_col_inds_array = 
typename AddKern::global_col_inds_array;
 
  785      std::ostringstream 
os;
 
  786      os << 
"Proc " << 
A.getMap()->getComm()->getRank() << 
": " 
  787         << 
"Call AddKern::convertToGlobalAndAdd(...)" << std::endl;
 
  788      std::cerr << 
os.str();
 
  790    AddKern::convertToGlobalAndAdd(
 
  794      std::ostringstream 
os;
 
  795      os << 
"Proc " << 
A.getMap()->getComm()->getRank() << 
": " 
  796         << 
"Finished AddKern::convertToGlobalAndAdd(...)" << std::endl;
 
  797      std::cerr << 
os.str();
 
  806    Kokkos::parallel_for(Kokkos::RangePolicy<exec_space>(0, 
globalColinds.extent(0)),
 
  808                                                     col_inds_array, global_col_inds_array,
 
  810    KokkosSparse::sort_crs_matrix<exec_space, row_ptrs_array, col_inds_array, values_array>(
rowptrs, 
localColinds, 
vals);
 
  819    auto Arowptrs = 
Alocal.graph.row_map;
 
  820    auto Browptrs = 
Blocal.graph.row_map;
 
  821    auto Acolinds = 
Alocal.graph.entries;
 
  822    auto Bcolinds = 
Blocal.graph.entries;
 
  828        std::ostringstream 
os;
 
  829        os << 
"Proc " << 
A.getMap()->getComm()->getRank() << 
": " 
  830           << 
"Call AddKern::addSorted(...)" << std::endl;
 
  831        std::cerr << 
os.str();
 
  833      AddKern::addSorted(
Avals, Arowptrs, Acolinds, 
alpha, 
Bvals, Browptrs, Bcolinds, 
beta, 
Aprime->getGlobalNumCols(), 
vals, 
rowptrs, 
colinds);
 
  840        std::ostringstream 
os;
 
  841        os << 
"Proc " << 
A.getMap()->getComm()->getRank() << 
": " 
  842           << 
"Call AddKern::addUnsorted(...)" << std::endl;
 
  843        std::cerr << 
os.str();
 
  845      AddKern::addUnsorted(
Avals, Arowptrs, Acolinds, 
alpha, 
Bvals, Browptrs, Bcolinds, 
beta, 
Aprime->getGlobalNumCols(), 
vals, 
rowptrs, 
colinds);
 
  856          std::ostringstream 
os;
 
  857          os << 
"Proc " << 
A.getMap()->getComm()->getRank() << 
": " 
  858             << 
"Create Cimport" << std::endl;
 
  859          std::cerr << 
os.str();
 
  865          std::ostringstream 
os;
 
  866          os << 
"Proc " << 
A.getMap()->getComm()->getRank() << 
": " 
  867             << 
"Create Cexport" << std::endl;
 
  868          std::cerr << 
os.str();
 
  874        std::ostringstream 
os;
 
  875        os << 
"Proc " << 
A.getMap()->getComm()->getRank() << 
": " 
  876           << 
"Call C->expertStaticFillComplete(...)" << std::endl;
 
  877        std::cerr << 
os.str();
 
 
  899  using Teuchos::Array;
 
  900  using Teuchos::ArrayRCP;
 
  901  using Teuchos::ArrayView;
 
  904  using Teuchos::rcp_dynamic_cast;
 
  905  using Teuchos::rcpFromRef;
 
  906  using Teuchos::tuple;
 
  908  typedef Teuchos::ScalarTraits<Scalar> STS;
 
  916  std::string 
prefix = 
"TpetraExt::MatrixMatrix::Add(): ";
 
  919      !
A.isFillComplete() || !
B.isFillComplete(), std::invalid_argument,
 
  920      prefix << 
"A and B must both be fill complete before calling this function.");
 
  924                               prefix << 
"C is null (must be allocated), but A.haveGlobalConstants() is false. " 
  925                                         "Please report this bug to the Tpetra developers.");
 
  927                               prefix << 
"C is null (must be allocated), but B.haveGlobalConstants() is false. " 
  928                                         "Please report this bug to the Tpetra developers.");
 
  931#ifdef HAVE_TPETRA_DEBUG 
  938                               prefix << 
"The domain Maps of Op(A) and Op(B) are not the same.");
 
  945                               prefix << 
"The range Maps of Op(A) and Op(B) are not the same.");
 
  949  using Teuchos::ParameterList;
 
  962#ifdef HAVE_TPETRA_DEBUG 
  964                             prefix << 
"Failed to compute Op(A). Please report this bug to the Tpetra developers.");
 
  976#ifdef HAVE_TPETRA_DEBUG 
  978                             prefix << 
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
 
  988    C->setAllToScalar(STS::zero());
 
  998    if (
Aprime->getRowMap()->isSameAs(*
Bprime->getRowMap())) {
 
 1007      C = 
rcp(
new crs_matrix_type(
Aprime->getRowMap(), 
Aprime->getGlobalMaxNumRowEntries() + 
Bprime->getGlobalMaxNumRowEntries()));
 
 1011#ifdef HAVE_TPETRA_DEBUG 
 1013                             prefix << 
"At this point, Aprime is null. Please report this bug to the Tpetra developers.");
 
 1015                             prefix << 
"At this point, Bprime is null. Please report this bug to the Tpetra developers.");
 
 1017                             prefix << 
"At this point, C is null. Please report this bug to the Tpetra developers.");
 
 1025  for (
int k = 0; 
k < 2; ++
k) {
 
 1026    typename crs_matrix_type::nonconst_global_inds_host_view_type 
Indices;
 
 1027    typename crs_matrix_type::nonconst_values_host_view_type 
Values;
 
 1035#ifdef HAVE_TPETRA_DEBUG 
 1037                               prefix << 
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
 
 1040#ifdef HAVE_TPETRA_DEBUG 
 1042                               prefix << 
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
 
 1048      size_t numEntries             = 
Mat[
k]->getNumEntriesInGlobalRow(
globalRow);
 
 1049      if (numEntries > 0) {
 
 1050        if (numEntries > 
Indices.extent(0)) {
 
 1051          Kokkos::resize(
Indices, numEntries);
 
 1052          Kokkos::resize(
Values, numEntries);
 
 1056        if (
scalar[
k] != STS::one()) {
 
 1057          for (
size_t j = 0; 
j < numEntries; ++
j) {
 
 1066                                     prefix << 
"sumIntoGlobalValues failed to add entries from A or B into C.");
 
 1075    C->fillComplete(
C->getDomainMap(),
 
 
 1094  std::string 
prefix = 
"TpetraExt::MatrixMatrix::Add(): ";
 
 1097                             prefix << 
"C must not be null");
 
 1099  Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> 
C_ = 
C;
 
 
 1105namespace MMdetails {
 
 1157template <
class Scalar,
 
 1159          class GlobalOrdinal,
 
 1161void mult_AT_B_newmatrix(
 
 1162    const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
 
 1163    const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B,
 
 1164    CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
 
 1165    const std::string& label,
 
 1166    const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
 1170  typedef LocalOrdinal LO;
 
 1171  typedef GlobalOrdinal GO;
 
 1173  typedef CrsMatrixStruct<SC, LO, GO, NO> crs_matrix_struct_type;
 
 1174  typedef RowMatrixTransposer<SC, LO, GO, NO> transposer_type;
 
 1181  transposer_type transposer(rcpFromRef(A), label + std::string(
"XP: "));
 
 1183  using Teuchos::ParameterList;
 
 1184  RCP<ParameterList> transposeParams(
new ParameterList);
 
 1185  transposeParams->set(
"sort", 
true);  
 
 1186  if (!params.is_null()) {
 
 1187    transposeParams->set(
"compute global constants",
 
 1188                         params->get(
"compute global constants: temporaries",
 
 1191  RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Atrans =
 
 1192      transposer.createTransposeLocal(transposeParams);
 
 1201  crs_matrix_struct_type Aview;
 
 1202  crs_matrix_struct_type Bview;
 
 1203  RCP<const Import<LO, GO, NO>> dummyImporter;
 
 1206  RCP<Teuchos::ParameterList> importParams1(
new ParameterList);
 
 1207  if (!params.is_null()) {
 
 1208    importParams1->set(
"compute global constants",
 
 1209                       params->get(
"compute global constants: temporaries",
 
 1211    auto slist             = params->sublist(
"matrixmatrix: kernel params", 
false);
 
 1212    bool isMM              = slist.get(
"isMatrixMatrix_TransferAndFillComplete", 
false);
 
 1213    bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck", 
false);
 
 1214    int mm_optimization_core_count =
 
 1216    mm_optimization_core_count =
 
 1217        slist.get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
 
 1218    int mm_optimization_core_count2 =
 
 1219        params->get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
 
 1220    if (mm_optimization_core_count2 < mm_optimization_core_count) {
 
 1221      mm_optimization_core_count = mm_optimization_core_count2;
 
 1223    auto& sip1 = importParams1->sublist(
"matrixmatrix: kernel params", 
false);
 
 1224    sip1.set(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
 
 1225    sip1.set(
"isMatrixMatrix_TransferAndFillComplete", isMM);
 
 1226    sip1.set(
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
 
 1229  MMdetails::import_and_extract_views(*Atrans, Atrans->getRowMap(),
 
 1230                                      Aview, dummyImporter, 
true,
 
 1231                                      label, importParams1);
 
 1233  RCP<ParameterList> importParams2(
new ParameterList);
 
 1234  if (!params.is_null()) {
 
 1235    importParams2->set(
"compute global constants",
 
 1236                       params->get(
"compute global constants: temporaries",
 
 1238    auto slist             = params->sublist(
"matrixmatrix: kernel params", 
false);
 
 1239    bool isMM              = slist.get(
"isMatrixMatrix_TransferAndFillComplete", 
false);
 
 1240    bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck", 
false);
 
 1241    int mm_optimization_core_count =
 
 1243    mm_optimization_core_count =
 
 1244        slist.get(
"MM_TAFC_OptimizationCoreCount",
 
 1245                  mm_optimization_core_count);
 
 1246    int mm_optimization_core_count2 =
 
 1247        params->get(
"MM_TAFC_OptimizationCoreCount",
 
 1248                    mm_optimization_core_count);
 
 1249    if (mm_optimization_core_count2 < mm_optimization_core_count) {
 
 1250      mm_optimization_core_count = mm_optimization_core_count2;
 
 1252    auto& sip2 = importParams2->sublist(
"matrixmatrix: kernel params", 
false);
 
 1253    sip2.set(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
 
 1254    sip2.set(
"isMatrixMatrix_TransferAndFillComplete", isMM);
 
 1255    sip2.set(
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
 
 1258  if (B.getRowMap()->isSameAs(*Atrans->getColMap())) {
 
 1259    MMdetails::import_and_extract_views(B, B.getRowMap(), Bview, dummyImporter, 
true, label, importParams2);
 
 1261    MMdetails::import_and_extract_views(B, Atrans->getColMap(), Bview, dummyImporter, 
false, label, importParams2);
 
 1267  RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Ctemp;
 
 1270  bool needs_final_export = !Atrans->getGraph()->getExporter().is_null();
 
 1271  if (needs_final_export) {
 
 1274    Ctemp = rcp(&C, 
false);
 
 1277  mult_A_B_newmatrix(Aview, Bview, *Ctemp, label, params);
 
 1285  RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Crcp(&C, 
false);
 
 1287  if (needs_final_export) {
 
 1288    ParameterList labelList;
 
 1289    labelList.set(
"Timer Label", label);
 
 1290    if (!params.is_null()) {
 
 1291      ParameterList& params_sublist    = params->sublist(
"matrixmatrix: kernel params", 
false);
 
 1292      ParameterList& labelList_subList = labelList.sublist(
"matrixmatrix: kernel params", 
false);
 
 1294      mm_optimization_core_count       = params_sublist.get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
 
 1295      int mm_optimization_core_count2  = params->get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
 
 1296      if (mm_optimization_core_count2 < mm_optimization_core_count) mm_optimization_core_count = mm_optimization_core_count2;
 
 1297      labelList_subList.set(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count, 
"Core Count above which the optimized neighbor discovery is used");
 
 1298      bool isMM              = params_sublist.get(
"isMatrixMatrix_TransferAndFillComplete", 
false);
 
 1299      bool overrideAllreduce = params_sublist.get(
"MM_TAFC_OverrideAllreduceCheck", 
false);
 
 1301      labelList_subList.set(
"isMatrixMatrix_TransferAndFillComplete", isMM,
 
 1302                            "This parameter should be set to true only for MatrixMatrix operations: the optimization in Epetra that was ported to Tpetra does _not_ take into account the possibility that for any given source PID, a particular GID may not exist on the target PID: i.e. a transfer operation. A fix for this general case is in development.");
 
 1303      labelList.set(
"compute global constants", params->get(
"compute global constants", 
true));
 
 1304      labelList.set(
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
 
 1306    Ctemp->exportAndFillComplete(Crcp,
 
 1307                                 *Ctemp->getGraph()->getExporter(),
 
 1310                                 rcp(&labelList, 
false));
 
 1312#ifdef HAVE_TPETRA_MMM_STATISTICS 
 1313  printMultiplicationStatistics(Ctemp->getGraph()->getExporter(), label + std::string(
" AT_B MMM"));
 
 1319template <
class Scalar,
 
 1321          class GlobalOrdinal,
 
 1324    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
 
 1325    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
 
 1326    CrsWrapper<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
 
 1327    const std::string& ,
 
 1328    const Teuchos::RCP<Teuchos::ParameterList>& ) {
 
 1329  using Teuchos::Array;
 
 1330  using Teuchos::ArrayRCP;
 
 1331  using Teuchos::ArrayView;
 
 1332  using Teuchos::null;
 
 1333  using Teuchos::OrdinalTraits;
 
 1335  typedef Teuchos::ScalarTraits<Scalar> STS;
 
 1337  LocalOrdinal C_firstCol = Bview.
colMap->getMinLocalIndex();
 
 1338  LocalOrdinal C_lastCol  = Bview.colMap->getMaxLocalIndex();
 
 1340  LocalOrdinal C_firstCol_import = OrdinalTraits<LocalOrdinal>::zero();
 
 1341  LocalOrdinal C_lastCol_import  = OrdinalTraits<LocalOrdinal>::invalid();
 
 1343  ArrayView<const GlobalOrdinal> bcols        = Bview.colMap->getLocalElementList();
 
 1344  ArrayView<const GlobalOrdinal> bcols_import = null;
 
 1345  if (Bview.importColMap != null) {
 
 1346    C_firstCol_import = Bview.importColMap->getMinLocalIndex();
 
 1347    C_lastCol_import  = Bview.importColMap->getMaxLocalIndex();
 
 1349    bcols_import = Bview.importColMap->getLocalElementList();
 
 1352  size_t C_numCols = C_lastCol - C_firstCol +
 
 1353                     OrdinalTraits<LocalOrdinal>::one();
 
 1354  size_t C_numCols_import = C_lastCol_import - C_firstCol_import +
 
 1355                            OrdinalTraits<LocalOrdinal>::one();
 
 1357  if (C_numCols_import > C_numCols)
 
 1358    C_numCols = C_numCols_import;
 
 1360  Array<Scalar> dwork        = Array<Scalar>(C_numCols);
 
 1361  Array<GlobalOrdinal> iwork = Array<GlobalOrdinal>(C_numCols);
 
 1362  Array<size_t> iwork2       = Array<size_t>(C_numCols);
 
 1364  Array<Scalar> C_row_i               = dwork;
 
 1365  Array<GlobalOrdinal> C_cols         = iwork;
 
 1366  Array<size_t> c_index               = iwork2;
 
 1367  Array<GlobalOrdinal> combined_index = Array<GlobalOrdinal>(2 * C_numCols);
 
 1368  Array<Scalar> combined_values       = Array<Scalar>(2 * C_numCols);
 
 1370  size_t C_row_i_length, j, k, last_index;
 
 1373  LocalOrdinal LO_INVALID = OrdinalTraits<LocalOrdinal>::invalid();
 
 1374  Array<LocalOrdinal> Acol2Brow(Aview.colMap->getLocalNumElements(), LO_INVALID);
 
 1375  Array<LocalOrdinal> Acol2Irow(Aview.colMap->getLocalNumElements(), LO_INVALID);
 
 1376  if (Aview.colMap->isSameAs(*Bview.origMatrix->getRowMap())) {
 
 1378    for (LocalOrdinal i = Aview.colMap->getMinLocalIndex(); i <=
 
 1379                                                            Aview.colMap->getMaxLocalIndex();
 
 1384    for (LocalOrdinal i = Aview.colMap->getMinLocalIndex(); i <=
 
 1385                                                            Aview.colMap->getMaxLocalIndex();
 
 1387      GlobalOrdinal GID = Aview.colMap->getGlobalElement(i);
 
 1388      LocalOrdinal BLID = Bview.origMatrix->getRowMap()->getLocalElement(GID);
 
 1389      if (BLID != LO_INVALID)
 
 1390        Acol2Brow[i] = BLID;
 
 1392        Acol2Irow[i] = Bview.importMatrix->getRowMap()->getLocalElement(GID);
 
 1402  auto Arowptr = Aview.origMatrix->getLocalRowPtrsHost();
 
 1403  auto Acolind = Aview.origMatrix->getLocalIndicesHost();
 
 1404  auto Avals   = Aview.origMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
 
 1405  auto Browptr = Bview.origMatrix->getLocalRowPtrsHost();
 
 1406  auto Bcolind = Bview.origMatrix->getLocalIndicesHost();
 
 1407  auto Bvals   = Bview.origMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
 
 1408  decltype(Browptr) Irowptr;
 
 1409  decltype(Bcolind) Icolind;
 
 1410  decltype(Bvals) Ivals;
 
 1411  if (!Bview.importMatrix.is_null()) {
 
 1412    Irowptr = Bview.importMatrix->getLocalRowPtrsHost();
 
 1413    Icolind = Bview.importMatrix->getLocalIndicesHost();
 
 1414    Ivals   = Bview.importMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
 
 1417  bool C_filled = C.isFillComplete();
 
 1419  for (
size_t i = 0; i < C_numCols; i++)
 
 1420    c_index[i] = OrdinalTraits<size_t>::invalid();
 
 1423  size_t Arows = Aview.rowMap->getLocalNumElements();
 
 1424  for (
size_t i = 0; i < Arows; ++i) {
 
 1427    GlobalOrdinal global_row = Aview.rowMap->getGlobalElement(i);
 
 1433    C_row_i_length = OrdinalTraits<size_t>::zero();
 
 1435    for (k = Arowptr[i]; k < Arowptr[i + 1]; ++k) {
 
 1436      LocalOrdinal Ak   = Acol2Brow[Acolind[k]];
 
 1437      const Scalar Aval = Avals[k];
 
 1438      if (Aval == STS::zero())
 
 1441      if (Ak == LO_INVALID)
 
 1444      for (j = Browptr[Ak]; j < Browptr[Ak + 1]; ++j) {
 
 1445        LocalOrdinal col = Bcolind[j];
 
 1448        if (c_index[col] == OrdinalTraits<size_t>::invalid()) {
 
 1451          C_row_i[C_row_i_length] = Aval * Bvals[j];
 
 1452          C_cols[C_row_i_length]  = col;
 
 1453          c_index[col]            = C_row_i_length;
 
 1458          C_row_i[c_index[col]] += Aval * 
static_cast<Scalar
>(Bvals[j]);
 
 1463    for (
size_t ii = 0; ii < C_row_i_length; ii++) {
 
 1464      c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
 
 1465      C_cols[ii]          = bcols[C_cols[ii]];
 
 1466      combined_index[ii]  = C_cols[ii];
 
 1467      combined_values[ii] = C_row_i[ii];
 
 1469    last_index = C_row_i_length;
 
 1475    C_row_i_length = OrdinalTraits<size_t>::zero();
 
 1477    for (k = Arowptr[i]; k < Arowptr[i + 1]; ++k) {
 
 1478      LocalOrdinal Ak   = Acol2Brow[Acolind[k]];
 
 1479      const Scalar Aval = Avals[k];
 
 1480      if (Aval == STS::zero())
 
 1483      if (Ak != LO_INVALID) 
continue;
 
 1485      Ak = Acol2Irow[Acolind[k]];
 
 1486      for (j = Irowptr[Ak]; j < Irowptr[Ak + 1]; ++j) {
 
 1487        LocalOrdinal col = Icolind[j];
 
 1490        if (c_index[col] == OrdinalTraits<size_t>::invalid()) {
 
 1493          C_row_i[C_row_i_length] = Aval * Ivals[j];
 
 1494          C_cols[C_row_i_length]  = col;
 
 1495          c_index[col]            = C_row_i_length;
 
 1501          C_row_i[c_index[col]] += Aval * 
static_cast<Scalar
>(Ivals[j]);
 
 1506    for (
size_t ii = 0; ii < C_row_i_length; ii++) {
 
 1507      c_index[C_cols[ii]]         = OrdinalTraits<size_t>::invalid();
 
 1508      C_cols[ii]                  = bcols_import[C_cols[ii]];
 
 1509      combined_index[last_index]  = C_cols[ii];
 
 1510      combined_values[last_index] = C_row_i[ii];
 
 1516    C_filled ? C.sumIntoGlobalValues(
 
 1518                   combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
 
 1519                   combined_values.view(OrdinalTraits<size_t>::zero(), last_index))
 
 1520             : C.insertGlobalValues(
 
 1522                   combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
 
 1523                   combined_values.view(OrdinalTraits<size_t>::zero(), last_index));
 
 1528template <
class Scalar,
 
 1530          class GlobalOrdinal,
 
 1532void setMaxNumEntriesPerRow(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview) {
 
 1533  typedef typename Teuchos::Array<Teuchos::ArrayView<const LocalOrdinal>>::size_type local_length_size;
 
 1534  Mview.maxNumRowEntries = Teuchos::OrdinalTraits<local_length_size>::zero();
 
 1536  if (Mview.indices.size() > Teuchos::OrdinalTraits<local_length_size>::zero()) {
 
 1537    Mview.maxNumRowEntries = Mview.indices[0].size();
 
 1539    for (local_length_size i = 1; i < Mview.indices.size(); ++i)
 
 1540      if (Mview.indices[i].size() > Mview.maxNumRowEntries)
 
 1541        Mview.maxNumRowEntries = Mview.indices[i].size();
 
 1546template <
class CrsMatrixType>
 
 1547size_t C_estimate_nnz(CrsMatrixType& A, CrsMatrixType& B) {
 
 1549  size_t Aest = 100, Best = 100;
 
 1550  if (A.getLocalNumEntries() >= A.getLocalNumRows())
 
 1551    Aest = (A.getLocalNumRows() > 0) ? A.getLocalNumEntries() / A.getLocalNumRows() : 100;
 
 1552  if (B.getLocalNumEntries() >= B.getLocalNumRows())
 
 1553    Best = (B.getLocalNumRows() > 0) ? B.getLocalNumEntries() / B.getLocalNumRows() : 100;
 
 1555  size_t nnzperrow = (size_t)(sqrt((
double)Aest) + sqrt((
double)Best) - 1);
 
 1556  nnzperrow *= nnzperrow;
 
 1558  return (
size_t)(A.getLocalNumRows() * nnzperrow * 0.75 + 100);
 
 1567template <
class Scalar,
 
 1569          class GlobalOrdinal,
 
 1571void mult_A_B_newmatrix(
 
 1572    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
 
 1573    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
 
 1574    CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
 
 1575    const std::string& label,
 
 1576    const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
 1577  using Teuchos::Array;
 
 1578  using Teuchos::ArrayRCP;
 
 1579  using Teuchos::ArrayView;
 
 1584  typedef LocalOrdinal LO;
 
 1585  typedef GlobalOrdinal GO;
 
 1587  typedef Import<LO, GO, NO> import_type;
 
 1588  typedef Map<LO, GO, NO> map_type;
 
 1591  typedef typename map_type::local_map_type local_map_type;
 
 1593  typedef typename KCRS::StaticCrsGraphType graph_t;
 
 1594  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
 
 1595  typedef typename NO::execution_space execution_space;
 
 1596  typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
 
 1597  typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
 
 1601  LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
 
 1604  RCP<const import_type> Cimport;
 
 1605  RCP<const map_type> Ccolmap;
 
 1606  RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
 
 1607  RCP<const import_type> Iimport = Bview.importMatrix.is_null() ? Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
 
 1608  local_map_type Acolmap_local   = Aview.colMap->getLocalMap();
 
 1609  local_map_type Browmap_local   = Bview.origMatrix->getRowMap()->getLocalMap();
 
 1610  local_map_type Irowmap_local;
 
 1611  if (!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
 
 1612  local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
 
 1613  local_map_type Icolmap_local;
 
 1614  if (!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
 
 1621  lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"), Bview.colMap->getLocalNumElements()), Icol2Ccol;
 
 1623  if (Bview.importMatrix.is_null()) {
 
 1626    Ccolmap             = Bview.colMap;
 
 1627    const LO colMapSize = 
static_cast<LO
>(Bview.colMap->getLocalNumElements());
 
 1629    Kokkos::parallel_for(
 
 1630        "Tpetra::mult_A_B_newmatrix::Bcol2Ccol_fill",
 
 1631        Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
 
 1632        KOKKOS_LAMBDA(
const LO i) {
 
 1644    if (!Bimport.is_null() && !Iimport.is_null()) {
 
 1645      Cimport = Bimport->setUnion(*Iimport);
 
 1646    } 
else if (!Bimport.is_null() && Iimport.is_null()) {
 
 1647      Cimport = Bimport->setUnion();
 
 1648    } 
else if (Bimport.is_null() && !Iimport.is_null()) {
 
 1649      Cimport = Iimport->setUnion();
 
 1651      throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
 
 1653    Ccolmap = Cimport->getTargetMap();
 
 1658    TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
 
 1659                               std::runtime_error, 
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
 
 1666    Kokkos::resize(Icol2Ccol, Bview.importMatrix->getColMap()->getLocalNumElements());
 
 1667    local_map_type Ccolmap_local = Ccolmap->getLocalMap();
 
 1668    Kokkos::parallel_for(
 
 1669        "Tpetra::mult_A_B_newmatrix::Bcol2Ccol_getGlobalElement", range_type(0, Bview.origMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
 
 1670          Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
 
 1672    Kokkos::parallel_for(
 
 1673        "Tpetra::mult_A_B_newmatrix::Icol2Ccol_getGlobalElement", range_type(0, Bview.importMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
 
 1674          Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
 
 1682  C.replaceColMap(Ccolmap);
 
 1700  lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"), Aview.colMap->getLocalNumElements());
 
 1701  lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"), Aview.colMap->getLocalNumElements());
 
 1703  Kokkos::parallel_for(
 
 1704      "Tpetra::mult_A_B_newmatrix::construct_tables", range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex() + 1), KOKKOS_LAMBDA(
const LO i) {
 
 1705        GO aidx  = Acolmap_local.getGlobalElement(i);
 
 1706        LO B_LID = Browmap_local.getLocalElement(aidx);
 
 1707        if (B_LID != LO_INVALID) {
 
 1708          targetMapToOrigRow(i)   = B_LID;
 
 1709          targetMapToImportRow(i) = LO_INVALID;
 
 1711          LO I_LID                = Irowmap_local.getLocalElement(aidx);
 
 1712          targetMapToOrigRow(i)   = LO_INVALID;
 
 1713          targetMapToImportRow(i) = I_LID;
 
 1719  KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Node, lo_view_t>::mult_A_B_newmatrix_kernel_wrapper(Aview, Bview, targetMapToOrigRow, targetMapToImportRow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
 
 1724template <
class Scalar,
 
 1726          class GlobalOrdinal,
 
 1728void mult_A_B_newmatrix(BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
 
 1729                        BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
 
 1730                        Teuchos::RCP<BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& C) {
 
 1731  using Teuchos::Array;
 
 1732  using Teuchos::ArrayRCP;
 
 1733  using Teuchos::ArrayView;
 
 1734  using Teuchos::null;
 
 1739  typedef LocalOrdinal LO;
 
 1740  typedef GlobalOrdinal GO;
 
 1742  typedef Import<LO, GO, NO> import_type;
 
 1743  typedef Map<LO, GO, NO> map_type;
 
 1744  typedef BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> block_crs_matrix_type;
 
 1745  typedef typename block_crs_matrix_type::crs_graph_type graph_t;
 
 1748  typedef typename map_type::local_map_type local_map_type;
 
 1749  typedef typename block_crs_matrix_type::local_matrix_device_type KBSR;
 
 1750  typedef typename KBSR::device_type device_t;
 
 1751  typedef typename KBSR::StaticCrsGraphType static_graph_t;
 
 1752  typedef typename static_graph_t::row_map_type::non_const_type lno_view_t;
 
 1753  typedef typename static_graph_t::entries_type::non_const_type lno_nnz_view_t;
 
 1754  typedef typename KBSR::values_type::non_const_type scalar_view_t;
 
 1755  typedef typename NO::execution_space execution_space;
 
 1756  typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
 
 1757  typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
 
 1759  LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
 
 1762  RCP<const import_type> Cimport;
 
 1763  RCP<const map_type> Ccolmap;
 
 1764  RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
 
 1765  RCP<const import_type> Iimport = Bview.importMatrix.is_null() ? Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
 
 1766  local_map_type Acolmap_local   = Aview.colMap->getLocalMap();
 
 1767  local_map_type Browmap_local   = Bview.origMatrix->getRowMap()->getLocalMap();
 
 1768  local_map_type Irowmap_local;
 
 1769  if (!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
 
 1770  local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
 
 1771  local_map_type Icolmap_local;
 
 1772  if (!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
 
 1779  lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"), Bview.colMap->getLocalNumElements()), Icol2Ccol;
 
 1781  if (Bview.importMatrix.is_null()) {
 
 1784    Ccolmap             = Bview.colMap;
 
 1785    const LO colMapSize = 
static_cast<LO
>(Bview.colMap->getLocalNumElements());
 
 1787    Kokkos::parallel_for(
 
 1788        "Tpetra::mult_A_B_newmatrix::Bcol2Ccol_fill",
 
 1789        Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
 
 1790        KOKKOS_LAMBDA(
const LO i) {
 
 1802    if (!Bimport.is_null() && !Iimport.is_null()) {
 
 1803      Cimport = Bimport->setUnion(*Iimport);
 
 1804    } 
else if (!Bimport.is_null() && Iimport.is_null()) {
 
 1805      Cimport = Bimport->setUnion();
 
 1806    } 
else if (Bimport.is_null() && !Iimport.is_null()) {
 
 1807      Cimport = Iimport->setUnion();
 
 1809      throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
 
 1811    Ccolmap = Cimport->getTargetMap();
 
 1818    Kokkos::resize(Icol2Ccol, Bview.importMatrix->getColMap()->getLocalNumElements());
 
 1819    local_map_type Ccolmap_local = Ccolmap->getLocalMap();
 
 1820    Kokkos::parallel_for(
 
 1821        "Tpetra::mult_A_B_newmatrix::Bcol2Ccol_getGlobalElement",
 
 1822        range_type(0, Bview.origMatrix->getColMap()->getLocalNumElements()),
 
 1823        KOKKOS_LAMBDA(
const LO i) {
 
 1824          Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
 
 1826    Kokkos::parallel_for(
 
 1827        "Tpetra::mult_A_B_newmatrix::Icol2Ccol_getGlobalElement",
 
 1828        range_type(0, Bview.importMatrix->getColMap()->getLocalNumElements()),
 
 1829        KOKKOS_LAMBDA(
const LO i) {
 
 1830          Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
 
 1850  lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),
 
 1851                               Aview.colMap->getLocalNumElements());
 
 1852  lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),
 
 1853                                 Aview.colMap->getLocalNumElements());
 
 1855  Kokkos::parallel_for(
 
 1856      "Tpetra::mult_A_B_newmatrix::construct_tables",
 
 1857      range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex() + 1),
 
 1858      KOKKOS_LAMBDA(
const LO i) {
 
 1859        GO aidx  = Acolmap_local.getGlobalElement(i);
 
 1860        LO B_LID = Browmap_local.getLocalElement(aidx);
 
 1861        if (B_LID != LO_INVALID) {
 
 1862          targetMapToOrigRow(i)   = B_LID;
 
 1863          targetMapToImportRow(i) = LO_INVALID;
 
 1865          LO I_LID                = Irowmap_local.getLocalElement(aidx);
 
 1866          targetMapToOrigRow(i)   = LO_INVALID;
 
 1867          targetMapToImportRow(i) = I_LID;
 
 1872  using KernelHandle =
 
 1873      KokkosKernels::Experimental::KokkosKernelsHandle<
typename lno_view_t::const_value_type,
 
 1874                                                       typename lno_nnz_view_t::const_value_type,
 
 1875                                                       typename scalar_view_t::const_value_type,
 
 1876                                                       typename device_t::execution_space,
 
 1877                                                       typename device_t::memory_space,
 
 1878                                                       typename device_t::memory_space>;
 
 1879  int team_work_size = 16;  
 
 1880  std::string myalg(
"SPGEMM_KK_MEMORY");
 
 1881  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
 
 1884  kh.create_spgemm_handle(alg_enum);
 
 1885  kh.set_team_work_size(team_work_size);
 
 1888  const KBSR Amat    = Aview.origMatrix->getLocalMatrixDevice();
 
 1889  const KBSR Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview,
 
 1890                                                         targetMapToOrigRow, targetMapToImportRow,
 
 1891                                                         Bcol2Ccol, Icol2Ccol,
 
 1892                                                         Ccolmap.getConst()->getLocalNumElements());
 
 1894  RCP<graph_t> graphC;
 
 1895  typename KBSR::values_type values;
 
 1900    KokkosSparse::block_spgemm_symbolic(kh, Amat, 
false, Bmerged, 
false, Cmat);
 
 1901    KokkosSparse::block_spgemm_numeric(kh, Amat, 
false, Bmerged, 
false, Cmat);
 
 1902    kh.destroy_spgemm_handle();
 
 1905    graphC = rcp(
new graph_t(Cmat.graph, Aview.origMatrix->getRowMap(), Ccolmap.getConst()));
 
 1906    values = Cmat.values;
 
 1908  C = rcp(
new block_crs_matrix_type(*graphC, values, Aview.blocksize));
 
 1913template <
class Scalar,
 
 1915          class GlobalOrdinal,
 
 1917          class LocalOrdinalViewType>
 
 1918void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
 
 1919                                                                                                                        CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
 
 1920                                                                                                                        const LocalOrdinalViewType& targetMapToOrigRow,
 
 1921                                                                                                                        const LocalOrdinalViewType& targetMapToImportRow,
 
 1922                                                                                                                        const LocalOrdinalViewType& Bcol2Ccol,
 
 1923                                                                                                                        const LocalOrdinalViewType& Icol2Ccol,
 
 1924                                                                                                                        CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
 
 1925                                                                                                                        Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> Cimport,
 
 1926                                                                                                                        const std::string& label,
 
 1927                                                                                                                        const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
 1928  using Teuchos::Array;
 
 1929  using Teuchos::ArrayRCP;
 
 1930  using Teuchos::ArrayView;
 
 1937  typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
 
 1938  typedef typename KCRS::StaticCrsGraphType graph_t;
 
 1939  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
 
 1940  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
 
 1941  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
 
 1942  typedef typename KCRS::values_type::non_const_type scalar_view_t;
 
 1945  typedef LocalOrdinal LO;
 
 1946  typedef GlobalOrdinal GO;
 
 1948  typedef Map<LO, GO, NO> map_type;
 
 1949  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
 
 1950  const LO LO_INVALID     = Teuchos::OrdinalTraits<LO>::invalid();
 
 1951  const SC SC_ZERO        = Teuchos::ScalarTraits<Scalar>::zero();
 
 1954  RCP<const map_type> Ccolmap = C.getColMap();
 
 1955  size_t m                    = Aview.origMatrix->getLocalNumRows();
 
 1956  size_t n                    = Ccolmap->getLocalNumElements();
 
 1957  size_t b_max_nnz_per_row    = Bview.origMatrix->getLocalMaxNumRowEntries();
 
 1960  const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
 
 1961  const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
 
 1963  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
 
 1964  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
 
 1965  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
 
 1967  c_lno_view_t Irowptr;
 
 1968  lno_nnz_view_t Icolind;
 
 1969  scalar_view_t Ivals;
 
 1970  if (!Bview.importMatrix.is_null()) {
 
 1971    auto lclB         = Bview.importMatrix->getLocalMatrixHost();
 
 1972    Irowptr           = lclB.graph.row_map;
 
 1973    Icolind           = lclB.graph.entries;
 
 1974    Ivals             = lclB.values;
 
 1975    b_max_nnz_per_row = std::max(b_max_nnz_per_row, Bview.importMatrix->getLocalMaxNumRowEntries());
 
 1985  size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
 
 1986  lno_view_t Crowptr(Kokkos::ViewAllocateWithoutInitializing(
"Crowptr"), m + 1);
 
 1987  lno_nnz_view_t Ccolind(Kokkos::ViewAllocateWithoutInitializing(
"Ccolind"), CSR_alloc);
 
 1988  scalar_view_t Cvals(Kokkos::ViewAllocateWithoutInitializing(
"Cvals"), CSR_alloc);
 
 1998  size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
 
 1999  std::vector<size_t> c_status(n, ST_INVALID);
 
 2009  size_t CSR_ip = 0, OLD_ip = 0;
 
 2010  for (
size_t i = 0; i < m; i++) {
 
 2013    Crowptr[i] = CSR_ip;
 
 2016    for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
 
 2017      LO Aik        = Acolind[k];  
 
 2018      const SC Aval = Avals[k];    
 
 2019      if (Aval == SC_ZERO)
 
 2022      if (targetMapToOrigRow[Aik] != LO_INVALID) {
 
 2029        size_t Bk = 
static_cast<size_t>(targetMapToOrigRow[Aik]);
 
 2032        for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
 
 2033          LO Bkj = Bcolind[j];
 
 2034          LO Cij = Bcol2Ccol[Bkj];
 
 2036          if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
 
 2038            c_status[Cij]   = CSR_ip;
 
 2039            Ccolind[CSR_ip] = Cij;
 
 2040            Cvals[CSR_ip]   = Aval * Bvals[j];
 
 2044            Cvals[c_status[Cij]] += Aval * Bvals[j];
 
 2055        size_t Ik = 
static_cast<size_t>(targetMapToImportRow[Aik]);
 
 2056        for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
 
 2057          LO Ikj = Icolind[j];
 
 2058          LO Cij = Icol2Ccol[Ikj];
 
 2060          if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
 
 2062            c_status[Cij]   = CSR_ip;
 
 2063            Ccolind[CSR_ip] = Cij;
 
 2064            Cvals[CSR_ip]   = Aval * Ivals[j];
 
 2067            Cvals[c_status[Cij]] += Aval * Ivals[j];
 
 2074    if (i + 1 < m && CSR_ip + std::min(n, (Arowptr[i + 2] - Arowptr[i + 1]) * b_max_nnz_per_row) > CSR_alloc) {
 
 2076      Kokkos::resize(Ccolind, CSR_alloc);
 
 2077      Kokkos::resize(Cvals, CSR_alloc);
 
 2082  Crowptr[m] = CSR_ip;
 
 2085  Kokkos::resize(Ccolind, CSR_ip);
 
 2086  Kokkos::resize(Cvals, CSR_ip);
 
 2092    if (params.is_null() || params->get(
"sort entries", 
true))
 
 2093      Import_Util::sortCrsEntries(Crowptr, Ccolind, Cvals);
 
 2094    C.setAllValues(Crowptr, Ccolind, Cvals);
 
 2107    RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
 
 2108    labelList->set(
"Timer Label", label);
 
 2109    if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants", 
true));
 
 2110    RCP<const Export<LO, GO, NO>> dummyExport;
 
 2111    C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
 
 2116template <
class Scalar,
 
 2118          class GlobalOrdinal,
 
 2121    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
 
 2122    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
 
 2123    CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
 
 2124    const std::string& label,
 
 2125    const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
 2126  using Teuchos::Array;
 
 2127  using Teuchos::ArrayRCP;
 
 2128  using Teuchos::ArrayView;
 
 2133  typedef LocalOrdinal LO;
 
 2134  typedef GlobalOrdinal GO;
 
 2136  typedef Import<LO, GO, NO> import_type;
 
 2137  typedef Map<LO, GO, NO> map_type;
 
 2140  typedef typename map_type::local_map_type local_map_type;
 
 2142  typedef typename KCRS::StaticCrsGraphType graph_t;
 
 2143  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
 
 2144  typedef typename NO::execution_space execution_space;
 
 2145  typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
 
 2146  typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
 
 2151  LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
 
 2154  RCP<const import_type> Cimport = C.getGraph()->getImporter();
 
 2155  RCP<const map_type> Ccolmap    = C.getColMap();
 
 2156  local_map_type Acolmap_local   = Aview.colMap->getLocalMap();
 
 2157  local_map_type Browmap_local   = Bview.origMatrix->getRowMap()->getLocalMap();
 
 2158  local_map_type Irowmap_local;
 
 2159  if (!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
 
 2160  local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
 
 2161  local_map_type Icolmap_local;
 
 2162  if (!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
 
 2163  local_map_type Ccolmap_local = Ccolmap->getLocalMap();
 
 2166  lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"), Bview.colMap->getLocalNumElements()), Icol2Ccol;
 
 2170    Kokkos::parallel_for(
 
 2171        range_type(0, Bview.origMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
 
 2172          Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
 
 2175    if (!Bview.importMatrix.is_null()) {
 
 2176      TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
 
 2177                                 std::runtime_error, 
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
 
 2179      Kokkos::resize(Icol2Ccol, Bview.importMatrix->getColMap()->getLocalNumElements());
 
 2180      Kokkos::parallel_for(
 
 2181          range_type(0, Bview.importMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
 
 2182            Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
 
 2188  lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"), Aview.colMap->getLocalNumElements());
 
 2189  lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"), Aview.colMap->getLocalNumElements());
 
 2190  Kokkos::parallel_for(
 
 2191      range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex() + 1), KOKKOS_LAMBDA(
const LO i) {
 
 2192        GO aidx  = Acolmap_local.getGlobalElement(i);
 
 2193        LO B_LID = Browmap_local.getLocalElement(aidx);
 
 2194        if (B_LID != LO_INVALID) {
 
 2195          targetMapToOrigRow(i)   = B_LID;
 
 2196          targetMapToImportRow(i) = LO_INVALID;
 
 2198          LO I_LID                = Irowmap_local.getLocalElement(aidx);
 
 2199          targetMapToOrigRow(i)   = LO_INVALID;
 
 2200          targetMapToImportRow(i) = I_LID;
 
 2206  KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Node, lo_view_t>::mult_A_B_reuse_kernel_wrapper(Aview, Bview, targetMapToOrigRow, targetMapToImportRow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
 
 2210template <
class Scalar,
 
 2212          class GlobalOrdinal,
 
 2214          class LocalOrdinalViewType>
 
 2215void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
 
 2216                                                                                                                    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
 
 2217                                                                                                                    const LocalOrdinalViewType& targetMapToOrigRow,
 
 2218                                                                                                                    const LocalOrdinalViewType& targetMapToImportRow,
 
 2219                                                                                                                    const LocalOrdinalViewType& Bcol2Ccol,
 
 2220                                                                                                                    const LocalOrdinalViewType& Icol2Ccol,
 
 2221                                                                                                                    CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
 
 2222                                                                                                                    Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> ,
 
 2223                                                                                                                    const std::string& label,
 
 2224                                                                                                                    const Teuchos::RCP<Teuchos::ParameterList>& ) {
 
 2229  typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
 
 2230  typedef typename KCRS::StaticCrsGraphType graph_t;
 
 2231  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
 
 2232  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
 
 2233  typedef typename KCRS::values_type::non_const_type scalar_view_t;
 
 2236  typedef LocalOrdinal LO;
 
 2237  typedef GlobalOrdinal GO;
 
 2239  typedef Map<LO, GO, NO> map_type;
 
 2240  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
 
 2241  const LO LO_INVALID     = Teuchos::OrdinalTraits<LO>::invalid();
 
 2242  const SC SC_ZERO        = Teuchos::ScalarTraits<Scalar>::zero();
 
 2248  RCP<const map_type> Ccolmap = C.getColMap();
 
 2249  size_t m                    = Aview.origMatrix->getLocalNumRows();
 
 2250  size_t n                    = Ccolmap->getLocalNumElements();
 
 2253  const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
 
 2254  const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
 
 2255  const KCRS& Cmat = C.getLocalMatrixHost();
 
 2257  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
 
 2258  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
 
 2259  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
 
 2260  scalar_view_t Cvals = Cmat.values;
 
 2262  c_lno_view_t Irowptr;
 
 2263  lno_nnz_view_t Icolind;
 
 2264  scalar_view_t Ivals;
 
 2265  if (!Bview.importMatrix.is_null()) {
 
 2266    auto lclB = Bview.importMatrix->getLocalMatrixHost();
 
 2267    Irowptr   = lclB.graph.row_map;
 
 2268    Icolind   = lclB.graph.entries;
 
 2269    Ivals     = lclB.values;
 
 2281  std::vector<size_t> c_status(n, ST_INVALID);
 
 2284  size_t CSR_ip = 0, OLD_ip = 0;
 
 2285  for (
size_t i = 0; i < m; i++) {
 
 2288    OLD_ip = Crowptr[i];
 
 2289    CSR_ip = Crowptr[i + 1];
 
 2290    for (
size_t k = OLD_ip; k < CSR_ip; k++) {
 
 2291      c_status[Ccolind[k]] = k;
 
 2297    for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
 
 2298      LO Aik        = Acolind[k];
 
 2299      const SC Aval = Avals[k];
 
 2300      if (Aval == SC_ZERO)
 
 2303      if (targetMapToOrigRow[Aik] != LO_INVALID) {
 
 2305        size_t Bk = 
static_cast<size_t>(targetMapToOrigRow[Aik]);
 
 2307        for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
 
 2308          LO Bkj = Bcolind[j];
 
 2309          LO Cij = Bcol2Ccol[Bkj];
 
 2311          TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
 
 2312                                     std::runtime_error, 
"Trying to insert a new entry (" << i << 
"," << Cij << 
") into a static graph " 
 2313                                                                                          << 
"(c_status = " << c_status[Cij] << 
" of [" << OLD_ip << 
"," << CSR_ip << 
"))");
 
 2315          Cvals[c_status[Cij]] += Aval * Bvals[j];
 
 2320        size_t Ik = 
static_cast<size_t>(targetMapToImportRow[Aik]);
 
 2321        for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
 
 2322          LO Ikj = Icolind[j];
 
 2323          LO Cij = Icol2Ccol[Ikj];
 
 2325          TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
 
 2326                                     std::runtime_error, 
"Trying to insert a new entry (" << i << 
"," << Cij << 
") into a static graph " 
 2327                                                                                          << 
"(c_status = " << c_status[Cij] << 
" of [" << OLD_ip << 
"," << CSR_ip << 
"))");
 
 2329          Cvals[c_status[Cij]] += Aval * Ivals[j];
 
 2337    C.fillComplete(C.getDomainMap(), C.getRangeMap());
 
 2343template <
class Scalar,
 
 2345          class GlobalOrdinal,
 
 2347void jacobi_A_B_newmatrix(
 
 2349    const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Dinv,
 
 2350    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
 
 2351    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
 
 2352    CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
 
 2353    const std::string& label,
 
 2354    const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
 2355  using Teuchos::Array;
 
 2356  using Teuchos::ArrayRCP;
 
 2357  using Teuchos::ArrayView;
 
 2361  typedef LocalOrdinal LO;
 
 2362  typedef GlobalOrdinal GO;
 
 2365  typedef Import<LO, GO, NO> import_type;
 
 2366  typedef Map<LO, GO, NO> map_type;
 
 2367  typedef typename map_type::local_map_type local_map_type;
 
 2371  typedef typename KCRS::StaticCrsGraphType graph_t;
 
 2372  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
 
 2373  typedef typename NO::execution_space execution_space;
 
 2374  typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
 
 2375  typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
 
 2379  LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
 
 2382  RCP<const import_type> Cimport;
 
 2383  RCP<const map_type> Ccolmap;
 
 2384  RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
 
 2385  RCP<const import_type> Iimport = Bview.importMatrix.is_null() ? Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
 
 2386  local_map_type Acolmap_local   = Aview.colMap->getLocalMap();
 
 2387  local_map_type Browmap_local   = Bview.origMatrix->getRowMap()->getLocalMap();
 
 2388  local_map_type Irowmap_local;
 
 2389  if (!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
 
 2390  local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
 
 2391  local_map_type Icolmap_local;
 
 2392  if (!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
 
 2399  lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"), Bview.colMap->getLocalNumElements()), Icol2Ccol;
 
 2401  if (Bview.importMatrix.is_null()) {
 
 2404    Ccolmap = Bview.colMap;
 
 2408    Kokkos::RangePolicy<execution_space, LO> range(0, 
static_cast<LO
>(Bview.colMap->getLocalNumElements()));
 
 2409    Kokkos::parallel_for(
 
 2410        range, KOKKOS_LAMBDA(
const size_t i) {
 
 2411          Bcol2Ccol(i) = 
static_cast<LO
>(i);
 
 2422    if (!Bimport.is_null() && !Iimport.is_null()) {
 
 2423      Cimport = Bimport->setUnion(*Iimport);
 
 2424      Ccolmap = Cimport->getTargetMap();
 
 2426    } 
else if (!Bimport.is_null() && Iimport.is_null()) {
 
 2427      Cimport = Bimport->setUnion();
 
 2429    } 
else if (Bimport.is_null() && !Iimport.is_null()) {
 
 2430      Cimport = Iimport->setUnion();
 
 2433      throw std::runtime_error(
"TpetraExt::Jacobi status of matrix importers is nonsensical");
 
 2435    Ccolmap = Cimport->getTargetMap();
 
 2437    TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
 
 2438                               std::runtime_error, 
"Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way");
 
 2445    Kokkos::resize(Icol2Ccol, Bview.importMatrix->getColMap()->getLocalNumElements());
 
 2446    local_map_type Ccolmap_local = Ccolmap->getLocalMap();
 
 2447    Kokkos::parallel_for(
 
 2448        range_type(0, Bview.origMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
 
 2449          Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
 
 2451    Kokkos::parallel_for(
 
 2452        range_type(0, Bview.importMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
 
 2453          Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
 
 2461  C.replaceColMap(Ccolmap);
 
 2479  lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"), Aview.colMap->getLocalNumElements());
 
 2480  lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"), Aview.colMap->getLocalNumElements());
 
 2481  Kokkos::parallel_for(
 
 2482      range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex() + 1), KOKKOS_LAMBDA(
const LO i) {
 
 2483        GO aidx  = Acolmap_local.getGlobalElement(i);
 
 2484        LO B_LID = Browmap_local.getLocalElement(aidx);
 
 2485        if (B_LID != LO_INVALID) {
 
 2486          targetMapToOrigRow(i)   = B_LID;
 
 2487          targetMapToImportRow(i) = LO_INVALID;
 
 2489          LO I_LID                = Irowmap_local.getLocalElement(aidx);
 
 2490          targetMapToOrigRow(i)   = LO_INVALID;
 
 2491          targetMapToImportRow(i) = I_LID;
 
 2497  KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Node, lo_view_t>::jacobi_A_B_newmatrix_kernel_wrapper(omega, Dinv, Aview, Bview, targetMapToOrigRow, targetMapToImportRow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
 
 2504template <
class Scalar,
 
 2506          class GlobalOrdinal,
 
 2508          class LocalOrdinalViewType>
 
 2509void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
 
 2510                                                                                                                           const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Dinv,
 
 2511                                                                                                                           CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
 
 2512                                                                                                                           CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
 
 2513                                                                                                                           const LocalOrdinalViewType& targetMapToOrigRow,
 
 2514                                                                                                                           const LocalOrdinalViewType& targetMapToImportRow,
 
 2515                                                                                                                           const LocalOrdinalViewType& Bcol2Ccol,
 
 2516                                                                                                                           const LocalOrdinalViewType& Icol2Ccol,
 
 2517                                                                                                                           CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
 
 2518                                                                                                                           Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> Cimport,
 
 2519                                                                                                                           const std::string& label,
 
 2520                                                                                                                           const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
 2523  using Teuchos::Array;
 
 2524  using Teuchos::ArrayRCP;
 
 2525  using Teuchos::ArrayView;
 
 2530  typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
 
 2531  typedef typename KCRS::StaticCrsGraphType graph_t;
 
 2532  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
 
 2533  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
 
 2534  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
 
 2535  typedef typename KCRS::values_type::non_const_type scalar_view_t;
 
 2538  typedef typename scalar_view_t::memory_space scalar_memory_space;
 
 2541  typedef LocalOrdinal LO;
 
 2542  typedef GlobalOrdinal GO;
 
 2545  typedef Map<LO, GO, NO> map_type;
 
 2546  size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
 
 2547  LO LO_INVALID     = Teuchos::OrdinalTraits<LO>::invalid();
 
 2550  RCP<const map_type> Ccolmap = C.getColMap();
 
 2551  size_t m                    = Aview.origMatrix->getLocalNumRows();
 
 2552  size_t n                    = Ccolmap->getLocalNumElements();
 
 2553  size_t b_max_nnz_per_row    = Bview.origMatrix->getLocalMaxNumRowEntries();
 
 2556  const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
 
 2557  const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
 
 2559  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
 
 2560  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
 
 2561  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
 
 2563  c_lno_view_t Irowptr;
 
 2564  lno_nnz_view_t Icolind;
 
 2565  scalar_view_t Ivals;
 
 2566  if (!Bview.importMatrix.is_null()) {
 
 2567    auto lclB         = Bview.importMatrix->getLocalMatrixHost();
 
 2568    Irowptr           = lclB.graph.row_map;
 
 2569    Icolind           = lclB.graph.entries;
 
 2570    Ivals             = lclB.values;
 
 2571    b_max_nnz_per_row = std::max(b_max_nnz_per_row, Bview.importMatrix->getLocalMaxNumRowEntries());
 
 2576      Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
 
 2584  size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
 
 2585  Array<size_t> c_status(n, ST_INVALID);
 
 2594  size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
 
 2595  lno_view_t Crowptr(Kokkos::ViewAllocateWithoutInitializing(
"Crowptr"), m + 1);
 
 2596  lno_nnz_view_t Ccolind(Kokkos::ViewAllocateWithoutInitializing(
"Ccolind"), CSR_alloc);
 
 2597  scalar_view_t Cvals(Kokkos::ViewAllocateWithoutInitializing(
"Cvals"), CSR_alloc);
 
 2598  size_t CSR_ip = 0, OLD_ip = 0;
 
 2600  const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
 
 2614  for (
size_t i = 0; i < m; i++) {
 
 2617    Crowptr[i]        = CSR_ip;
 
 2618    SC minusOmegaDval = -omega * Dvals(i, 0);
 
 2621    for (
size_t j = Browptr[i]; j < Browptr[i + 1]; j++) {
 
 2622      Scalar Bval = Bvals[j];
 
 2623      if (Bval == SC_ZERO)
 
 2625      LO Bij = Bcolind[j];
 
 2626      LO Cij = Bcol2Ccol[Bij];
 
 2629      c_status[Cij]   = CSR_ip;
 
 2630      Ccolind[CSR_ip] = Cij;
 
 2631      Cvals[CSR_ip]   = Bvals[j];
 
 2636    for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
 
 2637      LO Aik        = Acolind[k];
 
 2638      const SC Aval = Avals[k];
 
 2639      if (Aval == SC_ZERO)
 
 2642      if (targetMapToOrigRow[Aik] != LO_INVALID) {
 
 2644        size_t Bk = 
static_cast<size_t>(targetMapToOrigRow[Aik]);
 
 2646        for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
 
 2647          LO Bkj = Bcolind[j];
 
 2648          LO Cij = Bcol2Ccol[Bkj];
 
 2650          if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
 
 2652            c_status[Cij]   = CSR_ip;
 
 2653            Ccolind[CSR_ip] = Cij;
 
 2654            Cvals[CSR_ip]   = minusOmegaDval * Aval * Bvals[j];
 
 2658            Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
 
 2664        size_t Ik = 
static_cast<size_t>(targetMapToImportRow[Aik]);
 
 2665        for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
 
 2666          LO Ikj = Icolind[j];
 
 2667          LO Cij = Icol2Ccol[Ikj];
 
 2669          if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
 
 2671            c_status[Cij]   = CSR_ip;
 
 2672            Ccolind[CSR_ip] = Cij;
 
 2673            Cvals[CSR_ip]   = minusOmegaDval * Aval * Ivals[j];
 
 2676            Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
 
 2683    if (i + 1 < m && CSR_ip + std::min(n, (Arowptr[i + 2] - Arowptr[i + 1] + 1) * b_max_nnz_per_row) > CSR_alloc) {
 
 2685      Kokkos::resize(Ccolind, CSR_alloc);
 
 2686      Kokkos::resize(Cvals, CSR_alloc);
 
 2690  Crowptr[m] = CSR_ip;
 
 2693  Kokkos::resize(Ccolind, CSR_ip);
 
 2694  Kokkos::resize(Cvals, CSR_ip);
 
 2703    C.replaceColMap(Ccolmap);
 
 2710    if (params.is_null() || params->get(
"sort entries", 
true))
 
 2711      Import_Util::sortCrsEntries(Crowptr, Ccolind, Cvals);
 
 2712    C.setAllValues(Crowptr, Ccolind, Cvals);
 
 2725    RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
 
 2726    labelList->set(
"Timer Label", label);
 
 2727    if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants", 
true));
 
 2728    RCP<const Export<LO, GO, NO>> dummyExport;
 
 2729    C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
 
 2735template <
class Scalar,
 
 2737          class GlobalOrdinal,
 
 2739void jacobi_A_B_reuse(
 
 2741    const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Dinv,
 
 2742    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
 
 2743    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
 
 2744    CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
 
 2745    const std::string& label,
 
 2746    const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
 2747  using Teuchos::Array;
 
 2748  using Teuchos::ArrayRCP;
 
 2749  using Teuchos::ArrayView;
 
 2753  typedef LocalOrdinal LO;
 
 2754  typedef GlobalOrdinal GO;
 
 2757  typedef Import<LO, GO, NO> import_type;
 
 2758  typedef Map<LO, GO, NO> map_type;
 
 2761  typedef typename map_type::local_map_type local_map_type;
 
 2763  typedef typename KCRS::StaticCrsGraphType graph_t;
 
 2764  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
 
 2765  typedef typename NO::execution_space execution_space;
 
 2766  typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
 
 2767  typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
 
 2769  RCP<const import_type> Cimport = C.getGraph()->getImporter();
 
 2770  lo_view_t Bcol2Ccol, Icol2Ccol;
 
 2771  lo_view_t targetMapToOrigRow;
 
 2772  lo_view_t targetMapToImportRow;
 
 2776    LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
 
 2779    RCP<const map_type> Ccolmap  = C.getColMap();
 
 2780    local_map_type Acolmap_local = Aview.colMap->getLocalMap();
 
 2781    local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
 
 2782    local_map_type Irowmap_local;
 
 2783    if (!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
 
 2784    local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
 
 2785    local_map_type Icolmap_local;
 
 2786    if (!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
 
 2787    local_map_type Ccolmap_local = Ccolmap->getLocalMap();
 
 2790    Bcol2Ccol = lo_view_t(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"), Bview.colMap->getLocalNumElements());
 
 2794      Kokkos::parallel_for(
 
 2795          range_type(0, Bview.origMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
 
 2796            Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
 
 2799      if (!Bview.importMatrix.is_null()) {
 
 2800        TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
 
 2801                                   std::runtime_error, 
"Tpetra::Jacobi: Import setUnion messed with the DomainMap in an unfortunate way");
 
 2803        Kokkos::resize(Icol2Ccol, Bview.importMatrix->getColMap()->getLocalNumElements());
 
 2804        Kokkos::parallel_for(
 
 2805            range_type(0, Bview.importMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
 
 2806              Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
 
 2812    targetMapToOrigRow   = lo_view_t(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"), Aview.colMap->getLocalNumElements());
 
 2813    targetMapToImportRow = lo_view_t(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"), Aview.colMap->getLocalNumElements());
 
 2814    Kokkos::parallel_for(
 
 2815        range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex() + 1), KOKKOS_LAMBDA(
const LO i) {
 
 2816          GO aidx  = Acolmap_local.getGlobalElement(i);
 
 2817          LO B_LID = Browmap_local.getLocalElement(aidx);
 
 2818          if (B_LID != LO_INVALID) {
 
 2819            targetMapToOrigRow(i)   = B_LID;
 
 2820            targetMapToImportRow(i) = LO_INVALID;
 
 2822            LO I_LID                = Irowmap_local.getLocalElement(aidx);
 
 2823            targetMapToOrigRow(i)   = LO_INVALID;
 
 2824            targetMapToImportRow(i) = I_LID;
 
 2831  KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Node, lo_view_t>::jacobi_A_B_reuse_kernel_wrapper(omega, Dinv, Aview, Bview, targetMapToOrigRow, targetMapToImportRow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
 
 2835template <
class Scalar,
 
 2837          class GlobalOrdinal,
 
 2839          class LocalOrdinalViewType>
 
 2840void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
 
 2841                                                                                                                       const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Dinv,
 
 2842                                                                                                                       CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
 
 2843                                                                                                                       CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
 
 2844                                                                                                                       const LocalOrdinalViewType& targetMapToOrigRow,
 
 2845                                                                                                                       const LocalOrdinalViewType& targetMapToImportRow,
 
 2846                                                                                                                       const LocalOrdinalViewType& Bcol2Ccol,
 
 2847                                                                                                                       const LocalOrdinalViewType& Icol2Ccol,
 
 2848                                                                                                                       CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
 
 2849                                                                                                                       Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> ,
 
 2850                                                                                                                       const std::string& label,
 
 2851                                                                                                                       const Teuchos::RCP<Teuchos::ParameterList>& ) {
 
 2859  typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
 
 2860  typedef typename KCRS::StaticCrsGraphType graph_t;
 
 2861  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
 
 2862  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
 
 2863  typedef typename KCRS::values_type::non_const_type scalar_view_t;
 
 2864  typedef typename scalar_view_t::memory_space scalar_memory_space;
 
 2867  typedef LocalOrdinal LO;
 
 2868  typedef GlobalOrdinal GO;
 
 2870  typedef Map<LO, GO, NO> map_type;
 
 2871  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
 
 2872  const LO LO_INVALID     = Teuchos::OrdinalTraits<LO>::invalid();
 
 2873  const SC SC_ZERO        = Teuchos::ScalarTraits<Scalar>::zero();
 
 2876  RCP<const map_type> Ccolmap = C.getColMap();
 
 2877  size_t m                    = Aview.origMatrix->getLocalNumRows();
 
 2878  size_t n                    = Ccolmap->getLocalNumElements();
 
 2881  const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
 
 2882  const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
 
 2883  const KCRS& Cmat = C.getLocalMatrixHost();
 
 2885  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
 
 2886  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
 
 2887  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
 
 2888  scalar_view_t Cvals = Cmat.values;
 
 2890  c_lno_view_t Irowptr;
 
 2891  lno_nnz_view_t Icolind;
 
 2892  scalar_view_t Ivals;
 
 2893  if (!Bview.importMatrix.is_null()) {
 
 2894    auto lclB = Bview.importMatrix->getLocalMatrixHost();
 
 2895    Irowptr   = lclB.graph.row_map;
 
 2896    Icolind   = lclB.graph.entries;
 
 2897    Ivals     = lclB.values;
 
 2902      Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
 
 2909  std::vector<size_t> c_status(n, ST_INVALID);
 
 2912  size_t CSR_ip = 0, OLD_ip = 0;
 
 2913  for (
size_t i = 0; i < m; i++) {
 
 2916    OLD_ip = Crowptr[i];
 
 2917    CSR_ip = Crowptr[i + 1];
 
 2918    for (
size_t k = OLD_ip; k < CSR_ip; k++) {
 
 2919      c_status[Ccolind[k]] = k;
 
 2925    SC minusOmegaDval = -omega * Dvals(i, 0);
 
 2928    for (
size_t j = Browptr[i]; j < Browptr[i + 1]; j++) {
 
 2929      Scalar Bval = Bvals[j];
 
 2930      if (Bval == SC_ZERO)
 
 2932      LO Bij = Bcolind[j];
 
 2933      LO Cij = Bcol2Ccol[Bij];
 
 2935      TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
 
 2936                                 std::runtime_error, 
"Trying to insert a new entry into a static graph");
 
 2938      Cvals[c_status[Cij]] = Bvals[j];
 
 2942    for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
 
 2943      LO Aik        = Acolind[k];
 
 2944      const SC Aval = Avals[k];
 
 2945      if (Aval == SC_ZERO)
 
 2948      if (targetMapToOrigRow[Aik] != LO_INVALID) {
 
 2950        size_t Bk = 
static_cast<size_t>(targetMapToOrigRow[Aik]);
 
 2952        for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
 
 2953          LO Bkj = Bcolind[j];
 
 2954          LO Cij = Bcol2Ccol[Bkj];
 
 2956          TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
 
 2957                                     std::runtime_error, 
"Trying to insert a new entry into a static graph");
 
 2959          Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
 
 2964        size_t Ik = 
static_cast<size_t>(targetMapToImportRow[Aik]);
 
 2965        for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
 
 2966          LO Ikj = Icolind[j];
 
 2967          LO Cij = Icol2Ccol[Ikj];
 
 2969          TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
 
 2970                                     std::runtime_error, 
"Trying to insert a new entry into a static graph");
 
 2972          Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
 
 2980    C.fillComplete(C.getDomainMap(), C.getRangeMap());
 
 2985template <
class Scalar,
 
 2987          class GlobalOrdinal,
 
 2989void import_and_extract_views(
 
 2990    const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
 
 2991    Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node>> targetMap,
 
 2992    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
 
 2993    Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> prototypeImporter,
 
 2994    bool userAssertsThereAreNoRemotes,
 
 2995    const std::string& label,
 
 2996    const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
 2997  using Teuchos::Array;
 
 2998  using Teuchos::ArrayView;
 
 2999  using Teuchos::null;
 
 3004  typedef LocalOrdinal LO;
 
 3005  typedef GlobalOrdinal GO;
 
 3008  typedef Map<LO, GO, NO> map_type;
 
 3009  typedef Import<LO, GO, NO> import_type;
 
 3010  typedef CrsMatrix<SC, LO, GO, NO> crs_matrix_type;
 
 3020  Aview.deleteContents();
 
 3022  Aview.origMatrix = rcp(&A, 
false);
 
 3024  Aview.origMatrix->getApplyHelper();
 
 3025  Aview.origRowMap           = A.getRowMap();
 
 3026  Aview.rowMap               = targetMap;
 
 3027  Aview.colMap               = A.getColMap();
 
 3028  Aview.domainMap            = A.getDomainMap();
 
 3029  Aview.importColMap         = null;
 
 3030  RCP<const map_type> rowMap = A.getRowMap();
 
 3031  const int numProcs         = rowMap->getComm()->getSize();
 
 3034  if (userAssertsThereAreNoRemotes || numProcs < 2)
 
 3037  RCP<const import_type> importer;
 
 3038  if (params != null && params->isParameter(
"importer")) {
 
 3039    importer = params->get<RCP<const import_type>>(
"importer");
 
 3047    RCP<const map_type> remoteRowMap;
 
 3048    size_t numRemote = 0;
 
 3050    if (!prototypeImporter.is_null() &&
 
 3051        prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
 
 3052        prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
 
 3056      ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
 
 3057      numRemote                      = prototypeImporter->getNumRemoteIDs();
 
 3059      Array<GO> remoteRows(numRemote);
 
 3060      for (
size_t i = 0; i < numRemote; i++)
 
 3061        remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
 
 3063      remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
 
 3064                                      rowMap->getIndexBase(), rowMap->getComm()));
 
 3067    } 
else if (prototypeImporter.is_null()) {
 
 3071      ArrayView<const GO> rows = targetMap->getLocalElementList();
 
 3072      size_t numRows           = targetMap->getLocalNumElements();
 
 3074      Array<GO> remoteRows(numRows);
 
 3075      for (
size_t i = 0; i < numRows; ++i) {
 
 3076        const LO mlid = rowMap->getLocalElement(rows[i]);
 
 3078        if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
 
 3079          remoteRows[numRemote++] = rows[i];
 
 3081      remoteRows.resize(numRemote);
 
 3082      remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
 
 3083                                      rowMap->getIndexBase(), rowMap->getComm()));
 
 3092      TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
 
 3093                                 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
 
 3106    Teuchos::reduceAll(*(rowMap->getComm()), Teuchos::REDUCE_MAX, (
global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote));
 
 3108    if (globalMaxNumRemote > 0) {
 
 3114        importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
 
 3116        importer = rcp(
new import_type(rowMap, remoteRowMap));
 
 3118        throw std::runtime_error(
"prototypeImporter->SourceMap() does not match A.getRowMap()!");
 
 3122      params->set(
"importer", importer);
 
 3125  if (importer != null) {
 
 3130    Teuchos::ParameterList labelList;
 
 3131    labelList.set(
"Timer Label", label);
 
 3132    auto& labelList_subList = labelList.sublist(
"matrixmatrix: kernel params", 
false);
 
 3135    bool overrideAllreduce         = 
false;
 
 3138    Teuchos::ParameterList params_sublist;
 
 3139    if (!params.is_null()) {
 
 3140      labelList.set(
"compute global constants", params->get(
"compute global constants", 
false));
 
 3141      params_sublist                  = params->sublist(
"matrixmatrix: kernel params", 
false);
 
 3142      mm_optimization_core_count      = params_sublist.get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
 
 3143      int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
 
 3144      if (mm_optimization_core_count2 < mm_optimization_core_count) mm_optimization_core_count = mm_optimization_core_count2;
 
 3145      isMM              = params_sublist.get(
"isMatrixMatrix_TransferAndFillComplete", 
false);
 
 3146      overrideAllreduce = params_sublist.get(
"MM_TAFC_OverrideAllreduceCheck", 
false);
 
 3148    labelList_subList.set(
"isMatrixMatrix_TransferAndFillComplete", isMM);
 
 3149    labelList_subList.set(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
 
 3150    labelList_subList.set(
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
 
 3153                                                                                 A.getDomainMap(), importer->getTargetMap(), rcpFromRef(labelList));
 
 3155    Aview.importMatrix->getApplyHelper();
 
 3161    sprintf(str,
"import_matrix.%d.dat",count);
 
 3166#ifdef HAVE_TPETRA_MMM_STATISTICS 
 3167    printMultiplicationStatistics(importer, label + std::string(
" I&X MMM"));
 
 3174    Aview.importColMap = Aview.importMatrix->getColMap();
 
 3180template <
class Scalar,
 
 3182          class GlobalOrdinal,
 
 3184void import_and_extract_views(
 
 3185    const BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& M,
 
 3186    Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node>> targetMap,
 
 3187    BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview,
 
 3188    Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> prototypeImporter,
 
 3189    bool userAssertsThereAreNoRemotes) {
 
 3190  using Teuchos::Array;
 
 3191  using Teuchos::ArrayView;
 
 3192  using Teuchos::null;
 
 3197  typedef LocalOrdinal LO;
 
 3198  typedef GlobalOrdinal GO;
 
 3201  typedef Map<LO, GO, NO> map_type;
 
 3202  typedef Import<LO, GO, NO> import_type;
 
 3203  typedef BlockCrsMatrix<SC, LO, GO, NO> blockcrs_matrix_type;
 
 3211  Mview.deleteContents();
 
 3215  Mview.origMatrix->getApplyHelper();
 
 3216  Mview.origRowMap           = M.getRowMap();
 
 3217  Mview.rowMap               = targetMap;
 
 3218  Mview.colMap               = M.getColMap();
 
 3219  Mview.importColMap         = null;
 
 3220  RCP<const map_type> rowMap = M.getRowMap();
 
 3221  const int numProcs         = rowMap->getComm()->getSize();
 
 3224  if (userAssertsThereAreNoRemotes || numProcs < 2) 
return;
 
 3228  RCP<const map_type> remoteRowMap;
 
 3229  size_t numRemote = 0;
 
 3231  if (!prototypeImporter.is_null() &&
 
 3232      prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
 
 3233      prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
 
 3235    ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
 
 3236    numRemote                      = prototypeImporter->getNumRemoteIDs();
 
 3238    Array<GO> remoteRows(numRemote);
 
 3239    for (
size_t i = 0; i < numRemote; i++)
 
 3240      remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
 
 3242    remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
 
 3243                                    rowMap->getIndexBase(), rowMap->getComm()));
 
 3246  } 
else if (prototypeImporter.is_null()) {
 
 3248    ArrayView<const GO> rows = targetMap->getLocalElementList();
 
 3249    size_t numRows           = targetMap->getLocalNumElements();
 
 3251    Array<GO> remoteRows(numRows);
 
 3252    for (
size_t i = 0; i < numRows; ++i) {
 
 3253      const LO mlid = rowMap->getLocalElement(rows[i]);
 
 3255      if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
 
 3256        remoteRows[numRemote++] = rows[i];
 
 3258    remoteRows.resize(numRemote);
 
 3259    remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
 
 3260                                    rowMap->getIndexBase(), rowMap->getComm()));
 
 3269    TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
 
 3270                               "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
 
 3278  Teuchos::reduceAll(*(rowMap->getComm()), Teuchos::REDUCE_MAX, (
global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote));
 
 3280  RCP<const import_type> importer;
 
 3282  if (globalMaxNumRemote > 0) {
 
 3285      importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
 
 3287      importer = rcp(
new import_type(rowMap, remoteRowMap));
 
 3289      throw std::runtime_error(
"prototypeImporter->SourceMap() does not match M.getRowMap()!");
 
 3292  if (importer != null) {
 
 3297    Mview.importMatrix->getApplyHelper();
 
 3300    Mview.importColMap = Mview.importMatrix->getColMap();
 
 3306template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node, 
class LocalOrdinalViewType>
 
 3308merge_matrices(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
 
 3309               CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
 
 3310               const LocalOrdinalViewType& Acol2Brow,
 
 3311               const LocalOrdinalViewType& Acol2Irow,
 
 3312               const LocalOrdinalViewType& Bcol2Ccol,
 
 3313               const LocalOrdinalViewType& Icol2Ccol,
 
 3314               const size_t mergedNodeNumCols) {
 
 3317  typedef typename KCRS::StaticCrsGraphType graph_t;
 
 3318  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
 
 3319  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
 
 3320  typedef typename KCRS::values_type::non_const_type scalar_view_t;
 
 3322  const KCRS& Ak = Aview.
origMatrix->getLocalMatrixDevice();
 
 3323  const KCRS& Bk = Bview.origMatrix->getLocalMatrixDevice();
 
 3326  if (!Bview.importMatrix.is_null() || (Bview.importMatrix.is_null() && (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap()))) {
 
 3330    RCP<const KCRS> Ik_;
 
 3331    if (!Bview.importMatrix.is_null()) Ik_ = Teuchos::rcpFromRef<const KCRS>(Bview.importMatrix->getLocalMatrixDevice());
 
 3332    const KCRS* Ik = Bview.importMatrix.is_null() ? 0 : &*Ik_;
 
 3334    if (Ik != 0) Iks = *Ik;
 
 3335    size_t merge_numrows = Ak.numCols();
 
 3337    lno_view_t Mrowptr(
"Mrowptr", merge_numrows + 1);
 
 3339    const LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
 
 3342    typedef typename Node::execution_space execution_space;
 
 3343    typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
 
 3344    Kokkos::parallel_scan(
 
 3345        "Tpetra_MatrixMatrix_merge_matrices_buildRowptr", range_type(0, merge_numrows),
 
 3346        KOKKOS_LAMBDA(
const size_t i, 
size_t& update, 
const bool final) {
 
 3347          if (
final) Mrowptr(i) = update;
 
 3350          if (Acol2Brow(i) != LO_INVALID)
 
 3351            ct = Bk.graph.row_map(Acol2Brow(i) + 1) - Bk.graph.row_map(Acol2Brow(i));
 
 3353            ct = Iks.graph.row_map(Acol2Irow(i) + 1) - Iks.graph.row_map(Acol2Irow(i));
 
 3356          if (
final && i + 1 == merge_numrows)
 
 3357            Mrowptr(i + 1) = update;
 
 3361    size_t merge_nnz = ::Tpetra::Details::getEntryOnHost(Mrowptr, merge_numrows);
 
 3362    lno_nnz_view_t Mcolind(Kokkos::ViewAllocateWithoutInitializing(
"Mcolind"), merge_nnz);
 
 3363    scalar_view_t Mvalues(Kokkos::ViewAllocateWithoutInitializing(
"Mvals"), merge_nnz);
 
 3366    typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
 
 3367    Kokkos::parallel_for(
 
 3368        "Tpetra_MatrixMatrix_merg_matrices_buildColindValues", range_type(0, merge_numrows), KOKKOS_LAMBDA(
const size_t i) {
 
 3369          if (Acol2Brow(i) != LO_INVALID) {
 
 3370            size_t row   = Acol2Brow(i);
 
 3371            size_t start = Bk.graph.row_map(row);
 
 3372            for (
size_t j = Mrowptr(i); j < Mrowptr(i + 1); j++) {
 
 3373              Mvalues(j) = Bk.values(j - Mrowptr(i) + start);
 
 3374              Mcolind(j) = Bcol2Ccol(Bk.graph.entries(j - Mrowptr(i) + start));
 
 3377            size_t row   = Acol2Irow(i);
 
 3378            size_t start = Iks.graph.row_map(row);
 
 3379            for (
size_t j = Mrowptr(i); j < Mrowptr(i + 1); j++) {
 
 3380              Mvalues(j) = Iks.values(j - Mrowptr(i) + start);
 
 3381              Mcolind(j) = Icol2Ccol(Iks.graph.entries(j - Mrowptr(i) + start));
 
 3386    KCRS newmat(
"CrsMatrix", merge_numrows, mergedNodeNumCols, merge_nnz, Mvalues, Mrowptr, Mcolind);
 
 3396template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node, 
class LocalOrdinalViewType>
 
 3397const typename Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type
 
 3398merge_matrices(BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
 
 3399               BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
 
 3400               const LocalOrdinalViewType& Acol2Brow,
 
 3401               const LocalOrdinalViewType& Acol2Irow,
 
 3402               const LocalOrdinalViewType& Bcol2Ccol,
 
 3403               const LocalOrdinalViewType& Icol2Ccol,
 
 3404               const size_t mergedNodeNumCols) {
 
 3406  typedef typename Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type KBCRS;
 
 3407  typedef typename KBCRS::StaticCrsGraphType graph_t;
 
 3408  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
 
 3409  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
 
 3410  typedef typename KBCRS::values_type::non_const_type scalar_view_t;
 
 3413  const KBCRS& Ak = Aview.
origMatrix->getLocalMatrixDevice();
 
 3414  const KBCRS& Bk = Bview.origMatrix->getLocalMatrixDevice();
 
 3417  if (!Bview.importMatrix.is_null() ||
 
 3418      (Bview.importMatrix.is_null() &&
 
 3419       (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap()))) {
 
 3423    RCP<const KBCRS> Ik_;
 
 3424    if (!Bview.importMatrix.is_null()) Ik_ = Teuchos::rcpFromRef<const KBCRS>(Bview.importMatrix->getLocalMatrixDevice());
 
 3425    const KBCRS* Ik = Bview.importMatrix.is_null() ? 0 : &*Ik_;
 
 3427    if (Ik != 0) Iks = *Ik;
 
 3428    size_t merge_numrows = Ak.numCols();
 
 3431    lno_view_t Mrowptr(
"Mrowptr", merge_numrows + 1);
 
 3433    const LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
 
 3436    typedef typename Node::execution_space execution_space;
 
 3437    typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
 
 3438    Kokkos::parallel_scan(
 
 3439        "Tpetra_MatrixMatrix_merge_matrices_buildRowptr", range_type(0, merge_numrows),
 
 3440        KOKKOS_LAMBDA(
const size_t i, 
size_t& update, 
const bool final) {
 
 3441          if (
final) Mrowptr(i) = update;
 
 3444          if (Acol2Brow(i) != LO_INVALID)
 
 3445            ct = Bk.graph.row_map(Acol2Brow(i) + 1) - Bk.graph.row_map(Acol2Brow(i));
 
 3447            ct = Iks.graph.row_map(Acol2Irow(i) + 1) - Iks.graph.row_map(Acol2Irow(i));
 
 3450          if (
final && i + 1 == merge_numrows)
 
 3451            Mrowptr(i + 1) = update;
 
 3455    size_t merge_nnz    = ::Tpetra::Details::getEntryOnHost(Mrowptr, merge_numrows);
 
 3456    const int blocksize = Ak.blockDim();
 
 3457    lno_nnz_view_t Mcolind(Kokkos::ViewAllocateWithoutInitializing(
"Mcolind"), merge_nnz);
 
 3458    scalar_view_t Mvalues(Kokkos::ViewAllocateWithoutInitializing(
"Mvals"), merge_nnz * blocksize * blocksize);
 
 3461    typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
 
 3462    Kokkos::parallel_for(
 
 3463        "Tpetra_MatrixMatrix_merg_matrices_buildColindValues", range_type(0, merge_numrows), KOKKOS_LAMBDA(
const size_t i) {
 
 3464          if (Acol2Brow(i) != LO_INVALID) {
 
 3465            size_t row   = Acol2Brow(i);
 
 3466            size_t start = Bk.graph.row_map(row);
 
 3467            for (
size_t j = Mrowptr(i); j < Mrowptr(i + 1); j++) {
 
 3468              Mcolind(j) = Bcol2Ccol(Bk.graph.entries(j - Mrowptr(i) + start));
 
 3470              for (
int b = 0; b < blocksize * blocksize; ++b) {
 
 3471                const int val_indx   = j * blocksize * blocksize + b;
 
 3472                const int b_val_indx = (j - Mrowptr(i) + 
start) * blocksize * blocksize + b;
 
 3473                Mvalues(val_indx)    = Bk.values(b_val_indx);
 
 3477            size_t row   = Acol2Irow(i);
 
 3478            size_t start = Iks.graph.row_map(row);
 
 3479            for (
size_t j = Mrowptr(i); j < Mrowptr(i + 1); j++) {
 
 3480              Mcolind(j) = Icol2Ccol(Iks.graph.entries(j - Mrowptr(i) + start));
 
 3482              for (
int b = 0; b < blocksize * blocksize; ++b) {
 
 3483                const int val_indx   = j * blocksize * blocksize + b;
 
 3484                const int b_val_indx = (j - Mrowptr(i) + 
start) * blocksize * blocksize + b;
 
 3485                Mvalues(val_indx)    = Iks.values(b_val_indx);
 
 3492    KBCRS newmat(
"CrsMatrix", merge_numrows, mergedNodeNumCols, merge_nnz, Mvalues, Mrowptr, Mcolind, blocksize);
 
 3501template <
typename SC, 
typename LO, 
typename GO, 
typename NO>
 
 3502void AddKernels<SC, LO, GO, NO>::
 
 3504        const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
 
 3505        const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
 
 3506        const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
 
 3507        const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
 
 3508        const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
 
 3509        const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
 
 3510        const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
 
 3511        const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
 
 3513        typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
 
 3514        typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
 
 3515        typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds) {
 
 3517  using Teuchos::TimeMonitor;
 
 3518  using AddKern = MMdetails::AddKernels<SC, LO, GO, NO>;
 
 3519  TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error, 
"Can't add matrices with different numbers of rows.");
 
 3520  auto nrows = Arowptrs.extent(0) - 1;
 
 3521  Crowptrs   = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
 
 3522  typename AddKern::KKH handle;
 
 3523  handle.create_spadd_handle(
true);
 
 3524  auto addHandle = handle.get_spadd_handle();
 
 3528  KokkosSparse::spadd_symbolic(&handle,
 
 3529                               nrows, numGlobalCols,
 
 3530                               Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
 
 3532  Cvals    = values_array(
"C values", addHandle->get_c_nnz());
 
 3533  Ccolinds = col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
 
 3537  KokkosSparse::spadd_numeric(&handle,
 
 3538                              nrows, numGlobalCols,
 
 3539                              Arowptrs, Acolinds, Avals, scalarA,
 
 3540                              Browptrs, Bcolinds, Bvals, scalarB,
 
 3541                              Crowptrs, Ccolinds, Cvals);
 
 3544template <
typename SC, 
typename LO, 
typename GO, 
typename NO>
 
 3545void AddKernels<SC, LO, GO, NO>::
 
 3547        const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
 
 3548        const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
 
 3549        const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
 
 3550        const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
 
 3551        const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
 
 3552        const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
 
 3553        const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
 
 3554        const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
 
 3556        typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
 
 3557        typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
 
 3558        typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds) {
 
 3560  using Teuchos::TimeMonitor;
 
 3561  using AddKern = MMdetails::AddKernels<SC, LO, GO, NO>;
 
 3562  TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error, 
"Can't add matrices with different numbers of rows.");
 
 3563  auto nrows = Arowptrs.extent(0) - 1;
 
 3564  Crowptrs   = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
 
 3565  typedef MMdetails::AddKernels<SC, LO, GO, NO> AddKern;
 
 3566  typename AddKern::KKH handle;
 
 3567  handle.create_spadd_handle(
false);
 
 3568  auto addHandle = handle.get_spadd_handle();
 
 3571  KokkosSparse::spadd_symbolic(&handle,
 
 3572                               nrows, numGlobalCols,
 
 3573                               Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
 
 3575  Cvals    = values_array(
"C values", addHandle->get_c_nnz());
 
 3576  Ccolinds = col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
 
 3579  KokkosSparse::spadd_numeric(&handle,
 
 3580                              nrows, numGlobalCols,
 
 3581                              Arowptrs, Acolinds, Avals, scalarA,
 
 3582                              Browptrs, Bcolinds, Bvals, scalarB,
 
 3583                              Crowptrs, Ccolinds, Cvals);
 
 3586template <
typename GO,
 
 3587          typename LocalIndicesType,
 
 3588          typename GlobalIndicesType,
 
 3589          typename ColMapType>
 
 3590struct ConvertLocalToGlobalFunctor {
 
 3591  ConvertLocalToGlobalFunctor(
 
 3592      const LocalIndicesType& colindsOrig_,
 
 3593      const GlobalIndicesType& colindsConverted_,
 
 3594      const ColMapType& colmap_)
 
 3595    : colindsOrig(colindsOrig_)
 
 3596    , colindsConverted(colindsConverted_)
 
 3597    , colmap(colmap_) {}
 
 3598  KOKKOS_INLINE_FUNCTION 
void 
 3599  operator()(
const GO i)
 const {
 
 3600    colindsConverted(i) = colmap.getGlobalElement(colindsOrig(i));
 
 3602  LocalIndicesType colindsOrig;
 
 3603  GlobalIndicesType colindsConverted;
 
 3607template <
typename SC, 
typename LO, 
typename GO, 
typename NO>
 
 3608void MMdetails::AddKernels<SC, LO, GO, NO>::
 
 3609    convertToGlobalAndAdd(
 
 3610        const typename MMdetails::AddKernels<SC, LO, GO, NO>::KCRS& A,
 
 3611        const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
 
 3612        const typename MMdetails::AddKernels<SC, LO, GO, NO>::KCRS& B,
 
 3613        const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
 
 3614        const typename MMdetails::AddKernels<SC, LO, GO, NO>::local_map_type& AcolMap,
 
 3615        const typename MMdetails::AddKernels<SC, LO, GO, NO>::local_map_type& BcolMap,
 
 3616        typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
 
 3617        typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
 
 3618        typename MMdetails::AddKernels<SC, LO, GO, NO>::global_col_inds_array& Ccolinds) {
 
 3620  using Teuchos::TimeMonitor;
 
 3623  using KKH_GO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, GO, impl_scalar_type,
 
 3624                                                                  typename NO::execution_space, 
typename NO::memory_space, 
typename NO::memory_space>;
 
 3626  const values_array Avals      = A.values;
 
 3627  const values_array Bvals      = B.values;
 
 3628  const col_inds_array Acolinds = A.graph.entries;
 
 3629  const col_inds_array Bcolinds = B.graph.entries;
 
 3630  auto Arowptrs                 = A.graph.row_map;
 
 3631  auto Browptrs                 = B.graph.row_map;
 
 3632  global_col_inds_array AcolindsConverted(Kokkos::ViewAllocateWithoutInitializing(
"A colinds (converted)"), Acolinds.extent(0));
 
 3633  global_col_inds_array BcolindsConverted(Kokkos::ViewAllocateWithoutInitializing(
"B colinds (converted)"), Bcolinds.extent(0));
 
 3637  ConvertLocalToGlobalFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertA(Acolinds, AcolindsConverted, AcolMap);
 
 3638  Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsA", range_type(0, Acolinds.extent(0)), convertA);
 
 3639  ConvertLocalToGlobalFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertB(Bcolinds, BcolindsConverted, BcolMap);
 
 3640  Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsB", range_type(0, Bcolinds.extent(0)), convertB);
 
 3642  handle.create_spadd_handle(
false);
 
 3643  auto addHandle = handle.get_spadd_handle();
 
 3646  auto nrows     = Arowptrs.extent(0) - 1;
 
 3647  Crowptrs       = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
 
 3648  KokkosSparse::spadd_symbolic(&handle,
 
 3650                               Arowptrs, AcolindsConverted, Browptrs, BcolindsConverted, Crowptrs);
 
 3651  Cvals    = values_array(
"C values", addHandle->get_c_nnz());
 
 3652  Ccolinds = global_col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
 
 3656  KokkosSparse::spadd_numeric(&handle,
 
 3658                              Arowptrs, AcolindsConverted, Avals, scalarA,
 
 3659                              Browptrs, BcolindsConverted, Bvals, scalarB,
 
 3660                              Crowptrs, Ccolinds, Cvals);
 
 3675#define TPETRA_MATRIXMATRIX_INSTANT(SCALAR, LO, GO, NODE)                                                                             \ 
 3676  template void MatrixMatrix::Multiply(                                                                                               \ 
 3677      const CrsMatrix<SCALAR, LO, GO, NODE>& A,                                                                                       \ 
 3679      const CrsMatrix<SCALAR, LO, GO, NODE>& B,                                                                                       \ 
 3681      CrsMatrix<SCALAR, LO, GO, NODE>& C,                                                                                             \ 
 3682      bool call_FillComplete_on_result,                                                                                               \ 
 3683      const std::string& label,                                                                                                       \ 
 3684      const Teuchos::RCP<Teuchos::ParameterList>& params);                                                                            \ 
 3686  template void MatrixMatrix::Multiply(                                                                                               \ 
 3687      const Teuchos::RCP<const BlockCrsMatrix<SCALAR, LO, GO, NODE>>& A,                                                              \ 
 3689      const Teuchos::RCP<const BlockCrsMatrix<SCALAR, LO, GO, NODE>>& B,                                                              \ 
 3691      Teuchos::RCP<BlockCrsMatrix<SCALAR, LO, GO, NODE>>& C,                                                                          \ 
 3692      const std::string& label);                                                                                                      \ 
 3694  template void MatrixMatrix::Jacobi(                                                                                                 \ 
 3696      const Vector<SCALAR, LO, GO, NODE>& Dinv,                                                                                       \ 
 3697      const CrsMatrix<SCALAR, LO, GO, NODE>& A,                                                                                       \ 
 3698      const CrsMatrix<SCALAR, LO, GO, NODE>& B,                                                                                       \ 
 3699      CrsMatrix<SCALAR, LO, GO, NODE>& C,                                                                                             \ 
 3700      bool call_FillComplete_on_result,                                                                                               \ 
 3701      const std::string& label,                                                                                                       \ 
 3702      const Teuchos::RCP<Teuchos::ParameterList>& params);                                                                            \ 
 3704  template void MatrixMatrix::Add(                                                                                                    \ 
 3705      const CrsMatrix<SCALAR, LO, GO, NODE>& A,                                                                                       \ 
 3708      const CrsMatrix<SCALAR, LO, GO, NODE>& B,                                                                                       \ 
 3711      Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>& C);                                                                              \ 
 3713  template void MatrixMatrix::Add(                                                                                                    \ 
 3714      const CrsMatrix<SCALAR, LO, GO, NODE>& A,                                                                                       \ 
 3717      const CrsMatrix<SCALAR, LO, GO, NODE>& B,                                                                                       \ 
 3720      const Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>& C);                                                                        \ 
 3722  template void MatrixMatrix::Add(                                                                                                    \ 
 3723      const CrsMatrix<SCALAR, LO, GO, NODE>& A,                                                                                       \ 
 3726      CrsMatrix<SCALAR, LO, GO, NODE>& B,                                                                                             \ 
 3729  template Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>                                                                              \ 
 3730  MatrixMatrix::add<SCALAR, LO, GO, NODE>(const SCALAR& alpha,                                                                        \ 
 3731                                          const bool transposeA,                                                                      \ 
 3732                                          const CrsMatrix<SCALAR, LO, GO, NODE>& A,                                                   \ 
 3733                                          const SCALAR& beta,                                                                         \ 
 3734                                          const bool transposeB,                                                                      \ 
 3735                                          const CrsMatrix<SCALAR, LO, GO, NODE>& B,                                                   \ 
 3736                                          const Teuchos::RCP<const Map<LO, GO, NODE>>& domainMap,                                     \ 
 3737                                          const Teuchos::RCP<const Map<LO, GO, NODE>>& rangeMap,                                      \ 
 3738                                          const Teuchos::RCP<Teuchos::ParameterList>& params);                                        \ 
 3741  MatrixMatrix::add<SCALAR, LO, GO, NODE>(const SCALAR& alpha,                                                                        \ 
 3742                                          const bool transposeA,                                                                      \ 
 3743                                          const CrsMatrix<SCALAR, LO, GO, NODE>& A,                                                   \ 
 3744                                          const SCALAR& beta,                                                                         \ 
 3745                                          const bool transposeB,                                                                      \ 
 3746                                          const CrsMatrix<SCALAR, LO, GO, NODE>& B,                                                   \ 
 3747                                          CrsMatrix<SCALAR, LO, GO, NODE>& C,                                                         \ 
 3748                                          const Teuchos::RCP<const Map<LO, GO, NODE>>& domainMap,                                     \ 
 3749                                          const Teuchos::RCP<const Map<LO, GO, NODE>>& rangeMap,                                      \ 
 3750                                          const Teuchos::RCP<Teuchos::ParameterList>& params);                                        \ 
 3752  template struct MMdetails::AddKernels<SCALAR, LO, GO, NODE>;                                                                        \ 
 3754  template void MMdetails::import_and_extract_views<SCALAR, LO, GO, NODE>(const CrsMatrix<SCALAR, LO, GO, NODE>& M,                   \ 
 3755                                                                          Teuchos::RCP<const Map<LO, GO, NODE>> targetMap,            \ 
 3756                                                                          CrsMatrixStruct<SCALAR, LO, GO, NODE>& Mview,               \ 
 3757                                                                          Teuchos::RCP<const Import<LO, GO, NODE>> prototypeImporter, \ 
 3758                                                                          bool userAssertsThereAreNoRemotes,                          \ 
 3759                                                                          const std::string& label,                                   \ 
 3760                                                                          const Teuchos::RCP<Teuchos::ParameterList>& params);        \ 
 3762  template void MMdetails::import_and_extract_views<SCALAR, LO, GO, NODE>(const BlockCrsMatrix<SCALAR, LO, GO, NODE>& M,              \ 
 3763                                                                          Teuchos::RCP<const Map<LO, GO, NODE>> targetMap,            \ 
 3764                                                                          BlockCrsMatrixStruct<SCALAR, LO, GO, NODE>& Mview,          \ 
 3765                                                                          Teuchos::RCP<const Import<LO, GO, NODE>> prototypeImporter, \ 
 3766                                                                          bool userAssertsThereAreNoRemotes); 
Matrix Market file readers and writers for Tpetra objects.
 
Forward declaration of some Tpetra Matrix Matrix objects.
 
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
 
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
 
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
 
Declaration and definition of Tpetra::Details::getEntryOnHost.
 
Utility functions for packing and unpacking sparse matrix entries.
 
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects.
 
Stand-alone utility functions and macros.
 
Struct that holds views of the contents of a CrsMatrix.
 
Teuchos::RCP< const map_type > colMap
Col map for the original version of the matrix.
 
Teuchos::RCP< const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > origMatrix
The original matrix.
 
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
 
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
 
static void writeSparseFile(const std::string &filename, const sparse_matrix_type &matrix, const std::string &matrixName, const std::string &matrixDescription, const bool debug=false)
Print the sparse matrix in Matrix Market format, with comments.
 
void start()
Start the deep_copy counter.
 
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const bool transposeB, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap=Teuchos::null, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Compute the sparse matrix sum C = scalarA * Op(A) + scalarB * Op(B), where Op(X) is either X or its t...
 
void Multiply(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, bool transposeB, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Sparse matrix-matrix multiply.
 
void Jacobi(Scalar omega, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Dinv, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
 
void Add(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, Scalar scalarA, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarB)
 
Namespace Tpetra contains the class and methods constituting the Tpetra library.
 
void removeCrsMatrixZeros(CrsMatrixType &matrix, typename Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::magnitudeType const &threshold=Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::magnitude(Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::zero()))
Remove zero entries from a matrix.
 
size_t global_size_t
Global size_t object.