10#ifndef TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP 
   11#define TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP 
   13#ifdef HAVE_TPETRA_INST_OPENMP 
   19template <
class Scalar,
 
   21          class GlobalOrdinal, 
class LocalOrdinalViewType>
 
   22struct KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, 
Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
 
   23  static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
   24                                                       CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
 
   25                                                       const LocalOrdinalViewType& Acol2Brow,
 
   26                                                       const LocalOrdinalViewType& Acol2Irow,
 
   27                                                       const LocalOrdinalViewType& Bcol2Ccol,
 
   28                                                       const LocalOrdinalViewType& Icol2Ccol,
 
   29                                                       CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
 
   30                                                       Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
 
   31                                                       const std::string& label                           = std::string(),
 
   32                                                       const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
 
   34  static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
   35                                                   CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
 
   36                                                   const LocalOrdinalViewType& Acol2Brow,
 
   37                                                   const LocalOrdinalViewType& Acol2Irow,
 
   38                                                   const LocalOrdinalViewType& Bcol2Ccol,
 
   39                                                   const LocalOrdinalViewType& Icol2Ccol,
 
   40                                                   CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
 
   41                                                   Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
 
   42                                                   const std::string& label                           = std::string(),
 
   43                                                   const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
 
   47template <
class Scalar,
 
   49          class GlobalOrdinal, 
class LocalOrdinalViewType>
 
   50struct KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, 
Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
 
   51  static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
 
   52                                                         const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
 
   53                                                         CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
   54                                                         CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
 
   55                                                         const LocalOrdinalViewType& Acol2Brow,
 
   56                                                         const LocalOrdinalViewType& Acol2Irow,
 
   57                                                         const LocalOrdinalViewType& Bcol2Ccol,
 
   58                                                         const LocalOrdinalViewType& Icol2Ccol,
 
   59                                                         CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
 
   60                                                         Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
 
   61                                                         const std::string& label                           = std::string(),
 
   62                                                         const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
 
   64  static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
 
   65                                                     const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
 
   66                                                     CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
   67                                                     CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
 
   68                                                     const LocalOrdinalViewType& Acol2Brow,
 
   69                                                     const LocalOrdinalViewType& Acol2Irow,
 
   70                                                     const LocalOrdinalViewType& Bcol2Ccol,
 
   71                                                     const LocalOrdinalViewType& Icol2Ccol,
 
   72                                                     CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
 
   73                                                     Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
 
   74                                                     const std::string& label                           = std::string(),
 
   75                                                     const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
 
   77  static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
 
   78                                                        const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
 
   79                                                        CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
   80                                                        CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
 
   81                                                        const LocalOrdinalViewType& Acol2Brow,
 
   82                                                        const LocalOrdinalViewType& Acol2Irow,
 
   83                                                        const LocalOrdinalViewType& Bcol2Ccol,
 
   84                                                        const LocalOrdinalViewType& Icol2Ccol,
 
   85                                                        CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
 
   86                                                        Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
 
   87                                                        const std::string& label                           = std::string(),
 
   88                                                        const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
 
   92template <
class Scalar,
 
   94          class GlobalOrdinal, 
class LocalOrdinalViewType>
 
   95struct KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, 
Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
 
   96  static inline void mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
 
   97                                                         CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
   98                                                         CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
 
   99                                                         const LocalOrdinalViewType& Acol2Prow,
 
  100                                                         const LocalOrdinalViewType& Acol2PIrow,
 
  101                                                         const LocalOrdinalViewType& Pcol2Ccol,
 
  102                                                         const LocalOrdinalViewType& PIcol2Ccol,
 
  103                                                         CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
 
  104                                                         Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
 
  105                                                         const std::string& label                           = std::string(),
 
  106                                                         const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
 
  108  static inline void mult_R_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
 
  109                                                     CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
  110                                                     CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
 
  111                                                     const LocalOrdinalViewType& Acol2Prow,
 
  112                                                     const LocalOrdinalViewType& Acol2PIrow,
 
  113                                                     const LocalOrdinalViewType& Pcol2Ccol,
 
  114                                                     const LocalOrdinalViewType& PIcol2Ccol,
 
  115                                                     CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
 
  116                                                     Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
 
  117                                                     const std::string& label                           = std::string(),
 
  118                                                     const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
 
  120  static inline void mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
  121                                                          CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
 
  122                                                          const LocalOrdinalViewType& Acol2Prow,
 
  123                                                          const LocalOrdinalViewType& Acol2PIrow,
 
  124                                                          const LocalOrdinalViewType& Pcol2Ccol,
 
  125                                                          const LocalOrdinalViewType& PIcol2Ccol,
 
  126                                                          CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
 
  127                                                          Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
 
  128                                                          const std::string& label                           = std::string(),
 
  129                                                          const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
 
  131  static inline void mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
  132                                                      CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
 
  133                                                      const LocalOrdinalViewType& Acol2Prow,
 
  134                                                      const LocalOrdinalViewType& Acol2PIrow,
 
  135                                                      const LocalOrdinalViewType& Pcol2Ccol,
 
  136                                                      const LocalOrdinalViewType& PIcol2Ccol,
 
  137                                                      CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
 
  138                                                      Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
 
  139                                                      const std::string& label                           = std::string(),
 
  140                                                      const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
 
  144template <
class Scalar,
 
  147          class LocalOrdinalViewType>
 
  148void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
  149                                                                                                                                                                 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
 
  150                                                                                                                                                                 const LocalOrdinalViewType& Acol2Brow,
 
  151                                                                                                                                                                 const LocalOrdinalViewType& Acol2Irow,
 
  152                                                                                                                                                                 const LocalOrdinalViewType& Bcol2Ccol,
 
  153                                                                                                                                                                 const LocalOrdinalViewType& Icol2Ccol,
 
  154                                                                                                                                                                 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
 
  155                                                                                                                                                                 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
 
  156                                                                                                                                                                 const std::string& label,
 
  157                                                                                                                                                                 const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
  158#ifdef HAVE_TPETRA_MMM_TIMINGS 
  159  std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
 
  160  using Teuchos::TimeMonitor;
 
  161  Teuchos::RCP<TimeMonitor> MM;
 
  165  std::string nodename(
"OpenMP");
 
  170  typedef typename KCRS::device_type device_t;
 
  171  typedef typename KCRS::StaticCrsGraphType graph_t;
 
  172  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
 
  173  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
 
  174  typedef typename KCRS::values_type::non_const_type scalar_view_t;
 
  177  int team_work_size = 16;  
 
  178  std::string myalg(
"SPGEMM_KK_MEMORY");
 
  180  if (!params.is_null()) {
 
  181    if (params->isParameter(
"openmp: algorithm"))
 
  182      myalg = params->get(
"openmp: algorithm", myalg);
 
  183    if (params->isParameter(
"openmp: team work size"))
 
  184      team_work_size = params->get(
"openmp: team work size", team_work_size);
 
  187  if (myalg == 
"LTG") {
 
  189    ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_newmatrix_LowThreadGustavsonKernel(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
 
  192#ifdef HAVE_TPETRA_MMM_TIMINGS 
  193    MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPWrapper"))));
 
  196    typedef KokkosKernels::Experimental::KokkosKernelsHandle<
 
  197        typename lno_view_t::const_value_type, 
typename lno_nnz_view_t::const_value_type, 
typename scalar_view_t::const_value_type,
 
  198        typename device_t::execution_space, 
typename device_t::memory_space, 
typename device_t::memory_space>
 
  202    const KCRS& Ak = Aview.origMatrix->getLocalMatrixDevice();
 
  206    std::string alg = nodename + std::string(
" algorithm");
 
  208    if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
 
  209    KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
 
  212    const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
 
  214#ifdef HAVE_TPETRA_MMM_TIMINGS 
  216    MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPCore"))));
 
  220    typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
 
  223    typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
 
  224    typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
 
  226    lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing(
"non_const_lnow_row"), AnumRows + 1);
 
  227    lno_nnz_view_t entriesC;
 
  228    scalar_view_t valuesC;
 
  230    kh.create_spgemm_handle(alg_enum);
 
  231    kh.set_team_work_size(team_work_size);
 
  233    KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Ak.graph.row_map, Ak.graph.entries, 
false, Bmerged.graph.row_map, Bmerged.graph.entries, 
false, row_mapC);
 
  235    size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
 
  237      entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
 
  238      valuesC  = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
 
  241    KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Ak.graph.row_map, Ak.graph.entries, Ak.values, 
false, Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, 
false, row_mapC, entriesC, valuesC);
 
  242    kh.destroy_spgemm_handle();
 
  244#ifdef HAVE_TPETRA_MMM_TIMINGS 
  246    MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPSort"))));
 
  249    if (params.is_null() || params->get(
"sort entries", 
true))
 
  250      Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
 
  251    C.setAllValues(row_mapC, entriesC, valuesC);
 
  255#ifdef HAVE_TPETRA_MMM_TIMINGS 
  257  MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPESFC"))));
 
  261  RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
 
  262  labelList->set(
"Timer Label", label);
 
  263  if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants", 
true));
 
  264  RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
 
  265  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
 
  269    Teuchos::ArrayRCP< const size_t > Crowptr;
 
  270    Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
 
  271    Teuchos::ArrayRCP< const Scalar > Cvalues;
 
  272    C.getAllValues(Crowptr,Ccolind,Cvalues);
 
  275    int MyPID = C->getComm()->getRank();
 
  276    printf(
"[%d] Crowptr = ",MyPID);
 
  277    for(
size_t i=0; i<(size_t) Crowptr.size(); i++) {
 
  278      printf(
"%3d ",(
int)Crowptr.getConst()[i]);
 
  281    printf(
"[%d] Ccolind = ",MyPID);
 
  282    for(
size_t i=0; i<(size_t)Ccolind.size(); i++) {
 
  283      printf(
"%3d ",(
int)Ccolind.getConst()[i]);
 
  293template <
class Scalar,
 
  296          class LocalOrdinalViewType>
 
  297void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
  298                                                                                                                                                             CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
 
  299                                                                                                                                                             const LocalOrdinalViewType& Acol2Brow,
 
  300                                                                                                                                                             const LocalOrdinalViewType& Acol2Irow,
 
  301                                                                                                                                                             const LocalOrdinalViewType& Bcol2Ccol,
 
  302                                                                                                                                                             const LocalOrdinalViewType& Icol2Ccol,
 
  303                                                                                                                                                             CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
 
  304                                                                                                                                                             Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
 
  305                                                                                                                                                             const std::string& label,
 
  306                                                                                                                                                             const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
  307#ifdef HAVE_TPETRA_MMM_TIMINGS 
  308  std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
 
  309  using Teuchos::TimeMonitor;
 
  310  Teuchos::RCP<TimeMonitor> MM;
 
  317  int team_work_size = 16;  
 
  318  std::string myalg(
"LTG");
 
  319  if (!params.is_null()) {
 
  320    if (params->isParameter(
"openmp: algorithm"))
 
  321      myalg = params->get(
"openmp: algorithm", myalg);
 
  322    if (params->isParameter(
"openmp: team work size"))
 
  323      team_work_size = params->get(
"openmp: team work size", team_work_size);
 
  326  if (myalg == 
"LTG") {
 
  328    ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_reuse_LowThreadGustavsonKernel(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
 
  330    throw std::runtime_error(
"Tpetra::MatrixMatrix::MMM reuse unknown kernel");
 
  333#ifdef HAVE_TPETRA_MMM_TIMINGS 
  335  MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse OpenMPESFC"))));
 
  337  C.fillComplete(C.getDomainMap(), C.getRangeMap());
 
  341template <
class Scalar,
 
  344          class LocalOrdinalViewType>
 
  345void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
 
  346                                                                                                                                                                    const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
 
  347                                                                                                                                                                    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
  348                                                                                                                                                                    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
 
  349                                                                                                                                                                    const LocalOrdinalViewType& Acol2Brow,
 
  350                                                                                                                                                                    const LocalOrdinalViewType& Acol2Irow,
 
  351                                                                                                                                                                    const LocalOrdinalViewType& Bcol2Ccol,
 
  352                                                                                                                                                                    const LocalOrdinalViewType& Icol2Ccol,
 
  353                                                                                                                                                                    CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
 
  354                                                                                                                                                                    Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
 
  355                                                                                                                                                                    const std::string& label,
 
  356                                                                                                                                                                    const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
  357#ifdef HAVE_TPETRA_MMM_TIMINGS 
  358  std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
 
  359  using Teuchos::TimeMonitor;
 
  360  Teuchos::RCP<TimeMonitor> MM;
 
  367  int team_work_size = 16;  
 
  368  std::string myalg(
"LTG");
 
  369  if (!params.is_null()) {
 
  370    if (params->isParameter(
"openmp: jacobi algorithm"))
 
  371      myalg = params->get(
"openmp: jacobi algorithm", myalg);
 
  372    if (params->isParameter(
"openmp: team work size"))
 
  373      team_work_size = params->get(
"openmp: team work size", team_work_size);
 
  376  if (myalg == 
"LTG") {
 
  378    ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_LowThreadGustavsonKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
 
  379  } 
else if (myalg == 
"MSAK") {
 
  380    ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
 
  381  } 
else if (myalg == 
"KK") {
 
  382    jacobi_A_B_newmatrix_KokkosKernels(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
 
  384    throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
 
  387#ifdef HAVE_TPETRA_MMM_TIMINGS 
  389  MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix OpenMPESFC"))));
 
  393  RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
 
  394  labelList->set(
"Timer Label", label);
 
  395  if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants", 
true));
 
  398  if (!C.isFillComplete()) {
 
  399    RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
 
  400    C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
 
  405template <
class Scalar,
 
  408          class LocalOrdinalViewType>
 
  409void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
 
  410                                                                                                                                                                const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
 
  411                                                                                                                                                                CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
  412                                                                                                                                                                CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
 
  413                                                                                                                                                                const LocalOrdinalViewType& Acol2Brow,
 
  414                                                                                                                                                                const LocalOrdinalViewType& Acol2Irow,
 
  415                                                                                                                                                                const LocalOrdinalViewType& Bcol2Ccol,
 
  416                                                                                                                                                                const LocalOrdinalViewType& Icol2Ccol,
 
  417                                                                                                                                                                CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
 
  418                                                                                                                                                                Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
 
  419                                                                                                                                                                const std::string& label,
 
  420                                                                                                                                                                const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
  421#ifdef HAVE_TPETRA_MMM_TIMINGS 
  422  std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
 
  423  using Teuchos::TimeMonitor;
 
  424  Teuchos::RCP<TimeMonitor> MM;
 
  431  int team_work_size = 16;  
 
  432  std::string myalg(
"LTG");
 
  433  if (!params.is_null()) {
 
  434    if (params->isParameter(
"openmp: jacobi algorithm"))
 
  435      myalg = params->get(
"openmp: jacobi algorithm", myalg);
 
  436    if (params->isParameter(
"openmp: team work size"))
 
  437      team_work_size = params->get(
"openmp: team work size", team_work_size);
 
  440  if (myalg == 
"LTG") {
 
  442    ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_reuse_LowThreadGustavsonKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
 
  444    throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi reuse unknown kernel");
 
  447#ifdef HAVE_TPETRA_MMM_TIMINGS 
  449  MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse OpenMPESFC"))));
 
  451  C.fillComplete(C.getDomainMap(), C.getRangeMap());
 
  455template <
class Scalar,
 
  458          class LocalOrdinalViewType>
 
  459void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
 
  460                                                                                                                                                                   const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
 
  461                                                                                                                                                                   CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
  462                                                                                                                                                                   CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
 
  463                                                                                                                                                                   const LocalOrdinalViewType& Acol2Brow,
 
  464                                                                                                                                                                   const LocalOrdinalViewType& Acol2Irow,
 
  465                                                                                                                                                                   const LocalOrdinalViewType& Bcol2Ccol,
 
  466                                                                                                                                                                   const LocalOrdinalViewType& Icol2Ccol,
 
  467                                                                                                                                                                   CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
 
  468                                                                                                                                                                   Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
 
  469                                                                                                                                                                   const std::string& label,
 
  470                                                                                                                                                                   const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
  471#ifdef HAVE_TPETRA_MMM_TIMINGS 
  472  std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
 
  473  using Teuchos::TimeMonitor;
 
  474  Teuchos::RCP<TimeMonitor> MM;
 
  480    auto rowMap = Aview.origMatrix->getRowMap();
 
  482    Aview.origMatrix->getLocalDiagCopy(diags);
 
  483    size_t diagLength = rowMap->getLocalNumElements();
 
  484    Teuchos::Array<Scalar> diagonal(diagLength);
 
  485    diags.get1dCopy(diagonal());
 
  487    for (
size_t i = 0; i < diagLength; ++i) {
 
  488      TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
 
  490                                 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl
 
  491                                                                          << 
"KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
 
  496  using device_t       = 
typename Tpetra::KokkosCompat::KokkosOpenMPWrapperNode::device_type;
 
  498  using graph_t        = 
typename matrix_t::StaticCrsGraphType;
 
  499  using lno_view_t     = 
typename graph_t::row_map_type::non_const_type;
 
  500  using c_lno_view_t   = 
typename graph_t::row_map_type::const_type;
 
  501  using lno_nnz_view_t = 
typename graph_t::entries_type::non_const_type;
 
  502  using scalar_view_t  = 
typename matrix_t::values_type::non_const_type;
 
  505  using handle_t = 
typename KokkosKernels::Experimental::KokkosKernelsHandle<
 
  506      typename lno_view_t::const_value_type, 
typename lno_nnz_view_t::const_value_type, 
typename scalar_view_t::const_value_type,
 
  507      typename device_t::execution_space, 
typename device_t::memory_space, 
typename device_t::memory_space>;
 
  510  c_lno_view_t Irowptr;
 
  511  lno_nnz_view_t Icolind;
 
  513  if (!Bview.importMatrix.is_null()) {
 
  514    auto lclB = Bview.importMatrix->getLocalMatrixDevice();
 
  515    Irowptr   = lclB.graph.row_map;
 
  516    Icolind   = lclB.graph.entries;
 
  521  const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
 
  524  const matrix_t& Amat = Aview.origMatrix->getLocalMatrixDevice();
 
  525  const matrix_t& Bmat = Bview.origMatrix->getLocalMatrixDevice();
 
  527  typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
 
  528  typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
 
  529  typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
 
  531  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
 
  532  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
 
  533  const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
 
  536  lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing(
"non_const_lnow_row"), AnumRows + 1);
 
  537  lno_nnz_view_t entriesC;
 
  538  scalar_view_t valuesC;
 
  541  int team_work_size = 16;
 
  542  std::string myalg(
"SPGEMM_KK_MEMORY");
 
  543  if (!params.is_null()) {
 
  544    if (params->isParameter(
"cuda: algorithm"))
 
  545      myalg = params->get(
"cuda: algorithm", myalg);
 
  546    if (params->isParameter(
"cuda: team work size"))
 
  547      team_work_size = params->get(
"cuda: team work size", team_work_size);
 
  551  std::string nodename(
"OpenMP");
 
  552  std::string alg = nodename + std::string(
" algorithm");
 
  553  if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
 
  554  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
 
  558  kh.create_spgemm_handle(alg_enum);
 
  559  kh.set_team_work_size(team_work_size);
 
  561  KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
 
  562                                Arowptr, Acolind, 
false,
 
  563                                Browptr, Bcolind, 
false,
 
  566  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
 
  568    entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
 
  569    valuesC  = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
 
  572  KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
 
  573                                            Arowptr, Acolind, Avals, 
false,
 
  574                                            Browptr, Bcolind, Bvals, 
false,
 
  575                                            row_mapC, entriesC, valuesC,
 
  576                                            omega, Dinv.getLocalViewDevice(Tpetra::Access::ReadOnly));
 
  577  kh.destroy_spgemm_handle();
 
  579#ifdef HAVE_TPETRA_MMM_TIMINGS 
  581  MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix OpenMPSort"))));
 
  585  if (params.is_null() || params->get(
"sort entries", 
true))
 
  586    Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
 
  587  C.setAllValues(row_mapC, entriesC, valuesC);
 
  589#ifdef HAVE_TPETRA_MMM_TIMINGS 
  591  MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix OpenMPESFC"))));
 
  595  Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
 
  596  labelList->set(
"Timer Label", label);
 
  597  if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants", 
true));
 
  598  Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
 
  599  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
 
  603template <
class Scalar,
 
  606          class LocalOrdinalViewType>
 
  607void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
 
  608                                                                                                                                                                    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
  609                                                                                                                                                                    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
 
  610                                                                                                                                                                    const LocalOrdinalViewType& Acol2Prow,
 
  611                                                                                                                                                                    const LocalOrdinalViewType& Acol2PIrow,
 
  612                                                                                                                                                                    const LocalOrdinalViewType& Pcol2Accol,
 
  613                                                                                                                                                                    const LocalOrdinalViewType& PIcol2Accol,
 
  614                                                                                                                                                                    CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
 
  615                                                                                                                                                                    Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
 
  616                                                                                                                                                                    const std::string& label,
 
  617                                                                                                                                                                    const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
  618#ifdef HAVE_TPETRA_MMM_TIMINGS 
  619  std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
 
  620  using Teuchos::TimeMonitor;
 
  621  Teuchos::RCP<TimeMonitor> MM;
 
  625  std::string nodename(
"OpenMP");
 
  628  std::string myalg(
"LTG");
 
  630  if (!params.is_null()) {
 
  631    if (params->isParameter(
"openmp: rap algorithm"))
 
  632      myalg = params->get(
"openmp: rap algorithm", myalg);
 
  635  if (myalg == 
"LTG") {
 
  637    ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol, PIcol2Accol, Ac, Acimport, label, params);
 
  639    throw std::runtime_error(
"Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
 
  644template <
class Scalar,
 
  647          class LocalOrdinalViewType>
 
  648void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_R_A_P_reuse_kernel_wrapper(
 
  649    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
 
  650    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
  651    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
 
  653    const LocalOrdinalViewType& Acol2Prow,
 
  654    const LocalOrdinalViewType& Acol2Irow,
 
  655    const LocalOrdinalViewType& Pcol2Ccol,
 
  656    const LocalOrdinalViewType& Icol2Ccol,
 
  657    CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
 
  658    Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
 
  659    const std::string& label,
 
  660    const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
  661#ifdef HAVE_TPETRA_MMM_TIMINGS 
  662  std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
 
  663  using Teuchos::TimeMonitor;
 
  664  Teuchos::RCP<TimeMonitor> MM;
 
  671  std::string myalg(
"LTG");
 
  672  if (!params.is_null()) {
 
  673    if (params->isParameter(
"openmp: rap algorithm"))
 
  674      myalg = params->get(
"openmp: rap algorithm", myalg);
 
  677  if (myalg == 
"LTG") {
 
  679    ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2Irow, Pcol2Ccol, Icol2Ccol, C, Cimport, label, params);
 
  681    throw std::runtime_error(
"Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
 
  684#ifdef HAVE_TPETRA_MMM_TIMINGS 
  686  MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP Reuse OpenMPESFC"))));
 
  688  C.fillComplete(C.getDomainMap(), C.getRangeMap());
 
  692template <
class Scalar,
 
  695          class LocalOrdinalViewType>
 
  696void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
  698                                                                                                                                                                     CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
 
  699                                                                                                                                                                     const LocalOrdinalViewType& Acol2Prow,
 
  700                                                                                                                                                                     const LocalOrdinalViewType& Acol2PIrow,
 
  701                                                                                                                                                                     const LocalOrdinalViewType& Pcol2Accol,
 
  702                                                                                                                                                                     const LocalOrdinalViewType& PIcol2Accol,
 
  703                                                                                                                                                                     CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
 
  704                                                                                                                                                                     Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
 
  705                                                                                                                                                                     const std::string& label,
 
  706                                                                                                                                                                     const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
  707#ifdef HAVE_TPETRA_MMM_TIMINGS 
  708  std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
 
  709  using Teuchos::TimeMonitor;
 
  710  Teuchos::RCP<TimeMonitor> MM;
 
  714  std::string nodename(
"OpenMP");
 
  717  std::string myalg(
"LTG");
 
  719  if (!params.is_null()) {
 
  720    if (params->isParameter(
"openmp: ptap algorithm"))
 
  721      myalg = params->get(
"openmp: ptap algorithm", myalg);
 
  724  if (myalg == 
"LTG") {
 
  725#ifdef HAVE_TPETRA_MMM_TIMINGS 
  727    MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"PTAP local transpose"))));
 
  730    using Teuchos::ParameterList;
 
  732    using LO = LocalOrdinal;
 
  733    using GO = GlobalOrdinal;
 
  737    using transposer_type =
 
  738        RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
 
  739    transposer_type transposer(Pview.origMatrix, label + 
"XP: ");
 
  740    RCP<ParameterList> transposeParams(
new ParameterList);
 
  741    if (!params.is_null()) {
 
  742      transposeParams->set(
"compute global constants",
 
  743                           params->get(
"compute global constants: temporaries",
 
  746    transposeParams->set(
"sort", 
false);
 
  747    RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
 
  748        transposer.createTransposeLocal(transposeParams);
 
  749    CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
 
  750    Rview.origMatrix = Ptrans;
 
  752    using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel;
 
  753    mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
 
  754                                                  PIcol2Accol, Ac, Acimport, label, params);
 
  756    throw std::runtime_error(
"Tpetra::MatrixMatrix::PT_A_P newmatrix unknown kernel");
 
  761template <
class Scalar,
 
  764          class LocalOrdinalViewType>
 
  765void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
 
  767                                                                                                                                                                 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
 
  768                                                                                                                                                                 const LocalOrdinalViewType& Acol2Prow,
 
  769                                                                                                                                                                 const LocalOrdinalViewType& Acol2PIrow,
 
  770                                                                                                                                                                 const LocalOrdinalViewType& Pcol2Accol,
 
  771                                                                                                                                                                 const LocalOrdinalViewType& PIcol2Accol,
 
  772                                                                                                                                                                 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
 
  773                                                                                                                                                                 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
 
  774                                                                                                                                                                 const std::string& label,
 
  775                                                                                                                                                                 const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
  776#ifdef HAVE_TPETRA_MMM_TIMINGS 
  777  std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
 
  778  using Teuchos::TimeMonitor;
 
  779  Teuchos::RCP<TimeMonitor> MM;
 
  783  std::string nodename(
"OpenMP");
 
  786  std::string myalg(
"LTG");
 
  788  if (!params.is_null()) {
 
  789    if (params->isParameter(
"openmp: ptap algorithm"))
 
  790      myalg = params->get(
"openmp: ptap algorithm", myalg);
 
  793  if (myalg == 
"LTG") {
 
  794#ifdef HAVE_TPETRA_MMM_TIMINGS 
  796    MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"PTAP local transpose"))));
 
  799    using Teuchos::ParameterList;
 
  801    using LO = LocalOrdinal;
 
  802    using GO = GlobalOrdinal;
 
  806    using transposer_type =
 
  807        RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
 
  808    transposer_type transposer(Pview.origMatrix, label + 
"XP: ");
 
  809    RCP<ParameterList> transposeParams(
new ParameterList);
 
  810    if (!params.is_null()) {
 
  811      transposeParams->set(
"compute global constants",
 
  812                           params->get(
"compute global constants: temporaries",
 
  815    transposeParams->set(
"sort", 
false);
 
  816    RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
 
  817        transposer.createTransposeLocal(transposeParams);
 
  818    CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
 
  819    Rview.origMatrix = Ptrans;
 
  821    using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel;
 
  822    mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
 
  823                                              PIcol2Accol, Ac, Acimport, label, params);
 
  825    throw std::runtime_error(
"Tpetra::MatrixMatrix::PT_A_P reuse unknown kernel");
 
  827  Ac.fillComplete(Ac.getDomainMap(), Ac.getRangeMap());
 
Struct that holds views of the contents of a CrsMatrix.
 
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
 
static bool debug()
Whether Tpetra is in debug mode.
 
Namespace Tpetra contains the class and methods constituting the Tpetra library.