10#ifndef TPETRA_MATRIXMATRIX_SYCL_DEF_HPP 
   11#define TPETRA_MATRIXMATRIX_SYCL_DEF_HPP 
   13#include "Tpetra_Details_IntRowPtrHelper.hpp" 
   15#ifdef HAVE_TPETRA_INST_SYCL 
   21template <
class Scalar,
 
   24          class LocalOrdinalViewType>
 
   25struct KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, 
Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType> {
 
   26  static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
 
   27                                                       CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
 
   28                                                       const LocalOrdinalViewType& Acol2Brow,
 
   29                                                       const LocalOrdinalViewType& Acol2Irow,
 
   30                                                       const LocalOrdinalViewType& Bcol2Ccol,
 
   31                                                       const LocalOrdinalViewType& Icol2Ccol,
 
   32                                                       CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
 
   33                                                       Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
 
   34                                                       const std::string& label                           = std::string(),
 
   35                                                       const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
 
   37  static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
 
   38                                                   CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
 
   39                                                   const LocalOrdinalViewType& Acol2Brow,
 
   40                                                   const LocalOrdinalViewType& Acol2Irow,
 
   41                                                   const LocalOrdinalViewType& Bcol2Ccol,
 
   42                                                   const LocalOrdinalViewType& Icol2Ccol,
 
   43                                                   CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
 
   44                                                   Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
 
   45                                                   const std::string& label                           = std::string(),
 
   46                                                   const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
 
   50template <
class Scalar,
 
   52          class GlobalOrdinal, 
class LocalOrdinalViewType>
 
   53struct KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, 
Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType> {
 
   54  static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
 
   55                                                         const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
 
   56                                                         CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
 
   57                                                         CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
 
   58                                                         const LocalOrdinalViewType& Acol2Brow,
 
   59                                                         const LocalOrdinalViewType& Acol2Irow,
 
   60                                                         const LocalOrdinalViewType& Bcol2Ccol,
 
   61                                                         const LocalOrdinalViewType& Icol2Ccol,
 
   62                                                         CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
 
   63                                                         Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
 
   64                                                         const std::string& label                           = std::string(),
 
   65                                                         const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
 
   67  static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
 
   68                                                     const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
 
   69                                                     CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
 
   70                                                     CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
 
   71                                                     const LocalOrdinalViewType& Acol2Brow,
 
   72                                                     const LocalOrdinalViewType& Acol2Irow,
 
   73                                                     const LocalOrdinalViewType& Bcol2Ccol,
 
   74                                                     const LocalOrdinalViewType& Icol2Ccol,
 
   75                                                     CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
 
   76                                                     Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
 
   77                                                     const std::string& label                           = std::string(),
 
   78                                                     const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
 
   80  static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
 
   81                                                        const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
 
   82                                                        CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
 
   83                                                        CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
 
   84                                                        const LocalOrdinalViewType& Acol2Brow,
 
   85                                                        const LocalOrdinalViewType& Acol2Irow,
 
   86                                                        const LocalOrdinalViewType& Bcol2Ccol,
 
   87                                                        const LocalOrdinalViewType& Icol2Ccol,
 
   88                                                        CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
 
   89                                                        Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
 
   90                                                        const std::string& label                           = std::string(),
 
   91                                                        const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
 
   96template <
class Scalar,
 
   99          class LocalOrdinalViewType>
 
  100void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
 
  101                                                                                                                                                               CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
 
  102                                                                                                                                                               const LocalOrdinalViewType& Acol2Brow,
 
  103                                                                                                                                                               const LocalOrdinalViewType& Acol2Irow,
 
  104                                                                                                                                                               const LocalOrdinalViewType& Bcol2Ccol,
 
  105                                                                                                                                                               const LocalOrdinalViewType& Icol2Ccol,
 
  106                                                                                                                                                               CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
 
  107                                                                                                                                                               Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
 
  108                                                                                                                                                               const std::string& label,
 
  109                                                                                                                                                               const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
  110#ifdef HAVE_TPETRA_MMM_TIMINGS 
  111  std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
 
  112  using Teuchos::TimeMonitor;
 
  113  Teuchos::RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLWrapper")))));
 
  116  typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
 
  117  std::string nodename(
"SYCL");
 
  122  typedef typename KCRS::device_type device_t;
 
  123  typedef typename KCRS::StaticCrsGraphType graph_t;
 
  124  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
 
  125  using int_view_t = Kokkos::View<int*, typename lno_view_t::array_layout, typename lno_view_t::memory_space, typename lno_view_t::memory_traits>;
 
  126  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
 
  127  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
 
  128  typedef typename KCRS::values_type::non_const_type scalar_view_t;
 
  132  int team_work_size = 16;  
 
  133  std::string myalg(
"SPGEMM_KK_MEMORY");
 
  134  if (!params.is_null()) {
 
  135    if (params->isParameter(
"sycl: algorithm"))
 
  136      myalg = params->get(
"sycl: algorithm", myalg);
 
  137    if (params->isParameter(
"sycl: team work size"))
 
  138      team_work_size = params->get(
"sycl: team work size", team_work_size);
 
  142  typedef KokkosKernels::Experimental::KokkosKernelsHandle<
 
  143      typename lno_view_t::const_value_type, 
typename lno_nnz_view_t::const_value_type, 
typename scalar_view_t::const_value_type,
 
  144      typename device_t::execution_space, 
typename device_t::memory_space, 
typename device_t::memory_space>
 
  146  using IntKernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<
 
  147      typename int_view_t::const_value_type, 
typename lno_nnz_view_t::const_value_type, 
typename scalar_view_t::const_value_type,
 
  148      typename device_t::execution_space, 
typename device_t::memory_space, 
typename device_t::memory_space>;
 
  151  const KCRS& Amat = Aview.origMatrix->getLocalMatrixDevice();
 
  152  const KCRS& Bmat = Bview.origMatrix->getLocalMatrixDevice();
 
  154  c_lno_view_t Arowptr         = Amat.graph.row_map,
 
  155               Browptr         = Bmat.graph.row_map;
 
  156  const lno_nnz_view_t Acolind = Amat.graph.entries,
 
  157                       Bcolind = Bmat.graph.entries;
 
  158  const scalar_view_t Avals    = Amat.values,
 
  162  std::string alg = nodename + std::string(
" algorithm");
 
  164  if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
 
  165  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
 
  168  const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
 
  170#ifdef HAVE_TPETRA_MMM_TIMINGS 
  172  MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLCore"))));
 
  176  typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
 
  177  typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
 
  178  typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
 
  180  lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing(
"non_const_lno_row"), AnumRows + 1);
 
  181  lno_nnz_view_t entriesC;
 
  182  scalar_view_t valuesC;
 
  187  const bool useIntRowptrs =
 
  188      irph.shouldUseIntRowptrs() &&
 
  189      Aview.
origMatrix->getApplyHelper()->shouldUseIntRowptrs();
 
  193    kh.create_spgemm_handle(alg_enum);
 
  194    kh.set_team_work_size(team_work_size);
 
  196    int_view_t int_row_mapC(Kokkos::ViewAllocateWithoutInitializing(
"non_const_int_row"), AnumRows + 1);
 
  198    auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
 
  199    auto Bint = irph.getIntRowptrMatrix(Bmerged);
 
  200    KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Aint.graph.row_map, Aint.graph.entries, 
false, Bint.graph.row_map, Bint.graph.entries, 
false, int_row_mapC);
 
  202    size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
 
  204      entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
 
  205      valuesC  = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
 
  207    KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Aint.graph.row_map, Aint.graph.entries, Aint.values, 
false, Bint.graph.row_map, Bint.graph.entries, Bint.values, 
false, int_row_mapC, entriesC, valuesC);
 
  209    Kokkos::parallel_for(
 
  210        int_row_mapC.size(), KOKKOS_LAMBDA(
int i) { row_mapC(i) = int_row_mapC(i); });
 
  211    kh.destroy_spgemm_handle();
 
  215    kh.create_spgemm_handle(alg_enum);
 
  216    kh.set_team_work_size(team_work_size);
 
  218    KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Amat.graph.row_map, Amat.graph.entries, 
false, Bmerged.graph.row_map, Bmerged.graph.entries, 
false, row_mapC);
 
  220    size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
 
  222      entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
 
  223      valuesC  = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
 
  226    KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Amat.graph.row_map, Amat.graph.entries, Amat.values, 
false, Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, 
false, row_mapC, entriesC, valuesC);
 
  227    kh.destroy_spgemm_handle();
 
  230#ifdef HAVE_TPETRA_MMM_TIMINGS 
  232  MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLSort"))));
 
  236  if (params.is_null() || params->get(
"sort entries", 
true))
 
  237    Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
 
  238  C.setAllValues(row_mapC, entriesC, valuesC);
 
  240#ifdef HAVE_TPETRA_MMM_TIMINGS 
  242  MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLESFC"))));
 
  246  RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
 
  247  labelList->set(
"Timer Label", label);
 
  248  if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants", 
true));
 
  249  RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
 
  250  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
 
  254template <
class Scalar,
 
  257          class LocalOrdinalViewType>
 
  258void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(
 
  259    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
 
  260    CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
 
  261    const LocalOrdinalViewType& targetMapToOrigRow_dev,
 
  262    const LocalOrdinalViewType& targetMapToImportRow_dev,
 
  263    const LocalOrdinalViewType& Bcol2Ccol_dev,
 
  264    const LocalOrdinalViewType& Icol2Ccol_dev,
 
  265    CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
 
  266    Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
 
  267    const std::string& label,
 
  268    const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
  270  typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
 
  272#ifdef HAVE_TPETRA_MMM_TIMINGS 
  273  std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
 
  274  using Teuchos::TimeMonitor;
 
  275  Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
 
  276  Teuchos::RCP<Teuchos::TimeMonitor> MM2;
 
  282  typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
 
  283  typedef typename KCRS::StaticCrsGraphType graph_t;
 
  284  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
 
  285  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
 
  286  typedef typename KCRS::values_type::non_const_type scalar_view_t;
 
  289  typedef LocalOrdinal LO;
 
  290  typedef GlobalOrdinal GO;
 
  292  typedef Map<LO, GO, NO> map_type;
 
  293  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
 
  294  const LO LO_INVALID     = Teuchos::OrdinalTraits<LO>::invalid();
 
  295  const SC SC_ZERO        = Teuchos::ScalarTraits<Scalar>::zero();
 
  303  auto targetMapToOrigRow =
 
  304      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
 
  305                                          targetMapToOrigRow_dev);
 
  306  auto targetMapToImportRow =
 
  307      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
 
  308                                          targetMapToImportRow_dev);
 
  310      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
 
  313      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
 
  317  RCP<const map_type> Ccolmap = C.getColMap();
 
  318  size_t m                    = Aview.origMatrix->getLocalNumRows();
 
  319  size_t n                    = Ccolmap->getLocalNumElements();
 
  322  const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
 
  323  const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
 
  324  const KCRS& Cmat = C.getLocalMatrixHost();
 
  326  c_lno_view_t Arowptr         = Amat.graph.row_map,
 
  327               Browptr         = Bmat.graph.row_map,
 
  328               Crowptr         = Cmat.graph.row_map;
 
  329  const lno_nnz_view_t Acolind = Amat.graph.entries,
 
  330                       Bcolind = Bmat.graph.entries,
 
  331                       Ccolind = Cmat.graph.entries;
 
  332  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
 
  333  scalar_view_t Cvals = Cmat.values;
 
  335  c_lno_view_t Irowptr;
 
  336  lno_nnz_view_t Icolind;
 
  338  if (!Bview.importMatrix.is_null()) {
 
  339    auto lclB = Bview.importMatrix->getLocalMatrixHost();
 
  340    Irowptr   = lclB.graph.row_map;
 
  341    Icolind   = lclB.graph.entries;
 
  345#ifdef HAVE_TPETRA_MMM_TIMINGS 
  347  MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
 
  359  std::vector<size_t> c_status(n, ST_INVALID);
 
  362  size_t CSR_ip = 0, OLD_ip = 0;
 
  363  for (
size_t i = 0; i < m; i++) {
 
  367    CSR_ip = Crowptr[i + 1];
 
  368    for (
size_t k = OLD_ip; k < CSR_ip; k++) {
 
  369      c_status[Ccolind[k]] = k;
 
  375    for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
 
  377      const SC Aval = Avals[k];
 
  381      if (targetMapToOrigRow[Aik] != LO_INVALID) {
 
  383        size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
 
  385        for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
 
  387          LO Cij = Bcol2Ccol[Bkj];
 
  389          TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
 
  390                                     std::runtime_error, 
"Trying to insert a new entry (" << i << 
"," << Cij << 
") into a static graph " 
  391                                                                                          << 
"(c_status = " << c_status[Cij] << 
" of [" << OLD_ip << 
"," << CSR_ip << 
"))");
 
  393          Cvals[c_status[Cij]] += Aval * Bvals[j];
 
  398        size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
 
  399        for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
 
  401          LO Cij = Icol2Ccol[Ikj];
 
  403          TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
 
  404                                     std::runtime_error, 
"Trying to insert a new entry (" << i << 
"," << Cij << 
") into a static graph " 
  405                                                                                          << 
"(c_status = " << c_status[Cij] << 
" of [" << OLD_ip << 
"," << CSR_ip << 
"))");
 
  407          Cvals[c_status[Cij]] += Aval * Ivals[j];
 
  413  C.fillComplete(C.getDomainMap(), C.getRangeMap());
 
  417template <
class Scalar,
 
  420          class LocalOrdinalViewType>
 
  421void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
 
  422                                                                                                                                                                  const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
 
  423                                                                                                                                                                  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
 
  424                                                                                                                                                                  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
 
  425                                                                                                                                                                  const LocalOrdinalViewType& Acol2Brow,
 
  426                                                                                                                                                                  const LocalOrdinalViewType& Acol2Irow,
 
  427                                                                                                                                                                  const LocalOrdinalViewType& Bcol2Ccol,
 
  428                                                                                                                                                                  const LocalOrdinalViewType& Icol2Ccol,
 
  429                                                                                                                                                                  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
 
  430                                                                                                                                                                  Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
 
  431                                                                                                                                                                  const std::string& label,
 
  432                                                                                                                                                                  const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
  433#ifdef HAVE_TPETRA_MMM_TIMINGS 
  434  std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
 
  435  using Teuchos::TimeMonitor;
 
  436  Teuchos::RCP<TimeMonitor> MM;
 
  444  std::string myalg(
"KK");
 
  445  if (!params.is_null()) {
 
  446    if (params->isParameter(
"sycl: jacobi algorithm"))
 
  447      myalg = params->get(
"sycl: jacobi algorithm", myalg);
 
  450  if (myalg == 
"MSAK") {
 
  451    ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
 
  452  } 
else if (myalg == 
"KK") {
 
  453    jacobi_A_B_newmatrix_KokkosKernels(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
 
  455    throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
 
  458#ifdef HAVE_TPETRA_MMM_TIMINGS 
  460  MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLESFC"))));
 
  464  RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
 
  465  labelList->set(
"Timer Label", label);
 
  466  if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants", 
true));
 
  469  if (!C.isFillComplete()) {
 
  470    RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
 
  471    C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
 
  476template <
class Scalar,
 
  479          class LocalOrdinalViewType>
 
  480void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
 
  481                                                                                                                                                              const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
 
  482                                                                                                                                                              CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
 
  483                                                                                                                                                              CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
 
  484                                                                                                                                                              const LocalOrdinalViewType& targetMapToOrigRow_dev,
 
  485                                                                                                                                                              const LocalOrdinalViewType& targetMapToImportRow_dev,
 
  486                                                                                                                                                              const LocalOrdinalViewType& Bcol2Ccol_dev,
 
  487                                                                                                                                                              const LocalOrdinalViewType& Icol2Ccol_dev,
 
  488                                                                                                                                                              CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
 
  489                                                                                                                                                              Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
 
  490                                                                                                                                                              const std::string& label,
 
  491                                                                                                                                                              const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
  493  typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
 
  495#ifdef HAVE_TPETRA_MMM_TIMINGS 
  496  std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
 
  497  using Teuchos::TimeMonitor;
 
  498  Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SYCLCore"))));
 
  499  Teuchos::RCP<Teuchos::TimeMonitor> MM2;
 
  505  typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
 
  506  typedef typename KCRS::StaticCrsGraphType graph_t;
 
  507  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
 
  508  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
 
  509  typedef typename KCRS::values_type::non_const_type scalar_view_t;
 
  510  typedef typename scalar_view_t::memory_space scalar_memory_space;
 
  513  typedef LocalOrdinal LO;
 
  514  typedef GlobalOrdinal GO;
 
  516  typedef Map<LO, GO, NO> map_type;
 
  517  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
 
  518  const LO LO_INVALID     = Teuchos::OrdinalTraits<LO>::invalid();
 
  519  const SC SC_ZERO        = Teuchos::ScalarTraits<Scalar>::zero();
 
  527  auto targetMapToOrigRow =
 
  528      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
 
  529                                          targetMapToOrigRow_dev);
 
  530  auto targetMapToImportRow =
 
  531      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
 
  532                                          targetMapToImportRow_dev);
 
  534      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
 
  537      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
 
  541  RCP<const map_type> Ccolmap = C.getColMap();
 
  542  size_t m                    = Aview.origMatrix->getLocalNumRows();
 
  543  size_t n                    = Ccolmap->getLocalNumElements();
 
  546  const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
 
  547  const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
 
  548  const KCRS& Cmat = C.getLocalMatrixHost();
 
  550  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
 
  551  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
 
  552  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
 
  553  scalar_view_t Cvals = Cmat.values;
 
  555  c_lno_view_t Irowptr;
 
  556  lno_nnz_view_t Icolind;
 
  558  if (!Bview.importMatrix.is_null()) {
 
  559    auto lclB = Bview.importMatrix->getLocalMatrixHost();
 
  560    Irowptr   = lclB.graph.row_map;
 
  561    Icolind   = lclB.graph.entries;
 
  567      Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
 
  569#ifdef HAVE_TPETRA_MMM_TIMINGS 
  571  MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SYCLCore - Compare"))));
 
  579  std::vector<size_t> c_status(n, ST_INVALID);
 
  582  size_t CSR_ip = 0, OLD_ip = 0;
 
  583  for (
size_t i = 0; i < m; i++) {
 
  587    CSR_ip = Crowptr[i + 1];
 
  588    for (
size_t k = OLD_ip; k < CSR_ip; k++) {
 
  589      c_status[Ccolind[k]] = k;
 
  595    SC minusOmegaDval = -omega * Dvals(i, 0);
 
  598    for (
size_t j = Browptr[i]; j < Browptr[i + 1]; j++) {
 
  599      Scalar Bval = Bvals[j];
 
  603      LO Cij = Bcol2Ccol[Bij];
 
  605      TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
 
  606                                 std::runtime_error, 
"Trying to insert a new entry into a static graph");
 
  608      Cvals[c_status[Cij]] = Bvals[j];
 
  612    for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
 
  614      const SC Aval = Avals[k];
 
  618      if (targetMapToOrigRow[Aik] != LO_INVALID) {
 
  620        size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
 
  622        for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
 
  624          LO Cij = Bcol2Ccol[Bkj];
 
  626          TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
 
  627                                     std::runtime_error, 
"Trying to insert a new entry into a static graph");
 
  629          Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
 
  634        size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
 
  635        for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
 
  637          LO Cij = Icol2Ccol[Ikj];
 
  639          TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
 
  640                                     std::runtime_error, 
"Trying to insert a new entry into a static graph");
 
  642          Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
 
  648#ifdef HAVE_TPETRA_MMM_TIMINGS 
  651  MM  = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
 
  654  C.fillComplete(C.getDomainMap(), C.getRangeMap());
 
  658template <
class Scalar,
 
  661          class LocalOrdinalViewType>
 
  662void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
 
  663                                                                                                                                                                 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
 
  664                                                                                                                                                                 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
 
  665                                                                                                                                                                 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
 
  666                                                                                                                                                                 const LocalOrdinalViewType& Acol2Brow,
 
  667                                                                                                                                                                 const LocalOrdinalViewType& Acol2Irow,
 
  668                                                                                                                                                                 const LocalOrdinalViewType& Bcol2Ccol,
 
  669                                                                                                                                                                 const LocalOrdinalViewType& Icol2Ccol,
 
  670                                                                                                                                                                 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
 
  671                                                                                                                                                                 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
 
  672                                                                                                                                                                 const std::string& label,
 
  673                                                                                                                                                                 const Teuchos::RCP<Teuchos::ParameterList>& params) {
 
  674#ifdef HAVE_TPETRA_MMM_TIMINGS 
  675  std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
 
  676  using Teuchos::TimeMonitor;
 
  677  Teuchos::RCP<TimeMonitor> MM;
 
  683    auto rowMap = Aview.origMatrix->getRowMap();
 
  685    Aview.origMatrix->getLocalDiagCopy(diags);
 
  686    size_t diagLength = rowMap->getLocalNumElements();
 
  687    Teuchos::Array<Scalar> diagonal(diagLength);
 
  688    diags.get1dCopy(diagonal());
 
  690    for (
size_t i = 0; i < diagLength; ++i) {
 
  691      TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
 
  693                                 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl
 
  694                                                                          << 
"KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
 
  699  using device_t       = 
typename Tpetra::KokkosCompat::KokkosSYCLWrapperNode::device_type;
 
  701  using graph_t        = 
typename matrix_t::StaticCrsGraphType;
 
  702  using lno_view_t     = 
typename graph_t::row_map_type::non_const_type;
 
  703  using int_view_t     = Kokkos::View<
int*,
 
  704                                  typename lno_view_t::array_layout,
 
  705                                  typename lno_view_t::memory_space,
 
  706                                  typename lno_view_t::memory_traits>;
 
  707  using c_lno_view_t   = 
typename graph_t::row_map_type::const_type;
 
  708  using lno_nnz_view_t = 
typename graph_t::entries_type::non_const_type;
 
  709  using scalar_view_t  = 
typename matrix_t::values_type::non_const_type;
 
  712  using handle_t = 
typename KokkosKernels::Experimental::KokkosKernelsHandle<
 
  713      typename lno_view_t::const_value_type, 
typename lno_nnz_view_t::const_value_type, 
typename scalar_view_t::const_value_type,
 
  714      typename device_t::execution_space, 
typename device_t::memory_space, 
typename device_t::memory_space>;
 
  715  using int_handle_t = 
typename KokkosKernels::Experimental::KokkosKernelsHandle<
 
  716      typename int_view_t::const_value_type, 
typename lno_nnz_view_t::const_value_type, 
typename scalar_view_t::const_value_type,
 
  717      typename device_t::execution_space, 
typename device_t::memory_space, 
typename device_t::memory_space>;
 
  720  const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
 
  723  const matrix_t& Amat = Aview.origMatrix->getLocalMatrixDevice();
 
  724  const matrix_t& Bmat = Bview.origMatrix->getLocalMatrixDevice();
 
  726  typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
 
  727  typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
 
  728  typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
 
  731  lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing(
"row_mapC"), AnumRows + 1);
 
  732  lno_nnz_view_t entriesC;
 
  733  scalar_view_t valuesC;
 
  736  int team_work_size = 16;
 
  737  std::string myalg(
"SPGEMM_KK_MEMORY");
 
  738  if (!params.is_null()) {
 
  739    if (params->isParameter(
"sycl: algorithm"))
 
  740      myalg = params->get(
"sycl: algorithm", myalg);
 
  741    if (params->isParameter(
"sycl: team work size"))
 
  742      team_work_size = params->get(
"sycl: team work size", team_work_size);
 
  746  std::string nodename(
"SYCL");
 
  747  std::string alg = nodename + std::string(
" algorithm");
 
  748  if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
 
  749  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
 
  753  const bool useIntRowptrs =
 
  754      irph.shouldUseIntRowptrs() &&
 
  755      Aview.
origMatrix->getApplyHelper()->shouldUseIntRowptrs();
 
  759    kh.create_spgemm_handle(alg_enum);
 
  760    kh.set_team_work_size(team_work_size);
 
  762    int_view_t int_row_mapC(Kokkos::ViewAllocateWithoutInitializing(
"int_row_mapC"), AnumRows + 1);
 
  764    auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
 
  765    auto Bint = irph.getIntRowptrMatrix(Bmerged);
 
  767    KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
 
  768                                  Aint.graph.row_map, Aint.graph.entries, 
false,
 
  769                                  Bint.graph.row_map, Bint.graph.entries, 
false,
 
  772    size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
 
  774      entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
 
  775      valuesC  = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
 
  780    KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
 
  781                                              Aint.graph.row_map, Aint.graph.entries, Amat.values, 
false,
 
  782                                              Bint.graph.row_map, Bint.graph.entries, Bint.values, 
false,
 
  783                                              int_row_mapC, entriesC, valuesC,
 
  784                                              omega, Dinv.getLocalViewDevice(Access::ReadOnly));
 
  786    Kokkos::parallel_for(
 
  787        int_row_mapC.size(), KOKKOS_LAMBDA(
int i) { row_mapC(i) = int_row_mapC(i); });
 
  788    kh.destroy_spgemm_handle();
 
  791    kh.create_spgemm_handle(alg_enum);
 
  792    kh.set_team_work_size(team_work_size);
 
  794    KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
 
  795                                  Amat.graph.row_map, Amat.graph.entries, 
false,
 
  796                                  Bmerged.graph.row_map, Bmerged.graph.entries, 
false,
 
  799    size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
 
  801      entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
 
  802      valuesC  = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
 
  804    KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
 
  805                                              Amat.graph.row_map, Amat.graph.entries, Amat.values, 
false,
 
  806                                              Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, 
false,
 
  807                                              row_mapC, entriesC, valuesC,
 
  808                                              omega, Dinv.getLocalViewDevice(Access::ReadOnly));
 
  809    kh.destroy_spgemm_handle();
 
  812#ifdef HAVE_TPETRA_MMM_TIMINGS 
  814  MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLSort"))));
 
  818  if (params.is_null() || params->get(
"sort entries", 
true))
 
  819    Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
 
  820  C.setAllValues(row_mapC, entriesC, valuesC);
 
  822#ifdef HAVE_TPETRA_MMM_TIMINGS 
  824  MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLESFC"))));
 
  828  Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
 
  829  labelList->set(
"Timer Label", label);
 
  830  if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants", 
true));
 
  831  Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
 
  832  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
 
Struct that holds views of the contents of a CrsMatrix.
 
Teuchos::RCP< const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > origMatrix
The original matrix.
 
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
 
static bool debug()
Whether Tpetra is in debug mode.
 
Namespace Tpetra contains the class and methods constituting the Tpetra library.