10#ifndef TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
11#define TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
13#include "Tpetra_Details_IntRowPtrHelper.hpp"
15#ifdef HAVE_TPETRA_INST_SYCL
21template <
class Scalar,
24 class LocalOrdinalViewType>
25struct KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal,
Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType> {
26 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
27 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
28 const LocalOrdinalViewType& Acol2Brow,
29 const LocalOrdinalViewType& Acol2Irow,
30 const LocalOrdinalViewType& Bcol2Ccol,
31 const LocalOrdinalViewType& Icol2Ccol,
32 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
33 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
34 const std::string& label = std::string(),
35 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
37 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
38 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
39 const LocalOrdinalViewType& Acol2Brow,
40 const LocalOrdinalViewType& Acol2Irow,
41 const LocalOrdinalViewType& Bcol2Ccol,
42 const LocalOrdinalViewType& Icol2Ccol,
43 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
44 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
45 const std::string& label = std::string(),
46 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
50template <
class Scalar,
52 class GlobalOrdinal,
class LocalOrdinalViewType>
53struct KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal,
Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType> {
54 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
55 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
56 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
57 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
58 const LocalOrdinalViewType& Acol2Brow,
59 const LocalOrdinalViewType& Acol2Irow,
60 const LocalOrdinalViewType& Bcol2Ccol,
61 const LocalOrdinalViewType& Icol2Ccol,
62 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
63 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
64 const std::string& label = std::string(),
65 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
67 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
68 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
69 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
70 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
71 const LocalOrdinalViewType& Acol2Brow,
72 const LocalOrdinalViewType& Acol2Irow,
73 const LocalOrdinalViewType& Bcol2Ccol,
74 const LocalOrdinalViewType& Icol2Ccol,
75 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
76 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
77 const std::string& label = std::string(),
78 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
80 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
81 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
82 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
83 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
84 const LocalOrdinalViewType& Acol2Brow,
85 const LocalOrdinalViewType& Acol2Irow,
86 const LocalOrdinalViewType& Bcol2Ccol,
87 const LocalOrdinalViewType& Icol2Ccol,
88 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
89 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
90 const std::string& label = std::string(),
91 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
96template <
class Scalar,
99 class LocalOrdinalViewType>
100void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
101 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
102 const LocalOrdinalViewType& Acol2Brow,
103 const LocalOrdinalViewType& Acol2Irow,
104 const LocalOrdinalViewType& Bcol2Ccol,
105 const LocalOrdinalViewType& Icol2Ccol,
106 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
107 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
108 const std::string& label,
109 const Teuchos::RCP<Teuchos::ParameterList>& params) {
110#ifdef HAVE_TPETRA_MMM_TIMINGS
111 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
112 using Teuchos::TimeMonitor;
113 Teuchos::RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLWrapper")))));
116 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
117 std::string nodename(
"SYCL");
122 typedef typename KCRS::device_type device_t;
123 typedef typename KCRS::StaticCrsGraphType graph_t;
124 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
125 using int_view_t = Kokkos::View<int*, typename lno_view_t::array_layout, typename lno_view_t::memory_space, typename lno_view_t::memory_traits>;
126 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
127 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
128 typedef typename KCRS::values_type::non_const_type scalar_view_t;
132 int team_work_size = 16;
133 std::string myalg(
"SPGEMM_KK_MEMORY");
134 if (!params.is_null()) {
135 if (params->isParameter(
"sycl: algorithm"))
136 myalg = params->get(
"sycl: algorithm", myalg);
137 if (params->isParameter(
"sycl: team work size"))
138 team_work_size = params->get(
"sycl: team work size", team_work_size);
142 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
143 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
144 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space>
146 using IntKernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<
147 typename int_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
148 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space>;
151 const KCRS& Amat = Aview.origMatrix->getLocalMatrixDevice();
152 const KCRS& Bmat = Bview.origMatrix->getLocalMatrixDevice();
154 c_lno_view_t Arowptr = Amat.graph.row_map,
155 Browptr = Bmat.graph.row_map;
156 const lno_nnz_view_t Acolind = Amat.graph.entries,
157 Bcolind = Bmat.graph.entries;
158 const scalar_view_t Avals = Amat.values,
162 std::string alg = nodename + std::string(
" algorithm");
164 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
165 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
168 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
170#ifdef HAVE_TPETRA_MMM_TIMINGS
172 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLCore"))));
176 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
177 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
178 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
180 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing(
"non_const_lno_row"), AnumRows + 1);
181 lno_nnz_view_t entriesC;
182 scalar_view_t valuesC;
187 const bool useIntRowptrs =
188 irph.shouldUseIntRowptrs() &&
189 Aview.
origMatrix->getApplyHelper()->shouldUseIntRowptrs();
193 kh.create_spgemm_handle(alg_enum);
194 kh.set_team_work_size(team_work_size);
196 int_view_t int_row_mapC(Kokkos::ViewAllocateWithoutInitializing(
"non_const_int_row"), AnumRows + 1);
198 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
199 auto Bint = irph.getIntRowptrMatrix(Bmerged);
200 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Aint.graph.row_map, Aint.graph.entries,
false, Bint.graph.row_map, Bint.graph.entries,
false, int_row_mapC);
202 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
204 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
205 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
207 KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Aint.graph.row_map, Aint.graph.entries, Aint.values,
false, Bint.graph.row_map, Bint.graph.entries, Bint.values,
false, int_row_mapC, entriesC, valuesC);
209 Kokkos::parallel_for(
210 int_row_mapC.size(), KOKKOS_LAMBDA(
int i) { row_mapC(i) = int_row_mapC(i); });
211 kh.destroy_spgemm_handle();
215 kh.create_spgemm_handle(alg_enum);
216 kh.set_team_work_size(team_work_size);
218 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Amat.graph.row_map, Amat.graph.entries,
false, Bmerged.graph.row_map, Bmerged.graph.entries,
false, row_mapC);
220 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
222 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
223 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
226 KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Amat.graph.row_map, Amat.graph.entries, Amat.values,
false, Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values,
false, row_mapC, entriesC, valuesC);
227 kh.destroy_spgemm_handle();
230#ifdef HAVE_TPETRA_MMM_TIMINGS
232 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLSort"))));
236 if (params.is_null() || params->get(
"sort entries",
true))
237 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
238 C.setAllValues(row_mapC, entriesC, valuesC);
240#ifdef HAVE_TPETRA_MMM_TIMINGS
242 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLESFC"))));
246 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
247 labelList->set(
"Timer Label", label);
248 if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants",
true));
249 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
250 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
254template <
class Scalar,
257 class LocalOrdinalViewType>
258void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(
259 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
260 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
261 const LocalOrdinalViewType& targetMapToOrigRow_dev,
262 const LocalOrdinalViewType& targetMapToImportRow_dev,
263 const LocalOrdinalViewType& Bcol2Ccol_dev,
264 const LocalOrdinalViewType& Icol2Ccol_dev,
265 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
266 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
267 const std::string& label,
268 const Teuchos::RCP<Teuchos::ParameterList>& params) {
270 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
272#ifdef HAVE_TPETRA_MMM_TIMINGS
273 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
274 using Teuchos::TimeMonitor;
275 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
276 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
282 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
283 typedef typename KCRS::StaticCrsGraphType graph_t;
284 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
285 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
286 typedef typename KCRS::values_type::non_const_type scalar_view_t;
289 typedef LocalOrdinal LO;
290 typedef GlobalOrdinal GO;
292 typedef Map<LO, GO, NO> map_type;
293 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
294 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
295 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
303 auto targetMapToOrigRow =
304 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
305 targetMapToOrigRow_dev);
306 auto targetMapToImportRow =
307 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
308 targetMapToImportRow_dev);
310 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
313 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
317 RCP<const map_type> Ccolmap = C.getColMap();
318 size_t m = Aview.origMatrix->getLocalNumRows();
319 size_t n = Ccolmap->getLocalNumElements();
322 const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
323 const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
324 const KCRS& Cmat = C.getLocalMatrixHost();
326 c_lno_view_t Arowptr = Amat.graph.row_map,
327 Browptr = Bmat.graph.row_map,
328 Crowptr = Cmat.graph.row_map;
329 const lno_nnz_view_t Acolind = Amat.graph.entries,
330 Bcolind = Bmat.graph.entries,
331 Ccolind = Cmat.graph.entries;
332 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
333 scalar_view_t Cvals = Cmat.values;
335 c_lno_view_t Irowptr;
336 lno_nnz_view_t Icolind;
338 if (!Bview.importMatrix.is_null()) {
339 auto lclB = Bview.importMatrix->getLocalMatrixHost();
340 Irowptr = lclB.graph.row_map;
341 Icolind = lclB.graph.entries;
345#ifdef HAVE_TPETRA_MMM_TIMINGS
347 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
359 std::vector<size_t> c_status(n, ST_INVALID);
362 size_t CSR_ip = 0, OLD_ip = 0;
363 for (
size_t i = 0; i < m; i++) {
367 CSR_ip = Crowptr[i + 1];
368 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
369 c_status[Ccolind[k]] = k;
375 for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
377 const SC Aval = Avals[k];
381 if (targetMapToOrigRow[Aik] != LO_INVALID) {
383 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
385 for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
387 LO Cij = Bcol2Ccol[Bkj];
389 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
390 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph "
391 <<
"(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
393 Cvals[c_status[Cij]] += Aval * Bvals[j];
398 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
399 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
401 LO Cij = Icol2Ccol[Ikj];
403 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
404 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph "
405 <<
"(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
407 Cvals[c_status[Cij]] += Aval * Ivals[j];
413 C.fillComplete(C.getDomainMap(), C.getRangeMap());
417template <
class Scalar,
420 class LocalOrdinalViewType>
421void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
422 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
423 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
424 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
425 const LocalOrdinalViewType& Acol2Brow,
426 const LocalOrdinalViewType& Acol2Irow,
427 const LocalOrdinalViewType& Bcol2Ccol,
428 const LocalOrdinalViewType& Icol2Ccol,
429 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
430 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
431 const std::string& label,
432 const Teuchos::RCP<Teuchos::ParameterList>& params) {
433#ifdef HAVE_TPETRA_MMM_TIMINGS
434 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
435 using Teuchos::TimeMonitor;
436 Teuchos::RCP<TimeMonitor> MM;
444 std::string myalg(
"KK");
445 if (!params.is_null()) {
446 if (params->isParameter(
"sycl: jacobi algorithm"))
447 myalg = params->get(
"sycl: jacobi algorithm", myalg);
450 if (myalg ==
"MSAK") {
451 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
452 }
else if (myalg ==
"KK") {
453 jacobi_A_B_newmatrix_KokkosKernels(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
455 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
458#ifdef HAVE_TPETRA_MMM_TIMINGS
460 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLESFC"))));
464 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
465 labelList->set(
"Timer Label", label);
466 if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants",
true));
469 if (!C.isFillComplete()) {
470 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
471 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
476template <
class Scalar,
479 class LocalOrdinalViewType>
480void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
481 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
482 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
483 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
484 const LocalOrdinalViewType& targetMapToOrigRow_dev,
485 const LocalOrdinalViewType& targetMapToImportRow_dev,
486 const LocalOrdinalViewType& Bcol2Ccol_dev,
487 const LocalOrdinalViewType& Icol2Ccol_dev,
488 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
489 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
490 const std::string& label,
491 const Teuchos::RCP<Teuchos::ParameterList>& params) {
493 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
495#ifdef HAVE_TPETRA_MMM_TIMINGS
496 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
497 using Teuchos::TimeMonitor;
498 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SYCLCore"))));
499 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
505 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
506 typedef typename KCRS::StaticCrsGraphType graph_t;
507 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
508 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
509 typedef typename KCRS::values_type::non_const_type scalar_view_t;
510 typedef typename scalar_view_t::memory_space scalar_memory_space;
513 typedef LocalOrdinal LO;
514 typedef GlobalOrdinal GO;
516 typedef Map<LO, GO, NO> map_type;
517 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
518 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
519 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
527 auto targetMapToOrigRow =
528 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
529 targetMapToOrigRow_dev);
530 auto targetMapToImportRow =
531 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
532 targetMapToImportRow_dev);
534 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
537 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
541 RCP<const map_type> Ccolmap = C.getColMap();
542 size_t m = Aview.origMatrix->getLocalNumRows();
543 size_t n = Ccolmap->getLocalNumElements();
546 const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
547 const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
548 const KCRS& Cmat = C.getLocalMatrixHost();
550 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
551 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
552 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
553 scalar_view_t Cvals = Cmat.values;
555 c_lno_view_t Irowptr;
556 lno_nnz_view_t Icolind;
558 if (!Bview.importMatrix.is_null()) {
559 auto lclB = Bview.importMatrix->getLocalMatrixHost();
560 Irowptr = lclB.graph.row_map;
561 Icolind = lclB.graph.entries;
567 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
569#ifdef HAVE_TPETRA_MMM_TIMINGS
571 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SYCLCore - Compare"))));
579 std::vector<size_t> c_status(n, ST_INVALID);
582 size_t CSR_ip = 0, OLD_ip = 0;
583 for (
size_t i = 0; i < m; i++) {
587 CSR_ip = Crowptr[i + 1];
588 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
589 c_status[Ccolind[k]] = k;
595 SC minusOmegaDval = -omega * Dvals(i, 0);
598 for (
size_t j = Browptr[i]; j < Browptr[i + 1]; j++) {
599 Scalar Bval = Bvals[j];
603 LO Cij = Bcol2Ccol[Bij];
605 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
606 std::runtime_error,
"Trying to insert a new entry into a static graph");
608 Cvals[c_status[Cij]] = Bvals[j];
612 for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
614 const SC Aval = Avals[k];
618 if (targetMapToOrigRow[Aik] != LO_INVALID) {
620 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
622 for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
624 LO Cij = Bcol2Ccol[Bkj];
626 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
627 std::runtime_error,
"Trying to insert a new entry into a static graph");
629 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
634 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
635 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
637 LO Cij = Icol2Ccol[Ikj];
639 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
640 std::runtime_error,
"Trying to insert a new entry into a static graph");
642 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
648#ifdef HAVE_TPETRA_MMM_TIMINGS
651 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
654 C.fillComplete(C.getDomainMap(), C.getRangeMap());
658template <
class Scalar,
661 class LocalOrdinalViewType>
662void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
663 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
664 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
665 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
666 const LocalOrdinalViewType& Acol2Brow,
667 const LocalOrdinalViewType& Acol2Irow,
668 const LocalOrdinalViewType& Bcol2Ccol,
669 const LocalOrdinalViewType& Icol2Ccol,
670 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
671 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
672 const std::string& label,
673 const Teuchos::RCP<Teuchos::ParameterList>& params) {
674#ifdef HAVE_TPETRA_MMM_TIMINGS
675 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
676 using Teuchos::TimeMonitor;
677 Teuchos::RCP<TimeMonitor> MM;
683 auto rowMap = Aview.origMatrix->getRowMap();
685 Aview.origMatrix->getLocalDiagCopy(diags);
686 size_t diagLength = rowMap->getLocalNumElements();
687 Teuchos::Array<Scalar> diagonal(diagLength);
688 diags.get1dCopy(diagonal());
690 for (
size_t i = 0; i < diagLength; ++i) {
691 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
693 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl
694 <<
"KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
699 using device_t =
typename Tpetra::KokkosCompat::KokkosSYCLWrapperNode::device_type;
701 using graph_t =
typename matrix_t::StaticCrsGraphType;
702 using lno_view_t =
typename graph_t::row_map_type::non_const_type;
703 using int_view_t = Kokkos::View<
int*,
704 typename lno_view_t::array_layout,
705 typename lno_view_t::memory_space,
706 typename lno_view_t::memory_traits>;
707 using c_lno_view_t =
typename graph_t::row_map_type::const_type;
708 using lno_nnz_view_t =
typename graph_t::entries_type::non_const_type;
709 using scalar_view_t =
typename matrix_t::values_type::non_const_type;
712 using handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
713 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
714 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space>;
715 using int_handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
716 typename int_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
717 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space>;
720 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
723 const matrix_t& Amat = Aview.origMatrix->getLocalMatrixDevice();
724 const matrix_t& Bmat = Bview.origMatrix->getLocalMatrixDevice();
726 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
727 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
728 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
731 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing(
"row_mapC"), AnumRows + 1);
732 lno_nnz_view_t entriesC;
733 scalar_view_t valuesC;
736 int team_work_size = 16;
737 std::string myalg(
"SPGEMM_KK_MEMORY");
738 if (!params.is_null()) {
739 if (params->isParameter(
"sycl: algorithm"))
740 myalg = params->get(
"sycl: algorithm", myalg);
741 if (params->isParameter(
"sycl: team work size"))
742 team_work_size = params->get(
"sycl: team work size", team_work_size);
746 std::string nodename(
"SYCL");
747 std::string alg = nodename + std::string(
" algorithm");
748 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
749 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
753 const bool useIntRowptrs =
754 irph.shouldUseIntRowptrs() &&
755 Aview.
origMatrix->getApplyHelper()->shouldUseIntRowptrs();
759 kh.create_spgemm_handle(alg_enum);
760 kh.set_team_work_size(team_work_size);
762 int_view_t int_row_mapC(Kokkos::ViewAllocateWithoutInitializing(
"int_row_mapC"), AnumRows + 1);
764 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
765 auto Bint = irph.getIntRowptrMatrix(Bmerged);
767 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
768 Aint.graph.row_map, Aint.graph.entries,
false,
769 Bint.graph.row_map, Bint.graph.entries,
false,
772 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
774 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
775 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
780 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
781 Aint.graph.row_map, Aint.graph.entries, Amat.values,
false,
782 Bint.graph.row_map, Bint.graph.entries, Bint.values,
false,
783 int_row_mapC, entriesC, valuesC,
784 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
786 Kokkos::parallel_for(
787 int_row_mapC.size(), KOKKOS_LAMBDA(
int i) { row_mapC(i) = int_row_mapC(i); });
788 kh.destroy_spgemm_handle();
791 kh.create_spgemm_handle(alg_enum);
792 kh.set_team_work_size(team_work_size);
794 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
795 Amat.graph.row_map, Amat.graph.entries,
false,
796 Bmerged.graph.row_map, Bmerged.graph.entries,
false,
799 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
801 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
802 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
804 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
805 Amat.graph.row_map, Amat.graph.entries, Amat.values,
false,
806 Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values,
false,
807 row_mapC, entriesC, valuesC,
808 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
809 kh.destroy_spgemm_handle();
812#ifdef HAVE_TPETRA_MMM_TIMINGS
814 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLSort"))));
818 if (params.is_null() || params->get(
"sort entries",
true))
819 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
820 C.setAllValues(row_mapC, entriesC, valuesC);
822#ifdef HAVE_TPETRA_MMM_TIMINGS
824 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLESFC"))));
828 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
829 labelList->set(
"Timer Label", label);
830 if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants",
true));
831 Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
832 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
Struct that holds views of the contents of a CrsMatrix.
Teuchos::RCP< const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > origMatrix
The original matrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.