Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
TpetraExt_MatrixMatrix_SYCL.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
11#define TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
12
13#include "Tpetra_Details_IntRowPtrHelper.hpp"
14
15#ifdef HAVE_TPETRA_INST_SYCL
16namespace Tpetra {
17namespace MMdetails {
18
19/*********************************************************************************************************/
20// MMM KernelWrappers for Partial Specialization to SYCL
21template <class Scalar,
22 class LocalOrdinal,
23 class GlobalOrdinal,
24 class LocalOrdinalViewType>
25struct KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType> {
26 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
27 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
28 const LocalOrdinalViewType& Acol2Brow,
29 const LocalOrdinalViewType& Acol2Irow,
30 const LocalOrdinalViewType& Bcol2Ccol,
31 const LocalOrdinalViewType& Icol2Ccol,
32 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
33 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
34 const std::string& label = std::string(),
35 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
36
37 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
38 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
39 const LocalOrdinalViewType& Acol2Brow,
40 const LocalOrdinalViewType& Acol2Irow,
41 const LocalOrdinalViewType& Bcol2Ccol,
42 const LocalOrdinalViewType& Icol2Ccol,
43 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
44 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
45 const std::string& label = std::string(),
46 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
47};
48
49// Jacobi KernelWrappers for Partial Specialization to SYCL
50template <class Scalar,
51 class LocalOrdinal,
52 class GlobalOrdinal, class LocalOrdinalViewType>
53struct KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType> {
54 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
55 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
56 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
57 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
58 const LocalOrdinalViewType& Acol2Brow,
59 const LocalOrdinalViewType& Acol2Irow,
60 const LocalOrdinalViewType& Bcol2Ccol,
61 const LocalOrdinalViewType& Icol2Ccol,
62 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
63 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
64 const std::string& label = std::string(),
65 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
66
67 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
68 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
69 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
70 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
71 const LocalOrdinalViewType& Acol2Brow,
72 const LocalOrdinalViewType& Acol2Irow,
73 const LocalOrdinalViewType& Bcol2Ccol,
74 const LocalOrdinalViewType& Icol2Ccol,
75 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
76 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
77 const std::string& label = std::string(),
78 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
79
80 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
81 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
82 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
83 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
84 const LocalOrdinalViewType& Acol2Brow,
85 const LocalOrdinalViewType& Acol2Irow,
86 const LocalOrdinalViewType& Bcol2Ccol,
87 const LocalOrdinalViewType& Icol2Ccol,
88 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
89 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
90 const std::string& label = std::string(),
91 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
92};
93
94/*********************************************************************************************************/
95// AB NewMatrix Kernel wrappers (KokkosKernels/SYCL Version)
96template <class Scalar,
97 class LocalOrdinal,
98 class GlobalOrdinal,
99 class LocalOrdinalViewType>
100void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
101 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
102 const LocalOrdinalViewType& Acol2Brow,
103 const LocalOrdinalViewType& Acol2Irow,
104 const LocalOrdinalViewType& Bcol2Ccol,
105 const LocalOrdinalViewType& Icol2Ccol,
106 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
107 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
108 const std::string& label,
109 const Teuchos::RCP<Teuchos::ParameterList>& params) {
110#ifdef HAVE_TPETRA_MMM_TIMINGS
111 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
112 using Teuchos::TimeMonitor;
113 Teuchos::RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLWrapper")))));
114#endif
115 // Node-specific code
116 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
117 std::string nodename("SYCL");
118
119 // Lots and lots of typedefs
120 using Teuchos::RCP;
122 typedef typename KCRS::device_type device_t;
123 typedef typename KCRS::StaticCrsGraphType graph_t;
124 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
125 using int_view_t = Kokkos::View<int*, typename lno_view_t::array_layout, typename lno_view_t::memory_space, typename lno_view_t::memory_traits>;
126 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
127 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
128 typedef typename KCRS::values_type::non_const_type scalar_view_t;
129 // typedef typename graph_t::row_map_type::const_type lno_view_t_const;
130
131 // Options
132 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
133 std::string myalg("SPGEMM_KK_MEMORY");
134 if (!params.is_null()) {
135 if (params->isParameter("sycl: algorithm"))
136 myalg = params->get("sycl: algorithm", myalg);
137 if (params->isParameter("sycl: team work size"))
138 team_work_size = params->get("sycl: team work size", team_work_size);
139 }
140
141 // KokkosKernelsHandle
142 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
143 typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
144 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>
145 KernelHandle;
146 using IntKernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<
147 typename int_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
148 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
149
150 // Grab the Kokkos::SparseCrsMatrices
151 const KCRS& Amat = Aview.origMatrix->getLocalMatrixDevice();
152 const KCRS& Bmat = Bview.origMatrix->getLocalMatrixDevice();
153
154 c_lno_view_t Arowptr = Amat.graph.row_map,
155 Browptr = Bmat.graph.row_map;
156 const lno_nnz_view_t Acolind = Amat.graph.entries,
157 Bcolind = Bmat.graph.entries;
158 const scalar_view_t Avals = Amat.values,
159 Bvals = Bmat.values;
160
161 // Get the algorithm mode
162 std::string alg = nodename + std::string(" algorithm");
163 // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
164 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
165 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
166
167 // Merge the B and Bimport matrices
168 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
169
170#ifdef HAVE_TPETRA_MMM_TIMINGS
171 MM = Teuchos::null;
172 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLCore"))));
173#endif
174
175 // Do the multiply on whatever we've got
176 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
177 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
178 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
179
180 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_lno_row"), AnumRows + 1);
181 lno_nnz_view_t entriesC;
182 scalar_view_t valuesC;
183
184 // static_assert(std::is_void_v<typename KCRS::row_map_type::non_const_value_type>, "");
185 // static_assert(std::is_void_v<typename decltype(Bmerged)::row_map_type::non_const_value_type>, "");
186 Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
187 const bool useIntRowptrs =
188 irph.shouldUseIntRowptrs() &&
189 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
190
191 if (useIntRowptrs) {
192 IntKernelHandle kh;
193 kh.create_spgemm_handle(alg_enum);
194 kh.set_team_work_size(team_work_size);
195
196 int_view_t int_row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_int_row"), AnumRows + 1);
197
198 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
199 auto Bint = irph.getIntRowptrMatrix(Bmerged);
200 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Aint.graph.row_map, Aint.graph.entries, false, Bint.graph.row_map, Bint.graph.entries, false, int_row_mapC);
201
202 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
203 if (c_nnz_size) {
204 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
205 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
206 }
207 KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Aint.graph.row_map, Aint.graph.entries, Aint.values, false, Bint.graph.row_map, Bint.graph.entries, Bint.values, false, int_row_mapC, entriesC, valuesC);
208 // transfer the integer rowptrs back to the correct rowptr type
209 Kokkos::parallel_for(
210 int_row_mapC.size(), KOKKOS_LAMBDA(int i) { row_mapC(i) = int_row_mapC(i); });
211 kh.destroy_spgemm_handle();
212
213 } else {
214 KernelHandle kh;
215 kh.create_spgemm_handle(alg_enum);
216 kh.set_team_work_size(team_work_size);
217
218 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Amat.graph.row_map, Amat.graph.entries, false, Bmerged.graph.row_map, Bmerged.graph.entries, false, row_mapC);
219
220 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
221 if (c_nnz_size) {
222 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
223 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
224 }
225
226 KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Amat.graph.row_map, Amat.graph.entries, Amat.values, false, Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false, row_mapC, entriesC, valuesC);
227 kh.destroy_spgemm_handle();
228 }
229
230#ifdef HAVE_TPETRA_MMM_TIMINGS
231 MM = Teuchos::null;
232 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLSort"))));
233#endif
234
235 // Sort & set values
236 if (params.is_null() || params->get("sort entries", true))
237 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
238 C.setAllValues(row_mapC, entriesC, valuesC);
239
240#ifdef HAVE_TPETRA_MMM_TIMINGS
241 MM = Teuchos::null;
242 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLESFC"))));
243#endif
244
245 // Final Fillcomplete
246 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
247 labelList->set("Timer Label", label);
248 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
249 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
250 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
251}
252
253/*********************************************************************************************************/
254template <class Scalar,
255 class LocalOrdinal,
256 class GlobalOrdinal,
257 class LocalOrdinalViewType>
258void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(
259 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
260 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
261 const LocalOrdinalViewType& targetMapToOrigRow_dev,
262 const LocalOrdinalViewType& targetMapToImportRow_dev,
263 const LocalOrdinalViewType& Bcol2Ccol_dev,
264 const LocalOrdinalViewType& Icol2Ccol_dev,
265 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
266 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
267 const std::string& label,
268 const Teuchos::RCP<Teuchos::ParameterList>& params) {
269 // FIXME: Right now, this is a cut-and-paste of the serial kernel
270 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
271
272#ifdef HAVE_TPETRA_MMM_TIMINGS
273 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
274 using Teuchos::TimeMonitor;
275 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse SerialCore"))));
276 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
277#endif
278 using Teuchos::RCP;
279 using Teuchos::rcp;
280
281 // Lots and lots of typedefs
282 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
283 typedef typename KCRS::StaticCrsGraphType graph_t;
284 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
285 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
286 typedef typename KCRS::values_type::non_const_type scalar_view_t;
287
288 typedef Scalar SC;
289 typedef LocalOrdinal LO;
290 typedef GlobalOrdinal GO;
291 typedef Node NO;
292 typedef Map<LO, GO, NO> map_type;
293 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
294 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
295 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
296
297 // Since this is being run on SYCL, we need to fence because the below code will use UVM
298 // typename graph_t::execution_space().fence();
299
300 // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
301 // KDDKDD UVM Ideally, this function would run on device and use
302 // KDDKDD UVM KokkosKernels instead of this host implementation.
303 auto targetMapToOrigRow =
304 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
305 targetMapToOrigRow_dev);
306 auto targetMapToImportRow =
307 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
308 targetMapToImportRow_dev);
309 auto Bcol2Ccol =
310 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
311 Bcol2Ccol_dev);
312 auto Icol2Ccol =
313 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
314 Icol2Ccol_dev);
315
316 // Sizes
317 RCP<const map_type> Ccolmap = C.getColMap();
318 size_t m = Aview.origMatrix->getLocalNumRows();
319 size_t n = Ccolmap->getLocalNumElements();
320
321 // Grab the Kokkos::SparseCrsMatrices & inner stuff
322 const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
323 const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
324 const KCRS& Cmat = C.getLocalMatrixHost();
325
326 c_lno_view_t Arowptr = Amat.graph.row_map,
327 Browptr = Bmat.graph.row_map,
328 Crowptr = Cmat.graph.row_map;
329 const lno_nnz_view_t Acolind = Amat.graph.entries,
330 Bcolind = Bmat.graph.entries,
331 Ccolind = Cmat.graph.entries;
332 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
333 scalar_view_t Cvals = Cmat.values;
334
335 c_lno_view_t Irowptr;
336 lno_nnz_view_t Icolind;
337 scalar_view_t Ivals;
338 if (!Bview.importMatrix.is_null()) {
339 auto lclB = Bview.importMatrix->getLocalMatrixHost();
340 Irowptr = lclB.graph.row_map;
341 Icolind = lclB.graph.entries;
342 Ivals = lclB.values;
343 }
344
345#ifdef HAVE_TPETRA_MMM_TIMINGS
346 MM2 = Teuchos::null;
347 MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SerialCore - Compare"))));
348#endif
349
350 // Classic csr assembly (low memory edition)
351 // mfh 27 Sep 2016: The c_status array is an implementation detail
352 // of the local sparse matrix-matrix multiply routine.
353
354 // The status array will contain the index into colind where this entry was last deposited.
355 // c_status[i] < CSR_ip - not in the row yet
356 // c_status[i] >= CSR_ip - this is the entry where you can find the data
357 // We start with this filled with INVALID's indicating that there are no entries yet.
358 // Sadly, this complicates the code due to the fact that size_t's are unsigned.
359 std::vector<size_t> c_status(n, ST_INVALID);
360
361 // For each row of A/C
362 size_t CSR_ip = 0, OLD_ip = 0;
363 for (size_t i = 0; i < m; i++) {
364 // First fill the c_status array w/ locations where we're allowed to
365 // generate nonzeros for this row
366 OLD_ip = Crowptr[i];
367 CSR_ip = Crowptr[i + 1];
368 for (size_t k = OLD_ip; k < CSR_ip; k++) {
369 c_status[Ccolind[k]] = k;
370
371 // Reset values in the row of C
372 Cvals[k] = SC_ZERO;
373 }
374
375 for (size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
376 LO Aik = Acolind[k];
377 const SC Aval = Avals[k];
378 if (Aval == SC_ZERO)
379 continue;
380
381 if (targetMapToOrigRow[Aik] != LO_INVALID) {
382 // Local matrix
383 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
384
385 for (size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
386 LO Bkj = Bcolind[j];
387 LO Cij = Bcol2Ccol[Bkj];
388
389 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
390 std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph "
391 << "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
392
393 Cvals[c_status[Cij]] += Aval * Bvals[j];
394 }
395
396 } else {
397 // Remote matrix
398 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
399 for (size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
400 LO Ikj = Icolind[j];
401 LO Cij = Icol2Ccol[Ikj];
402
403 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
404 std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph "
405 << "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
406
407 Cvals[c_status[Cij]] += Aval * Ivals[j];
408 }
409 }
410 }
411 }
412
413 C.fillComplete(C.getDomainMap(), C.getRangeMap());
414}
415
416/*********************************************************************************************************/
417template <class Scalar,
418 class LocalOrdinal,
419 class GlobalOrdinal,
420 class LocalOrdinalViewType>
421void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
422 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
423 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
424 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
425 const LocalOrdinalViewType& Acol2Brow,
426 const LocalOrdinalViewType& Acol2Irow,
427 const LocalOrdinalViewType& Bcol2Ccol,
428 const LocalOrdinalViewType& Icol2Ccol,
429 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
430 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
431 const std::string& label,
432 const Teuchos::RCP<Teuchos::ParameterList>& params) {
433#ifdef HAVE_TPETRA_MMM_TIMINGS
434 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
435 using Teuchos::TimeMonitor;
436 Teuchos::RCP<TimeMonitor> MM;
437#endif
438
439 // Node-specific code
440 using Teuchos::RCP;
441
442 // Options
443 // int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer // unreferenced
444 std::string myalg("KK");
445 if (!params.is_null()) {
446 if (params->isParameter("sycl: jacobi algorithm"))
447 myalg = params->get("sycl: jacobi algorithm", myalg);
448 }
449
450 if (myalg == "MSAK") {
451 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
452 } else if (myalg == "KK") {
453 jacobi_A_B_newmatrix_KokkosKernels(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
454 } else {
455 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
456 }
457
458#ifdef HAVE_TPETRA_MMM_TIMINGS
459 MM = Teuchos::null;
460 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLESFC"))));
461#endif
462
463 // Final Fillcomplete
464 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
465 labelList->set("Timer Label", label);
466 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
467
468 // NOTE: MSAK already fillCompletes, so we have to check here
469 if (!C.isFillComplete()) {
470 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
471 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
472 }
473}
474
475/*********************************************************************************************************/
476template <class Scalar,
477 class LocalOrdinal,
478 class GlobalOrdinal,
479 class LocalOrdinalViewType>
480void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
481 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
482 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
483 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
484 const LocalOrdinalViewType& targetMapToOrigRow_dev,
485 const LocalOrdinalViewType& targetMapToImportRow_dev,
486 const LocalOrdinalViewType& Bcol2Ccol_dev,
487 const LocalOrdinalViewType& Icol2Ccol_dev,
488 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
489 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
490 const std::string& label,
491 const Teuchos::RCP<Teuchos::ParameterList>& params) {
492 // FIXME: Right now, this is a cut-and-paste of the serial kernel
493 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
494
495#ifdef HAVE_TPETRA_MMM_TIMINGS
496 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
497 using Teuchos::TimeMonitor;
498 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse SYCLCore"))));
499 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
500#endif
501 using Teuchos::RCP;
502 using Teuchos::rcp;
503
504 // Lots and lots of typedefs
505 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
506 typedef typename KCRS::StaticCrsGraphType graph_t;
507 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
508 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
509 typedef typename KCRS::values_type::non_const_type scalar_view_t;
510 typedef typename scalar_view_t::memory_space scalar_memory_space;
511
512 typedef Scalar SC;
513 typedef LocalOrdinal LO;
514 typedef GlobalOrdinal GO;
515 typedef Node NO;
516 typedef Map<LO, GO, NO> map_type;
517 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
518 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
519 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
520
521 // Since this is being run on SYCL, we need to fence because the below host code will use UVM
522 // KDDKDD typename graph_t::execution_space().fence();
523
524 // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
525 // KDDKDD UVM Ideally, this function would run on device and use
526 // KDDKDD UVM KokkosKernels instead of this host implementation.
527 auto targetMapToOrigRow =
528 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
529 targetMapToOrigRow_dev);
530 auto targetMapToImportRow =
531 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
532 targetMapToImportRow_dev);
533 auto Bcol2Ccol =
534 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
535 Bcol2Ccol_dev);
536 auto Icol2Ccol =
537 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
538 Icol2Ccol_dev);
539
540 // Sizes
541 RCP<const map_type> Ccolmap = C.getColMap();
542 size_t m = Aview.origMatrix->getLocalNumRows();
543 size_t n = Ccolmap->getLocalNumElements();
544
545 // Grab the Kokkos::SparseCrsMatrices & inner stuff
546 const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
547 const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
548 const KCRS& Cmat = C.getLocalMatrixHost();
549
550 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
551 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
552 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
553 scalar_view_t Cvals = Cmat.values;
554
555 c_lno_view_t Irowptr;
556 lno_nnz_view_t Icolind;
557 scalar_view_t Ivals;
558 if (!Bview.importMatrix.is_null()) {
559 auto lclB = Bview.importMatrix->getLocalMatrixHost();
560 Irowptr = lclB.graph.row_map;
561 Icolind = lclB.graph.entries;
562 Ivals = lclB.values;
563 }
564
565 // Jacobi-specific inner stuff
566 auto Dvals =
567 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
568
569#ifdef HAVE_TPETRA_MMM_TIMINGS
570 MM2 = Teuchos::null;
571 MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse SYCLCore - Compare"))));
572#endif
573
574 // The status array will contain the index into colind where this entry was last deposited.
575 // c_status[i] < CSR_ip - not in the row yet
576 // c_status[i] >= CSR_ip - this is the entry where you can find the data
577 // We start with this filled with INVALID's indicating that there are no entries yet.
578 // Sadly, this complicates the code due to the fact that size_t's are unsigned.
579 std::vector<size_t> c_status(n, ST_INVALID);
580
581 // For each row of A/C
582 size_t CSR_ip = 0, OLD_ip = 0;
583 for (size_t i = 0; i < m; i++) {
584 // First fill the c_status array w/ locations where we're allowed to
585 // generate nonzeros for this row
586 OLD_ip = Crowptr[i];
587 CSR_ip = Crowptr[i + 1];
588 for (size_t k = OLD_ip; k < CSR_ip; k++) {
589 c_status[Ccolind[k]] = k;
590
591 // Reset values in the row of C
592 Cvals[k] = SC_ZERO;
593 }
594
595 SC minusOmegaDval = -omega * Dvals(i, 0);
596
597 // Entries of B
598 for (size_t j = Browptr[i]; j < Browptr[i + 1]; j++) {
599 Scalar Bval = Bvals[j];
600 if (Bval == SC_ZERO)
601 continue;
602 LO Bij = Bcolind[j];
603 LO Cij = Bcol2Ccol[Bij];
604
605 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
606 std::runtime_error, "Trying to insert a new entry into a static graph");
607
608 Cvals[c_status[Cij]] = Bvals[j];
609 }
610
611 // Entries of -omega * Dinv * A * B
612 for (size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
613 LO Aik = Acolind[k];
614 const SC Aval = Avals[k];
615 if (Aval == SC_ZERO)
616 continue;
617
618 if (targetMapToOrigRow[Aik] != LO_INVALID) {
619 // Local matrix
620 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
621
622 for (size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
623 LO Bkj = Bcolind[j];
624 LO Cij = Bcol2Ccol[Bkj];
625
626 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
627 std::runtime_error, "Trying to insert a new entry into a static graph");
628
629 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
630 }
631
632 } else {
633 // Remote matrix
634 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
635 for (size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
636 LO Ikj = Icolind[j];
637 LO Cij = Icol2Ccol[Ikj];
638
639 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
640 std::runtime_error, "Trying to insert a new entry into a static graph");
641
642 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
643 }
644 }
645 }
646 }
647
648#ifdef HAVE_TPETRA_MMM_TIMINGS
649 MM2 = Teuchos::null;
650 MM = Teuchos::null;
651 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse ESFC"))));
652#endif
653
654 C.fillComplete(C.getDomainMap(), C.getRangeMap());
655}
656
657/*********************************************************************************************************/
658template <class Scalar,
659 class LocalOrdinal,
660 class GlobalOrdinal,
661 class LocalOrdinalViewType>
662void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
663 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
664 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
665 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
666 const LocalOrdinalViewType& Acol2Brow,
667 const LocalOrdinalViewType& Acol2Irow,
668 const LocalOrdinalViewType& Bcol2Ccol,
669 const LocalOrdinalViewType& Icol2Ccol,
670 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
671 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
672 const std::string& label,
673 const Teuchos::RCP<Teuchos::ParameterList>& params) {
674#ifdef HAVE_TPETRA_MMM_TIMINGS
675 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
676 using Teuchos::TimeMonitor;
677 Teuchos::RCP<TimeMonitor> MM;
678#endif
679
680 // Check if the diagonal entries exist in debug mode
681 const bool debug = Tpetra::Details::Behavior::debug();
682 if (debug) {
683 auto rowMap = Aview.origMatrix->getRowMap();
684 Tpetra::Vector<Scalar> diags(rowMap);
685 Aview.origMatrix->getLocalDiagCopy(diags);
686 size_t diagLength = rowMap->getLocalNumElements();
687 Teuchos::Array<Scalar> diagonal(diagLength);
688 diags.get1dCopy(diagonal());
689
690 for (size_t i = 0; i < diagLength; ++i) {
691 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
692 std::runtime_error,
693 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl
694 << "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
695 }
696 }
697
698 // Usings
699 using device_t = typename Tpetra::KokkosCompat::KokkosSYCLWrapperNode::device_type;
701 using graph_t = typename matrix_t::StaticCrsGraphType;
702 using lno_view_t = typename graph_t::row_map_type::non_const_type;
703 using int_view_t = Kokkos::View<int*,
704 typename lno_view_t::array_layout,
705 typename lno_view_t::memory_space,
706 typename lno_view_t::memory_traits>;
707 using c_lno_view_t = typename graph_t::row_map_type::const_type;
708 using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
709 using scalar_view_t = typename matrix_t::values_type::non_const_type;
710
711 // KokkosKernels handle
712 using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
713 typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
714 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
715 using int_handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
716 typename int_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
717 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
718
719 // Merge the B and Bimport matrices
720 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
721
722 // Get the properties and arrays of input matrices
723 const matrix_t& Amat = Aview.origMatrix->getLocalMatrixDevice();
724 const matrix_t& Bmat = Bview.origMatrix->getLocalMatrixDevice();
725
726 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
727 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
728 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
729
730 // Arrays of the output matrix
731 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("row_mapC"), AnumRows + 1);
732 lno_nnz_view_t entriesC;
733 scalar_view_t valuesC;
734
735 // Options
736 int team_work_size = 16;
737 std::string myalg("SPGEMM_KK_MEMORY");
738 if (!params.is_null()) {
739 if (params->isParameter("sycl: algorithm"))
740 myalg = params->get("sycl: algorithm", myalg);
741 if (params->isParameter("sycl: team work size"))
742 team_work_size = params->get("sycl: team work size", team_work_size);
743 }
744
745 // Get the algorithm mode
746 std::string nodename("SYCL");
747 std::string alg = nodename + std::string(" algorithm");
748 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
749 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
750
751 // decide whether to use integer-typed row pointers for this spgemm
752 Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
753 const bool useIntRowptrs =
754 irph.shouldUseIntRowptrs() &&
755 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
756
757 if (useIntRowptrs) {
758 int_handle_t kh;
759 kh.create_spgemm_handle(alg_enum);
760 kh.set_team_work_size(team_work_size);
761
762 int_view_t int_row_mapC(Kokkos::ViewAllocateWithoutInitializing("int_row_mapC"), AnumRows + 1);
763
764 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
765 auto Bint = irph.getIntRowptrMatrix(Bmerged);
766
767 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
768 Aint.graph.row_map, Aint.graph.entries, false,
769 Bint.graph.row_map, Bint.graph.entries, false,
770 int_row_mapC);
771
772 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
773 if (c_nnz_size) {
774 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
775 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
776 }
777
778 // even though there is no TPL for this, we have to use the same handle that was used in the symbolic phase,
779 // so need to have a special int-typed call for this as well.
780 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
781 Aint.graph.row_map, Aint.graph.entries, Amat.values, false,
782 Bint.graph.row_map, Bint.graph.entries, Bint.values, false,
783 int_row_mapC, entriesC, valuesC,
784 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
785 // transfer the integer rowptrs back to the correct rowptr type
786 Kokkos::parallel_for(
787 int_row_mapC.size(), KOKKOS_LAMBDA(int i) { row_mapC(i) = int_row_mapC(i); });
788 kh.destroy_spgemm_handle();
789 } else {
790 handle_t kh;
791 kh.create_spgemm_handle(alg_enum);
792 kh.set_team_work_size(team_work_size);
793
794 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
795 Amat.graph.row_map, Amat.graph.entries, false,
796 Bmerged.graph.row_map, Bmerged.graph.entries, false,
797 row_mapC);
798
799 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
800 if (c_nnz_size) {
801 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
802 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
803 }
804 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
805 Amat.graph.row_map, Amat.graph.entries, Amat.values, false,
806 Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false,
807 row_mapC, entriesC, valuesC,
808 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
809 kh.destroy_spgemm_handle();
810 }
811
812#ifdef HAVE_TPETRA_MMM_TIMINGS
813 MM = Teuchos::null;
814 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLSort"))));
815#endif
816
817 // Sort & set values
818 if (params.is_null() || params->get("sort entries", true))
819 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
820 C.setAllValues(row_mapC, entriesC, valuesC);
821
822#ifdef HAVE_TPETRA_MMM_TIMINGS
823 MM = Teuchos::null;
824 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLESFC"))));
825#endif
826
827 // Final Fillcomplete
828 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
829 labelList->set("Timer Label", label);
830 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
831 Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
832 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
833}
834
835} // namespace MMdetails
836} // namespace Tpetra
837
838#endif // SYCL
839
840#endif
Struct that holds views of the contents of a CrsMatrix.
Teuchos::RCP< const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > origMatrix
The original matrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.