Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
TpetraExt_MatrixMatrix_SYCL.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
11#define TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
12
13#include "Tpetra_Details_IntRowPtrHelper.hpp"
14
15#ifdef HAVE_TPETRA_INST_SYCL
16namespace Tpetra {
17namespace MMdetails {
18
19/*********************************************************************************************************/
20// MMM KernelWrappers for Partial Specialization to SYCL
21template <class Scalar,
22 class LocalOrdinal,
23 class GlobalOrdinal,
24 class LocalOrdinalViewType>
25struct KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType> {
26 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
27 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
28 const LocalOrdinalViewType& Acol2Brow,
29 const LocalOrdinalViewType& Acol2Irow,
30 const LocalOrdinalViewType& Bcol2Ccol,
31 const LocalOrdinalViewType& Icol2Ccol,
32 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
33 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
34 const std::string& label = std::string(),
35 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
36
37 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
38 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
39 const LocalOrdinalViewType& Acol2Brow,
40 const LocalOrdinalViewType& Acol2Irow,
41 const LocalOrdinalViewType& Bcol2Ccol,
42 const LocalOrdinalViewType& Icol2Ccol,
43 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
44 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
45 const std::string& label = std::string(),
46 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
47};
48
49// Jacobi KernelWrappers for Partial Specialization to SYCL
50template <class Scalar,
51 class LocalOrdinal,
52 class GlobalOrdinal, class LocalOrdinalViewType>
53struct KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType> {
54 static inline void jacobi_A_B_newmatrix_kernel_wrapper(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
55 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
56 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
57 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
58 const LocalOrdinalViewType& Acol2Brow,
59 const LocalOrdinalViewType& Acol2Irow,
60 const LocalOrdinalViewType& Bcol2Ccol,
61 const LocalOrdinalViewType& Icol2Ccol,
62 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
63 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
64 const std::string& label = std::string(),
65 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
66
67 static inline void jacobi_A_B_reuse_kernel_wrapper(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
68 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
69 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
70 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
71 const LocalOrdinalViewType& Acol2Brow,
72 const LocalOrdinalViewType& Acol2Irow,
73 const LocalOrdinalViewType& Bcol2Ccol,
74 const LocalOrdinalViewType& Icol2Ccol,
75 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
76 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
77 const std::string& label = std::string(),
78 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
79
80 static inline void jacobi_A_B_newmatrix_KokkosKernels(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
81 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
82 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
83 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
84 const LocalOrdinalViewType& Acol2Brow,
85 const LocalOrdinalViewType& Acol2Irow,
86 const LocalOrdinalViewType& Bcol2Ccol,
87 const LocalOrdinalViewType& Icol2Ccol,
88 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
89 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
90 const std::string& label = std::string(),
91 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
92};
93
94/*********************************************************************************************************/
95// AB NewMatrix Kernel wrappers (KokkosKernels/SYCL Version)
96template <class Scalar,
97 class LocalOrdinal,
98 class GlobalOrdinal,
99 class LocalOrdinalViewType>
100void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
101 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
102 const LocalOrdinalViewType& Acol2Brow,
103 const LocalOrdinalViewType& Acol2Irow,
104 const LocalOrdinalViewType& Bcol2Ccol,
105 const LocalOrdinalViewType& Icol2Ccol,
106 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
107 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
108 const std::string& label,
109 const Teuchos::RCP<Teuchos::ParameterList>& params) {
110#ifdef HAVE_TPETRA_MMM_TIMINGS
111 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
112 using Teuchos::TimeMonitor;
113 Teuchos::RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLWrapper")))));
114#endif
115 // Node-specific code
116 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
117 std::string nodename("SYCL");
118
119 // Lots and lots of typedefs
120 using Teuchos::RCP;
122 typedef typename KCRS::device_type device_t;
123 typedef typename KCRS::StaticCrsGraphType graph_t;
124 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
125 using int_view_t = Kokkos::View<int*, typename lno_view_t::array_layout, typename lno_view_t::memory_space, typename lno_view_t::memory_traits>;
126 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
127 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
128 typedef typename KCRS::values_type::non_const_type scalar_view_t;
129 // typedef typename graph_t::row_map_type::const_type lno_view_t_const;
130
131 // Options
132 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
133 std::string myalg("SPGEMM_KK_MEMORY");
134 if (!params.is_null()) {
135 if (params->isParameter("sycl: algorithm"))
136 myalg = params->get("sycl: algorithm", myalg);
137 if (params->isParameter("sycl: team work size"))
138 team_work_size = params->get("sycl: team work size", team_work_size);
139 }
140
141 // KokkosKernelsHandle
142 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
143 typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
144 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>
145 KernelHandle;
146 using IntKernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<
147 typename int_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
148 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
149
150 // Grab the Kokkos::SparseCrsMatrices
151 const KCRS Amat = Aview.origMatrix->getLocalMatrixDevice();
152 const KCRS Bmat = Bview.origMatrix->getLocalMatrixDevice();
153
154 c_lno_view_t Arowptr = Amat.graph.row_map,
155 Browptr = Bmat.graph.row_map;
156 const lno_nnz_view_t Acolind = Amat.graph.entries,
157 Bcolind = Bmat.graph.entries;
158 const scalar_view_t Avals = Amat.values,
159 Bvals = Bmat.values;
160
161 // Get the algorithm mode
162 std::string alg = nodename + std::string(" algorithm");
163 // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
164 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
165 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
166
167 // Merge the B and Bimport matrices
168 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
169
170#ifdef HAVE_TPETRA_MMM_TIMINGS
171 MM = Teuchos::null;
172 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLCore"))));
173#endif
174
175 // Do the multiply on whatever we've got
176 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
177 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
178 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
179
180 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_lno_row"), AnumRows + 1);
181 lno_nnz_view_t entriesC;
182 scalar_view_t valuesC;
183
184 // static_assert(std::is_void_v<typename KCRS::row_map_type::non_const_value_type>, "");
185 // static_assert(std::is_void_v<typename decltype(Bmerged)::row_map_type::non_const_value_type>, "");
186 Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
187 const bool useIntRowptrs =
188 irph.shouldUseIntRowptrs() &&
189 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
190
191 if (useIntRowptrs) {
192 IntKernelHandle kh;
193 kh.create_spgemm_handle(alg_enum);
194 kh.set_team_work_size(team_work_size);
195
196 int_view_t int_row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_int_row"), AnumRows + 1);
197
198 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
199 auto Bint = irph.getIntRowptrMatrix(Bmerged);
200 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Aint.graph.row_map, Aint.graph.entries, false, Bint.graph.row_map, Bint.graph.entries, false, int_row_mapC);
201
202 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
203 if (c_nnz_size) {
204 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
205 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
206 }
207 KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Aint.graph.row_map, Aint.graph.entries, Aint.values, false, Bint.graph.row_map, Bint.graph.entries, Bint.values, false, int_row_mapC, entriesC, valuesC);
208 // transfer the integer rowptrs back to the correct rowptr type
209 Kokkos::parallel_for(
210 int_row_mapC.size(), KOKKOS_LAMBDA(int i) { row_mapC(i) = int_row_mapC(i); });
211 kh.destroy_spgemm_handle();
212
213 } else {
214 KernelHandle kh;
215 kh.create_spgemm_handle(alg_enum);
216 kh.set_team_work_size(team_work_size);
217
218 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Amat.graph.row_map, Amat.graph.entries, false, Bmerged.graph.row_map, Bmerged.graph.entries, false, row_mapC);
219
220 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
221 if (c_nnz_size) {
222 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
223 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
224 }
225
226 KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Amat.graph.row_map, Amat.graph.entries, Amat.values, false, Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false, row_mapC, entriesC, valuesC);
227 kh.destroy_spgemm_handle();
228 }
229
230#ifdef HAVE_TPETRA_MMM_TIMINGS
231 MM = Teuchos::null;
232 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLSort"))));
233#endif
234
235 // Sort & set values
236 if (params.is_null() || params->get("sort entries", true))
237 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
238 C.setAllValues(row_mapC, entriesC, valuesC);
239
240#ifdef HAVE_TPETRA_MMM_TIMINGS
241 MM = Teuchos::null;
242 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLESFC"))));
243#endif
244
245 // Final Fillcomplete
246 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
247 labelList->set("Timer Label", label);
248 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
249 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
250 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
251}
252
253/*********************************************************************************************************/
254template <class Scalar,
255 class LocalOrdinal,
256 class GlobalOrdinal,
257 class LocalOrdinalViewType>
258void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(
259 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
260 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
261 const LocalOrdinalViewType& targetMapToOrigRow_dev,
262 const LocalOrdinalViewType& targetMapToImportRow_dev,
263 const LocalOrdinalViewType& Bcol2Ccol_dev,
264 const LocalOrdinalViewType& Icol2Ccol_dev,
265 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
266 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
267 const std::string& label,
268 const Teuchos::RCP<Teuchos::ParameterList>& params) {
269 // FIXME: Right now, this is a cut-and-paste of the serial kernel
270 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
271
272 // By default, if A*B results in an entry that is not supported by the graph of C, we throw.
273 // This option allows to override this behavior and silently ignores such entries.
274 bool throwOnInsert = true;
275 if (!params.is_null() && params->isType<bool>("MM Throw For Non-Existent Entries"))
276 throwOnInsert = params->get<bool>("MM Throw For Non-Existent Entries");
277
278#ifdef HAVE_TPETRA_MMM_TIMINGS
279 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
280 using Teuchos::TimeMonitor;
281 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse SerialCore"))));
282 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
283#endif
284 using Teuchos::RCP;
285 using Teuchos::rcp;
286
287 // Lots and lots of typedefs
288 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
289 typedef typename KCRS::StaticCrsGraphType graph_t;
290 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
291 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
292 typedef typename KCRS::values_type::non_const_type scalar_view_t;
293
294 typedef Scalar SC;
295 typedef LocalOrdinal LO;
296 typedef GlobalOrdinal GO;
297 typedef Node NO;
298 typedef Map<LO, GO, NO> map_type;
299 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
300 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
301 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
302
303 // Since this is being run on SYCL, we need to fence because the below code will use UVM
304 // typename graph_t::execution_space().fence();
305
306 // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
307 // KDDKDD UVM Ideally, this function would run on device and use
308 // KDDKDD UVM KokkosKernels instead of this host implementation.
309 auto targetMapToOrigRow =
310 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
311 targetMapToOrigRow_dev);
312 auto targetMapToImportRow =
313 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
314 targetMapToImportRow_dev);
315 auto Bcol2Ccol =
316 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
317 Bcol2Ccol_dev);
318 auto Icol2Ccol =
319 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
320 Icol2Ccol_dev);
321
322 // Sizes
323 RCP<const map_type> Ccolmap = C.getColMap();
324 size_t m = Aview.origMatrix->getLocalNumRows();
325 size_t n = Ccolmap->getLocalNumElements();
326
327 // Grab the Kokkos::SparseCrsMatrices & inner stuff
328 const KCRS Amat = Aview.origMatrix->getLocalMatrixHost();
329 const KCRS Bmat = Bview.origMatrix->getLocalMatrixHost();
330 const KCRS Cmat = C.getLocalMatrixHost();
331
332 c_lno_view_t Arowptr = Amat.graph.row_map,
333 Browptr = Bmat.graph.row_map,
334 Crowptr = Cmat.graph.row_map;
335 const lno_nnz_view_t Acolind = Amat.graph.entries,
336 Bcolind = Bmat.graph.entries,
337 Ccolind = Cmat.graph.entries;
338 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
339 scalar_view_t Cvals = Cmat.values;
340
341 c_lno_view_t Irowptr;
342 lno_nnz_view_t Icolind;
343 scalar_view_t Ivals;
344 if (!Bview.importMatrix.is_null()) {
345 auto lclB = Bview.importMatrix->getLocalMatrixHost();
346 Irowptr = lclB.graph.row_map;
347 Icolind = lclB.graph.entries;
348 Ivals = lclB.values;
349 }
350
351#ifdef HAVE_TPETRA_MMM_TIMINGS
352 MM2 = Teuchos::null;
353 MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SerialCore - Compare"))));
354#endif
355
356 // Classic csr assembly (low memory edition)
357 // mfh 27 Sep 2016: The c_status array is an implementation detail
358 // of the local sparse matrix-matrix multiply routine.
359
360 // The status array will contain the index into colind where this entry was last deposited.
361 // c_status[i] < CSR_ip - not in the row yet
362 // c_status[i] >= CSR_ip - this is the entry where you can find the data
363 // We start with this filled with INVALID's indicating that there are no entries yet.
364 // Sadly, this complicates the code due to the fact that size_t's are unsigned.
365 std::vector<size_t> c_status(n, ST_INVALID);
366
367 // For each row of A/C
368 size_t CSR_ip = 0, OLD_ip = 0;
369 for (size_t i = 0; i < m; i++) {
370 // First fill the c_status array w/ locations where we're allowed to
371 // generate nonzeros for this row
372 OLD_ip = Crowptr[i];
373 CSR_ip = Crowptr[i + 1];
374 for (size_t k = OLD_ip; k < CSR_ip; k++) {
375 c_status[Ccolind[k]] = k;
376
377 // Reset values in the row of C
378 Cvals[k] = SC_ZERO;
379 }
380
381 for (size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
382 LO Aik = Acolind[k];
383 const SC Aval = Avals[k];
384 if (Aval == SC_ZERO)
385 continue;
386
387 if (targetMapToOrigRow[Aik] != LO_INVALID) {
388 // Local matrix
389 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
390
391 for (size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
392 LO Bkj = Bcolind[j];
393 LO Cij = Bcol2Ccol[Bkj];
394
395 const bool badInsert = c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip;
396 if (!badInsert)
397 Cvals[c_status[Cij]] += Aval * Bvals[j];
398 else if (throwOnInsert)
399 TEUCHOS_TEST_FOR_EXCEPTION(badInsert,
400 std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph "
401 << "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
402 }
403
404 } else {
405 // Remote matrix
406 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
407 for (size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
408 LO Ikj = Icolind[j];
409 LO Cij = Icol2Ccol[Ikj];
410
411 const bool badInsert = c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip;
412 if (!badInsert)
413 Cvals[c_status[Cij]] += Aval * Ivals[j];
414 else if (throwOnInsert)
415 TEUCHOS_TEST_FOR_EXCEPTION(badInsert,
416 std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph "
417 << "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
418 }
419 }
420 }
421 }
422
423 C.fillComplete(C.getDomainMap(), C.getRangeMap());
424}
425
426/*********************************************************************************************************/
427template <class Scalar,
428 class LocalOrdinal,
429 class GlobalOrdinal,
430 class LocalOrdinalViewType>
431void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
432 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
433 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
434 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
435 const LocalOrdinalViewType& Acol2Brow,
436 const LocalOrdinalViewType& Acol2Irow,
437 const LocalOrdinalViewType& Bcol2Ccol,
438 const LocalOrdinalViewType& Icol2Ccol,
439 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
440 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
441 const std::string& label,
442 const Teuchos::RCP<Teuchos::ParameterList>& params) {
443#ifdef HAVE_TPETRA_MMM_TIMINGS
444 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
445 using Teuchos::TimeMonitor;
446 Teuchos::RCP<TimeMonitor> MM;
447#endif
448
449 // Node-specific code
450 using Teuchos::RCP;
451
452 // Options
453 // int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer // unreferenced
454 std::string myalg("KK");
455 if (!params.is_null()) {
456 if (params->isParameter("sycl: jacobi algorithm"))
457 myalg = params->get("sycl: jacobi algorithm", myalg);
458 }
459
460 if (myalg == "MSAK") {
461 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
462 } else if (myalg == "KK") {
463 jacobi_A_B_newmatrix_KokkosKernels(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
464 } else {
465 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
466 }
467
468#ifdef HAVE_TPETRA_MMM_TIMINGS
469 MM = Teuchos::null;
470 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLESFC"))));
471#endif
472
473 // Final Fillcomplete
474 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
475 labelList->set("Timer Label", label);
476 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
477
478 // NOTE: MSAK already fillCompletes, so we have to check here
479 if (!C.isFillComplete()) {
480 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
481 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
482 }
483}
484
485/*********************************************************************************************************/
486template <class Scalar,
487 class LocalOrdinal,
488 class GlobalOrdinal,
489 class LocalOrdinalViewType>
490void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
491 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
492 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
493 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
494 const LocalOrdinalViewType& targetMapToOrigRow_dev,
495 const LocalOrdinalViewType& targetMapToImportRow_dev,
496 const LocalOrdinalViewType& Bcol2Ccol_dev,
497 const LocalOrdinalViewType& Icol2Ccol_dev,
498 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
499 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
500 const std::string& label,
501 const Teuchos::RCP<Teuchos::ParameterList>& params) {
502 // FIXME: Right now, this is a cut-and-paste of the serial kernel
503 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
504
505#ifdef HAVE_TPETRA_MMM_TIMINGS
506 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
507 using Teuchos::TimeMonitor;
508 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse SYCLCore"))));
509 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
510#endif
511 using Teuchos::RCP;
512 using Teuchos::rcp;
513
514 // Lots and lots of typedefs
515 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
516 typedef typename KCRS::StaticCrsGraphType graph_t;
517 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
518 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
519 typedef typename KCRS::values_type::non_const_type scalar_view_t;
520 typedef typename scalar_view_t::memory_space scalar_memory_space;
521
522 typedef Scalar SC;
523 typedef LocalOrdinal LO;
524 typedef GlobalOrdinal GO;
525 typedef Node NO;
526 typedef Map<LO, GO, NO> map_type;
527 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
528 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
529 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
530
531 // Since this is being run on SYCL, we need to fence because the below host code will use UVM
532 // KDDKDD typename graph_t::execution_space().fence();
533
534 // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
535 // KDDKDD UVM Ideally, this function would run on device and use
536 // KDDKDD UVM KokkosKernels instead of this host implementation.
537 auto targetMapToOrigRow =
538 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
539 targetMapToOrigRow_dev);
540 auto targetMapToImportRow =
541 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
542 targetMapToImportRow_dev);
543 auto Bcol2Ccol =
544 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
545 Bcol2Ccol_dev);
546 auto Icol2Ccol =
547 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
548 Icol2Ccol_dev);
549
550 // Sizes
551 RCP<const map_type> Ccolmap = C.getColMap();
552 size_t m = Aview.origMatrix->getLocalNumRows();
553 size_t n = Ccolmap->getLocalNumElements();
554
555 // Grab the Kokkos::SparseCrsMatrices & inner stuff
556 const KCRS Amat = Aview.origMatrix->getLocalMatrixHost();
557 const KCRS Bmat = Bview.origMatrix->getLocalMatrixHost();
558 const KCRS Cmat = C.getLocalMatrixHost();
559
560 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
561 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
562 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
563 scalar_view_t Cvals = Cmat.values;
564
565 c_lno_view_t Irowptr;
566 lno_nnz_view_t Icolind;
567 scalar_view_t Ivals;
568 if (!Bview.importMatrix.is_null()) {
569 auto lclB = Bview.importMatrix->getLocalMatrixHost();
570 Irowptr = lclB.graph.row_map;
571 Icolind = lclB.graph.entries;
572 Ivals = lclB.values;
573 }
574
575 // Jacobi-specific inner stuff
576 auto Dvals =
577 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
578
579#ifdef HAVE_TPETRA_MMM_TIMINGS
580 MM2 = Teuchos::null;
581 MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse SYCLCore - Compare"))));
582#endif
583
584 // The status array will contain the index into colind where this entry was last deposited.
585 // c_status[i] < CSR_ip - not in the row yet
586 // c_status[i] >= CSR_ip - this is the entry where you can find the data
587 // We start with this filled with INVALID's indicating that there are no entries yet.
588 // Sadly, this complicates the code due to the fact that size_t's are unsigned.
589 std::vector<size_t> c_status(n, ST_INVALID);
590
591 // For each row of A/C
592 size_t CSR_ip = 0, OLD_ip = 0;
593 for (size_t i = 0; i < m; i++) {
594 // First fill the c_status array w/ locations where we're allowed to
595 // generate nonzeros for this row
596 OLD_ip = Crowptr[i];
597 CSR_ip = Crowptr[i + 1];
598 for (size_t k = OLD_ip; k < CSR_ip; k++) {
599 c_status[Ccolind[k]] = k;
600
601 // Reset values in the row of C
602 Cvals[k] = SC_ZERO;
603 }
604
605 SC minusOmegaDval = -omega * Dvals(i, 0);
606
607 // Entries of B
608 for (size_t j = Browptr[i]; j < Browptr[i + 1]; j++) {
609 Scalar Bval = Bvals[j];
610 if (Bval == SC_ZERO)
611 continue;
612 LO Bij = Bcolind[j];
613 LO Cij = Bcol2Ccol[Bij];
614
615 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
616 std::runtime_error, "Trying to insert a new entry into a static graph");
617
618 Cvals[c_status[Cij]] = Bvals[j];
619 }
620
621 // Entries of -omega * Dinv * A * B
622 for (size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
623 LO Aik = Acolind[k];
624 const SC Aval = Avals[k];
625 if (Aval == SC_ZERO)
626 continue;
627
628 if (targetMapToOrigRow[Aik] != LO_INVALID) {
629 // Local matrix
630 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
631
632 for (size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
633 LO Bkj = Bcolind[j];
634 LO Cij = Bcol2Ccol[Bkj];
635
636 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
637 std::runtime_error, "Trying to insert a new entry into a static graph");
638
639 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
640 }
641
642 } else {
643 // Remote matrix
644 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
645 for (size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
646 LO Ikj = Icolind[j];
647 LO Cij = Icol2Ccol[Ikj];
648
649 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
650 std::runtime_error, "Trying to insert a new entry into a static graph");
651
652 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
653 }
654 }
655 }
656 }
657
658#ifdef HAVE_TPETRA_MMM_TIMINGS
659 MM2 = Teuchos::null;
660 MM = Teuchos::null;
661 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse ESFC"))));
662#endif
663
664 C.fillComplete(C.getDomainMap(), C.getRangeMap());
665}
666
667/*********************************************************************************************************/
668template <class Scalar,
669 class LocalOrdinal,
670 class GlobalOrdinal,
671 class LocalOrdinalViewType>
672void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
673 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
674 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
675 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
676 const LocalOrdinalViewType& Acol2Brow,
677 const LocalOrdinalViewType& Acol2Irow,
678 const LocalOrdinalViewType& Bcol2Ccol,
679 const LocalOrdinalViewType& Icol2Ccol,
680 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
681 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
682 const std::string& label,
683 const Teuchos::RCP<Teuchos::ParameterList>& params) {
684#ifdef HAVE_TPETRA_MMM_TIMINGS
685 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
686 using Teuchos::TimeMonitor;
687 Teuchos::RCP<TimeMonitor> MM;
688#endif
689
690 // Check if the diagonal entries exist in debug mode
691 const bool debug = Tpetra::Details::Behavior::debug();
692 if (debug) {
693 auto rowMap = Aview.origMatrix->getRowMap();
694 Tpetra::Vector<Scalar> diags(rowMap);
695 Aview.origMatrix->getLocalDiagCopy(diags);
696 size_t diagLength = rowMap->getLocalNumElements();
697 Teuchos::Array<Scalar> diagonal(diagLength);
698 diags.get1dCopy(diagonal());
699
700 for (size_t i = 0; i < diagLength; ++i) {
701 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
702 std::runtime_error,
703 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl
704 << "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
705 }
706 }
707
708 // Usings
709 using device_t = typename Tpetra::KokkosCompat::KokkosSYCLWrapperNode::device_type;
711 using graph_t = typename matrix_t::StaticCrsGraphType;
712 using lno_view_t = typename graph_t::row_map_type::non_const_type;
713 using int_view_t = Kokkos::View<int*,
714 typename lno_view_t::array_layout,
715 typename lno_view_t::memory_space,
716 typename lno_view_t::memory_traits>;
717 using c_lno_view_t = typename graph_t::row_map_type::const_type;
718 using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
719 using scalar_view_t = typename matrix_t::values_type::non_const_type;
720
721 // KokkosKernels handle
722 using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
723 typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
724 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
725 using int_handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
726 typename int_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
727 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
728
729 // Merge the B and Bimport matrices
730 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
731
732 // Get the properties and arrays of input matrices
733 const matrix_t Amat = Aview.origMatrix->getLocalMatrixDevice();
734 const matrix_t Bmat = Bview.origMatrix->getLocalMatrixDevice();
735
736 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
737 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
738 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
739
740 // Arrays of the output matrix
741 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("row_mapC"), AnumRows + 1);
742 lno_nnz_view_t entriesC;
743 scalar_view_t valuesC;
744
745 // Options
746 int team_work_size = 16;
747 std::string myalg("SPGEMM_KK_MEMORY");
748 if (!params.is_null()) {
749 if (params->isParameter("sycl: algorithm"))
750 myalg = params->get("sycl: algorithm", myalg);
751 if (params->isParameter("sycl: team work size"))
752 team_work_size = params->get("sycl: team work size", team_work_size);
753 }
754
755 // Get the algorithm mode
756 std::string nodename("SYCL");
757 std::string alg = nodename + std::string(" algorithm");
758 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
759 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
760
761 // decide whether to use integer-typed row pointers for this spgemm
762 Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
763 const bool useIntRowptrs =
764 irph.shouldUseIntRowptrs() &&
765 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
766
767 const Scalar jacobiOmega = omega * Teuchos::ScalarTraits<Scalar>::one();
768
769 if (useIntRowptrs) {
770 int_handle_t kh;
771 kh.create_spgemm_handle(alg_enum);
772 kh.set_team_work_size(team_work_size);
773
774 int_view_t int_row_mapC(Kokkos::ViewAllocateWithoutInitializing("int_row_mapC"), AnumRows + 1);
775
776 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
777 auto Bint = irph.getIntRowptrMatrix(Bmerged);
778
779 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
780 Aint.graph.row_map, Aint.graph.entries, false,
781 Bint.graph.row_map, Bint.graph.entries, false,
782 int_row_mapC);
783
784 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
785 if (c_nnz_size) {
786 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
787 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
788 }
789
790 // even though there is no TPL for this, we have to use the same handle that was used in the symbolic phase,
791 // so need to have a special int-typed call for this as well.
792 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
793 Aint.graph.row_map, Aint.graph.entries, Amat.values, false,
794 Bint.graph.row_map, Bint.graph.entries, Bint.values, false,
795 int_row_mapC, entriesC, valuesC,
796 jacobiOmega, Dinv.getLocalViewDevice(Access::ReadOnly));
797 // transfer the integer rowptrs back to the correct rowptr type
798 Kokkos::parallel_for(
799 int_row_mapC.size(), KOKKOS_LAMBDA(int i) { row_mapC(i) = int_row_mapC(i); });
800 kh.destroy_spgemm_handle();
801 } else {
802 handle_t kh;
803 kh.create_spgemm_handle(alg_enum);
804 kh.set_team_work_size(team_work_size);
805
806 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
807 Amat.graph.row_map, Amat.graph.entries, false,
808 Bmerged.graph.row_map, Bmerged.graph.entries, false,
809 row_mapC);
810
811 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
812 if (c_nnz_size) {
813 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
814 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
815 }
816 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
817 Amat.graph.row_map, Amat.graph.entries, Amat.values, false,
818 Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false,
819 row_mapC, entriesC, valuesC,
820 jacobiOmega, Dinv.getLocalViewDevice(Access::ReadOnly));
821 kh.destroy_spgemm_handle();
822 }
823
824#ifdef HAVE_TPETRA_MMM_TIMINGS
825 MM = Teuchos::null;
826 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLSort"))));
827#endif
828
829 // Sort & set values
830 if (params.is_null() || params->get("sort entries", true))
831 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
832 C.setAllValues(row_mapC, entriesC, valuesC);
833
834#ifdef HAVE_TPETRA_MMM_TIMINGS
835 MM = Teuchos::null;
836 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLESFC"))));
837#endif
838
839 // Final Fillcomplete
840 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
841 labelList->set("Timer Label", label);
842 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
843 Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
844 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
845}
846
847} // namespace MMdetails
848} // namespace Tpetra
849
850#endif // SYCL
851
852#endif
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Struct that holds views of the contents of a CrsMatrix.
Teuchos::RCP< const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > origMatrix
The original matrix.
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.