Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
TpetraExt_MatrixMatrix_HIP.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_MATRIXMATRIX_HIP_DEF_HPP
11#define TPETRA_MATRIXMATRIX_HIP_DEF_HPP
12
13#include "Tpetra_Details_IntRowPtrHelper.hpp"
14
15#ifdef HAVE_TPETRA_INST_HIP
16namespace Tpetra {
17namespace MMdetails {
18
19/*********************************************************************************************************/
20// MMM KernelWrappers for Partial Specialization to HIP
21template <class Scalar,
22 class LocalOrdinal,
23 class GlobalOrdinal,
24 class LocalOrdinalViewType>
25struct KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType> {
26 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
27 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
28 const LocalOrdinalViewType& Acol2Brow,
29 const LocalOrdinalViewType& Acol2Irow,
30 const LocalOrdinalViewType& Bcol2Ccol,
31 const LocalOrdinalViewType& Icol2Ccol,
32 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
33 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
34 const std::string& label = std::string(),
35 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
36
37 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
38 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
39 const LocalOrdinalViewType& Acol2Brow,
40 const LocalOrdinalViewType& Acol2Irow,
41 const LocalOrdinalViewType& Bcol2Ccol,
42 const LocalOrdinalViewType& Icol2Ccol,
43 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
44 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
45 const std::string& label = std::string(),
46 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
47};
48
49// Jacobi KernelWrappers for Partial Specialization to HIP
50template <class Scalar,
51 class LocalOrdinal,
52 class GlobalOrdinal, class LocalOrdinalViewType>
53struct KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType> {
54 static inline void jacobi_A_B_newmatrix_kernel_wrapper(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
55 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
56 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
57 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
58 const LocalOrdinalViewType& Acol2Brow,
59 const LocalOrdinalViewType& Acol2Irow,
60 const LocalOrdinalViewType& Bcol2Ccol,
61 const LocalOrdinalViewType& Icol2Ccol,
62 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
63 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
64 const std::string& label = std::string(),
65 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
66
67 static inline void jacobi_A_B_reuse_kernel_wrapper(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
68 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
69 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
70 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
71 const LocalOrdinalViewType& Acol2Brow,
72 const LocalOrdinalViewType& Acol2Irow,
73 const LocalOrdinalViewType& Bcol2Ccol,
74 const LocalOrdinalViewType& Icol2Ccol,
75 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
76 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
77 const std::string& label = std::string(),
78 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
79
80 static inline void jacobi_A_B_newmatrix_KokkosKernels(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
81 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
82 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
83 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
84 const LocalOrdinalViewType& Acol2Brow,
85 const LocalOrdinalViewType& Acol2Irow,
86 const LocalOrdinalViewType& Bcol2Ccol,
87 const LocalOrdinalViewType& Icol2Ccol,
88 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
89 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
90 const std::string& label = std::string(),
91 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
92};
93
94/*********************************************************************************************************/
95// AB NewMatrix Kernel wrappers (KokkosKernels/HIP Version)
96template <class Scalar,
97 class LocalOrdinal,
98 class GlobalOrdinal,
99 class LocalOrdinalViewType>
100void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
101 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
102 const LocalOrdinalViewType& Acol2Brow,
103 const LocalOrdinalViewType& Acol2Irow,
104 const LocalOrdinalViewType& Bcol2Ccol,
105 const LocalOrdinalViewType& Icol2Ccol,
106 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
107 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
108 const std::string& label,
109 const Teuchos::RCP<Teuchos::ParameterList>& params) {
110 // Node-specific code
111 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
112 std::string nodename("HIP");
113
114 // Lots and lots of typedefs
115 using Teuchos::RCP;
116 using Teuchos::rcp;
118 typedef typename KCRS::device_type device_t;
119 typedef typename KCRS::StaticCrsGraphType graph_t;
120 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
121 using int_view_t = Kokkos::View<int*, typename lno_view_t::array_layout, typename lno_view_t::memory_space, typename lno_view_t::memory_traits>;
122 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
123 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
124 typedef typename KCRS::values_type::non_const_type scalar_view_t;
125 // typedef typename graph_t::row_map_type::const_type lno_view_t_const;
126
127 RCP<Tpetra::Details::ProfilingRegion> MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix HIPWrapper"));
128
129 // Options
130 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
131 std::string myalg("SPGEMM_KK_MEMORY");
132 if (!params.is_null()) {
133 if (params->isParameter("hip: algorithm"))
134 myalg = params->get("hip: algorithm", myalg);
135 if (params->isParameter("hip: team work size"))
136 team_work_size = params->get("hip: team work size", team_work_size);
137 }
138
139 // KokkosKernelsHandle
140 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
141 typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
142 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>
143 KernelHandle;
144 using IntKernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<
145 typename int_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
146 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
147
148 // Grab the Kokkos::SparseCrsMatrices
149 const KCRS Amat = Aview.origMatrix->getLocalMatrixDevice();
150 const KCRS Bmat = Bview.origMatrix->getLocalMatrixDevice();
151
152 c_lno_view_t Arowptr = Amat.graph.row_map,
153 Browptr = Bmat.graph.row_map;
154 const lno_nnz_view_t Acolind = Amat.graph.entries,
155 Bcolind = Bmat.graph.entries;
156 const scalar_view_t Avals = Amat.values,
157 Bvals = Bmat.values;
158
159 // Get the algorithm mode
160 std::string alg = nodename + std::string(" algorithm");
161 // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
162 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
163 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
164
165 // Merge the B and Bimport matrices
166 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
167
168 MM = Teuchos::null;
169 MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix HIP Core"));
170
171 // Do the multiply on whatever we've got
172 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
173 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
174 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
175
176 // regardless of whether integer row ptrs are used, need to ultimately produce the row pointer, entries, and values of the expected types
177 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_lno_row"), AnumRows + 1);
178 lno_nnz_view_t entriesC;
179 scalar_view_t valuesC;
180
181 Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
182 const bool useIntRowptrs =
183 irph.shouldUseIntRowptrs() &&
184 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
185
186 if (useIntRowptrs) {
187 IntKernelHandle kh;
188 kh.create_spgemm_handle(alg_enum);
189 kh.set_team_work_size(team_work_size);
190
191 int_view_t int_row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_int_row"), AnumRows + 1);
192
193 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
194 auto Bint = irph.getIntRowptrMatrix(Bmerged);
195
196 {
197 Tpetra::Details::ProfilingRegion MM2("TpetraExt: MMM: Newmatrix KokkosKernels symbolic int");
198 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Aint.graph.row_map, Aint.graph.entries, false, Bint.graph.row_map, Bint.graph.entries, false, int_row_mapC);
199 }
200 Tpetra::Details::ProfilingRegion MM2("TpetraExt: MMM: Newmatrix KokkosKernels numeric int");
201
202 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
203 if (c_nnz_size) {
204 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
205 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
206 }
207 KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Aint.graph.row_map, Aint.graph.entries, Aint.values, false, Bint.graph.row_map, Bint.graph.entries, Bint.values, false, int_row_mapC, entriesC, valuesC);
208 // transfer the integer rowptrs back to the correct rowptr type
209 Kokkos::parallel_for(
210 int_row_mapC.size(), KOKKOS_LAMBDA(int i) { row_mapC(i) = int_row_mapC(i); });
211 kh.destroy_spgemm_handle();
212 } else {
213 KernelHandle kh;
214 kh.create_spgemm_handle(alg_enum);
215 kh.set_team_work_size(team_work_size);
216
217 {
218 Tpetra::Details::ProfilingRegion MM2("TpetraExt: MMM: Newmatrix KokkosKernels symbolic non-int");
219 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Amat.graph.row_map, Amat.graph.entries, false, Bmerged.graph.row_map, Bmerged.graph.entries, false, row_mapC);
220 }
221
222 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
223 if (c_nnz_size) {
224 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
225 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
226 }
227
228 Tpetra::Details::ProfilingRegion MM2("TpetraExt: MMM: Newmatrix KokkosKernels numeric non-int");
229 KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Amat.graph.row_map, Amat.graph.entries, Amat.values, false, Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false, row_mapC, entriesC, valuesC);
230 kh.destroy_spgemm_handle();
231 }
232
233 MM = Teuchos::null;
234 MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix HIP Sort"));
235
236 // Sort & set values
237 if (params.is_null() || params->get("sort entries", true))
238 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
239 C.setAllValues(row_mapC, entriesC, valuesC);
240
241 MM = Teuchos::null;
242 MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix HIP ESFC"));
243
244 // Final Fillcomplete
245 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
246 labelList->set("Timer Label", label);
247 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
248 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
249 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
250}
251
252/*********************************************************************************************************/
253template <class Scalar,
254 class LocalOrdinal,
255 class GlobalOrdinal,
256 class LocalOrdinalViewType>
257void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
258 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
259 const LocalOrdinalViewType& targetMapToOrigRow_dev,
260 const LocalOrdinalViewType& targetMapToImportRow_dev,
261 const LocalOrdinalViewType& Bcol2Ccol_dev,
262 const LocalOrdinalViewType& Icol2Ccol_dev,
263 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
264 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
265 const std::string& label,
266 const Teuchos::RCP<Teuchos::ParameterList>& params) {
267 // FIXME: Right now, this is a cut-and-paste of the serial kernel
268 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
269 using Teuchos::RCP;
270 using Teuchos::rcp;
271
272 // By default, if A*B results in an entry that is not supported by the graph of C, we throw.
273 // This option allows to override this behavior and silently ignores such entries.
274 bool throwOnInsert = true;
275 if (!params.is_null() && params->isType<bool>("MM Throw For Non-Existent Entries"))
276 throwOnInsert = params->get<bool>("MM Throw For Non-Existent Entries");
277
278 RCP<Tpetra::Details::ProfilingRegion> MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Reuse SerialCore"));
279
280 // Lots and lots of typedefs
281 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
282 typedef typename KCRS::StaticCrsGraphType graph_t;
283 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
284 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
285 typedef typename KCRS::values_type::non_const_type scalar_view_t;
286
287 typedef Scalar SC;
288 typedef LocalOrdinal LO;
289 typedef GlobalOrdinal GO;
290 typedef Node NO;
291 typedef Map<LO, GO, NO> map_type;
292 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
293 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
294 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
295
296 // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
297 // KDDKDD UVM Ideally, this function would run on device and use
298 // KDDKDD UVM KokkosKernels instead of this host implementation.
299 auto targetMapToOrigRow =
300 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
301 targetMapToOrigRow_dev);
302 auto targetMapToImportRow =
303 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
304 targetMapToImportRow_dev);
305 auto Bcol2Ccol =
306 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
307 Bcol2Ccol_dev);
308 auto Icol2Ccol =
309 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
310 Icol2Ccol_dev);
311
312 // Sizes
313 RCP<const map_type> Ccolmap = C.getColMap();
314 size_t m = Aview.origMatrix->getLocalNumRows();
315 size_t n = Ccolmap->getLocalNumElements();
316
317 // Grab the Kokkos::SparseCrsMatrices & inner stuff
318 const KCRS Amat = Aview.origMatrix->getLocalMatrixHost();
319 const KCRS Bmat = Bview.origMatrix->getLocalMatrixHost();
320 const KCRS Cmat = C.getLocalMatrixHost();
321
322 c_lno_view_t Arowptr = Amat.graph.row_map,
323 Browptr = Bmat.graph.row_map,
324 Crowptr = Cmat.graph.row_map;
325 const lno_nnz_view_t Acolind = Amat.graph.entries,
326 Bcolind = Bmat.graph.entries,
327 Ccolind = Cmat.graph.entries;
328 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
329 scalar_view_t Cvals = Cmat.values;
330
331 c_lno_view_t Irowptr;
332 lno_nnz_view_t Icolind;
333 scalar_view_t Ivals;
334 if (!Bview.importMatrix.is_null()) {
335 auto lclB = Bview.importMatrix->getLocalMatrixHost();
336 Irowptr = lclB.graph.row_map;
337 Icolind = lclB.graph.entries;
338 Ivals = lclB.values;
339 }
340
341 auto MM2 = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix SerialCore - Compare"));
342
343 // Classic csr assembly (low memory edition)
344 // mfh 27 Sep 2016: The c_status array is an implementation detail
345 // of the local sparse matrix-matrix multiply routine.
346
347 // The status array will contain the index into colind where this entry was last deposited.
348 // c_status[i] < CSR_ip - not in the row yet
349 // c_status[i] >= CSR_ip - this is the entry where you can find the data
350 // We start with this filled with INVALID's indicating that there are no entries yet.
351 // Sadly, this complicates the code due to the fact that size_t's are unsigned.
352 std::vector<size_t> c_status(n, ST_INVALID);
353
354 // For each row of A/C
355 size_t CSR_ip = 0, OLD_ip = 0;
356 for (size_t i = 0; i < m; i++) {
357 // First fill the c_status array w/ locations where we're allowed to
358 // generate nonzeros for this row
359 OLD_ip = Crowptr[i];
360 CSR_ip = Crowptr[i + 1];
361 for (size_t k = OLD_ip; k < CSR_ip; k++) {
362 c_status[Ccolind[k]] = k;
363
364 // Reset values in the row of C
365 Cvals[k] = SC_ZERO;
366 }
367
368 for (size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
369 LO Aik = Acolind[k];
370 const SC Aval = Avals[k];
371 if (Aval == SC_ZERO)
372 continue;
373
374 if (targetMapToOrigRow[Aik] != LO_INVALID) {
375 // Local matrix
376 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
377
378 for (size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
379 LO Bkj = Bcolind[j];
380 LO Cij = Bcol2Ccol[Bkj];
381
382 const bool badInsert = c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip;
383 if (!badInsert)
384 Cvals[c_status[Cij]] += Aval * Bvals[j];
385 else if (throwOnInsert)
386 TEUCHOS_TEST_FOR_EXCEPTION(badInsert,
387 std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph "
388 << "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
389 }
390
391 } else {
392 // Remote matrix
393 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
394 for (size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
395 LO Ikj = Icolind[j];
396 LO Cij = Icol2Ccol[Ikj];
397
398 const bool badInsert = c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip;
399 if (!badInsert)
400 Cvals[c_status[Cij]] += Aval * Ivals[j];
401 else if (throwOnInsert)
402 TEUCHOS_TEST_FOR_EXCEPTION(badInsert,
403 std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph "
404 << "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
405 }
406 }
407 }
408 }
409
410 C.fillComplete(C.getDomainMap(), C.getRangeMap());
411}
412
413/*********************************************************************************************************/
414template <class Scalar,
415 class LocalOrdinal,
416 class GlobalOrdinal,
417 class LocalOrdinalViewType>
418void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
419 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
420 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
421 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
422 const LocalOrdinalViewType& Acol2Brow,
423 const LocalOrdinalViewType& Acol2Irow,
424 const LocalOrdinalViewType& Bcol2Ccol,
425 const LocalOrdinalViewType& Icol2Ccol,
426 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
427 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
428 const std::string& label,
429 const Teuchos::RCP<Teuchos::ParameterList>& params) {
430 // Node-specific code
431 using Teuchos::RCP;
432
433 // Options
434 // int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer // unreferenced
435 std::string myalg("KK");
436 if (!params.is_null()) {
437 if (params->isParameter("hip: jacobi algorithm"))
438 myalg = params->get("hip: jacobi algorithm", myalg);
439 }
440
441 if (myalg == "MSAK") {
442 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
443 } else if (myalg == "KK") {
444 jacobi_A_B_newmatrix_KokkosKernels(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
445 } else {
446 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
447 }
448
449 Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Newmatrix HIPESFC");
450
451 // Final Fillcomplete
452 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
453 labelList->set("Timer Label", label);
454 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
455
456 // NOTE: MSAK already fillCompletes, so we have to check here
457 if (!C.isFillComplete()) {
458 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
459 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
460 }
461}
462
463/*********************************************************************************************************/
464template <class Scalar,
465 class LocalOrdinal,
466 class GlobalOrdinal,
467 class LocalOrdinalViewType>
468void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
469 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
470 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
471 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
472 const LocalOrdinalViewType& targetMapToOrigRow_dev,
473 const LocalOrdinalViewType& targetMapToImportRow_dev,
474 const LocalOrdinalViewType& Bcol2Ccol_dev,
475 const LocalOrdinalViewType& Icol2Ccol_dev,
476 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
477 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
478 const std::string& label,
479 const Teuchos::RCP<Teuchos::ParameterList>& params) {
480 // FIXME: Right now, this is a cut-and-paste of the serial kernel
481 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
482
483 using Teuchos::RCP;
484 using Teuchos::rcp;
485 RCP<Tpetra::Details::ProfilingRegion> MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Reuse HIPCore"));
486
487 // Lots and lots of typedefs
488 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
489 typedef typename KCRS::StaticCrsGraphType graph_t;
490 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
491 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
492 typedef typename KCRS::values_type::non_const_type scalar_view_t;
493 typedef typename scalar_view_t::memory_space scalar_memory_space;
494
495 typedef Scalar SC;
496 typedef LocalOrdinal LO;
497 typedef GlobalOrdinal GO;
498 typedef Node NO;
499 typedef Map<LO, GO, NO> map_type;
500 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
501 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
502 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
503
504 // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
505 // KDDKDD UVM Ideally, this function would run on device and use
506 // KDDKDD UVM KokkosKernels instead of this host implementation.
507 auto targetMapToOrigRow =
508 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
509 targetMapToOrigRow_dev);
510 auto targetMapToImportRow =
511 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
512 targetMapToImportRow_dev);
513 auto Bcol2Ccol =
514 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
515 Bcol2Ccol_dev);
516 auto Icol2Ccol =
517 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
518 Icol2Ccol_dev);
519
520 // Sizes
521 RCP<const map_type> Ccolmap = C.getColMap();
522 size_t m = Aview.origMatrix->getLocalNumRows();
523 size_t n = Ccolmap->getLocalNumElements();
524
525 // Grab the Kokkos::SparseCrsMatrices & inner stuff
526 const KCRS Amat = Aview.origMatrix->getLocalMatrixHost();
527 const KCRS Bmat = Bview.origMatrix->getLocalMatrixHost();
528 const KCRS Cmat = C.getLocalMatrixHost();
529
530 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
531 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
532 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
533 scalar_view_t Cvals = Cmat.values;
534
535 c_lno_view_t Irowptr;
536 lno_nnz_view_t Icolind;
537 scalar_view_t Ivals;
538 if (!Bview.importMatrix.is_null()) {
539 auto lclB = Bview.importMatrix->getLocalMatrixHost();
540 Irowptr = lclB.graph.row_map;
541 Icolind = lclB.graph.entries;
542 Ivals = lclB.values;
543 }
544
545 // Jacobi-specific inner stuff
546 auto Dvals =
547 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
548
549 RCP<Tpetra::Details::ProfilingRegion> MM2 = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Reuse HIPCore - Compare"));
550
551 // The status array will contain the index into colind where this entry was last deposited.
552 // c_status[i] < CSR_ip - not in the row yet
553 // c_status[i] >= CSR_ip - this is the entry where you can find the data
554 // We start with this filled with INVALID's indicating that there are no entries yet.
555 // Sadly, this complicates the code due to the fact that size_t's are unsigned.
556 std::vector<size_t> c_status(n, ST_INVALID);
557
558 // For each row of A/C
559 size_t CSR_ip = 0, OLD_ip = 0;
560 for (size_t i = 0; i < m; i++) {
561 // First fill the c_status array w/ locations where we're allowed to
562 // generate nonzeros for this row
563 OLD_ip = Crowptr[i];
564 CSR_ip = Crowptr[i + 1];
565 for (size_t k = OLD_ip; k < CSR_ip; k++) {
566 c_status[Ccolind[k]] = k;
567
568 // Reset values in the row of C
569 Cvals[k] = SC_ZERO;
570 }
571
572 SC minusOmegaDval = -omega * Dvals(i, 0);
573
574 // Entries of B
575 for (size_t j = Browptr[i]; j < Browptr[i + 1]; j++) {
576 Scalar Bval = Bvals[j];
577 if (Bval == SC_ZERO)
578 continue;
579 LO Bij = Bcolind[j];
580 LO Cij = Bcol2Ccol[Bij];
581
582 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
583 std::runtime_error, "Trying to insert a new entry into a static graph");
584
585 Cvals[c_status[Cij]] = Bvals[j];
586 }
587
588 // Entries of -omega * Dinv * A * B
589 for (size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
590 LO Aik = Acolind[k];
591 const SC Aval = Avals[k];
592 if (Aval == SC_ZERO)
593 continue;
594
595 if (targetMapToOrigRow[Aik] != LO_INVALID) {
596 // Local matrix
597 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
598
599 for (size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
600 LO Bkj = Bcolind[j];
601 LO Cij = Bcol2Ccol[Bkj];
602
603 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
604 std::runtime_error, "Trying to insert a new entry into a static graph");
605
606 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
607 }
608
609 } else {
610 // Remote matrix
611 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
612 for (size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
613 LO Ikj = Icolind[j];
614 LO Cij = Icol2Ccol[Ikj];
615
616 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
617 std::runtime_error, "Trying to insert a new entry into a static graph");
618
619 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
620 }
621 }
622 }
623 }
624
625 MM = Teuchos::null;
626 MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Reuse ESFC"));
627
628 C.fillComplete(C.getDomainMap(), C.getRangeMap());
629}
630
631/*********************************************************************************************************/
632template <class Scalar,
633 class LocalOrdinal,
634 class GlobalOrdinal,
635 class LocalOrdinalViewType>
636void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
637 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
638 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
639 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
640 const LocalOrdinalViewType& Acol2Brow,
641 const LocalOrdinalViewType& Acol2Irow,
642 const LocalOrdinalViewType& Bcol2Ccol,
643 const LocalOrdinalViewType& Icol2Ccol,
644 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
645 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
646 const std::string& label,
647 const Teuchos::RCP<Teuchos::ParameterList>& params) {
648 // Check if the diagonal entries exist in debug mode
649 const bool debug = Tpetra::Details::Behavior::debug();
650 if (debug) {
651 auto rowMap = Aview.origMatrix->getRowMap();
652 Tpetra::Vector<Scalar> diags(rowMap);
653 Aview.origMatrix->getLocalDiagCopy(diags);
654 size_t diagLength = rowMap->getLocalNumElements();
655 Teuchos::Array<Scalar> diagonal(diagLength);
656 diags.get1dCopy(diagonal());
657
658 for (size_t i = 0; i < diagLength; ++i) {
659 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
660 std::runtime_error,
661 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl
662 << "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
663 }
664 }
665
666 using Teuchos::RCP;
667 using Teuchos::rcp;
668 RCP<Tpetra::Details::ProfilingRegion> MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Newmatrix KokkosKernels"));
669
670 // Usings
671 using device_t = typename Tpetra::KokkosCompat::KokkosHIPWrapperNode::device_type;
673 using graph_t = typename matrix_t::StaticCrsGraphType;
674 using lno_view_t = typename graph_t::row_map_type::non_const_type;
675 using int_view_t = Kokkos::View<int*,
676 typename lno_view_t::array_layout,
677 typename lno_view_t::memory_space,
678 typename lno_view_t::memory_traits>;
679 using c_lno_view_t = typename graph_t::row_map_type::const_type;
680 using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
681 using scalar_view_t = typename matrix_t::values_type::non_const_type;
682
683 // KokkosKernels handle
684 using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
685 typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
686 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
687 using int_handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
688 typename int_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
689 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
690
691 // Merge the B and Bimport matrices
692 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
693
694 // Get the properties and arrays of input matrices
695 const matrix_t Amat = Aview.origMatrix->getLocalMatrixDevice();
696 const matrix_t Bmat = Bview.origMatrix->getLocalMatrixDevice();
697
698 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
699 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
700 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
701
702 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
703 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
704 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
705
706 // Arrays of the output matrix
707 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("row_mapC"), AnumRows + 1);
708 lno_nnz_view_t entriesC;
709 scalar_view_t valuesC;
710
711 // Options
712 int team_work_size = 16;
713 std::string myalg("SPGEMM_KK_MEMORY");
714 if (!params.is_null()) {
715 if (params->isParameter("hip: algorithm"))
716 myalg = params->get("hip: algorithm", myalg);
717 if (params->isParameter("hip: team work size"))
718 team_work_size = params->get("hip: team work size", team_work_size);
719 }
720
721 // Get the algorithm mode
722 std::string nodename("HIP");
723 std::string alg = nodename + std::string(" algorithm");
724 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
725 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
726
727 Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
728 const bool useIntRowptrs =
729 irph.shouldUseIntRowptrs() &&
730 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
731
732 const Scalar jacobiOmega = omega * Teuchos::ScalarTraits<Scalar>::one();
733
734 if (useIntRowptrs) {
735 int_handle_t kh;
736 kh.create_spgemm_handle(alg_enum);
737 kh.set_team_work_size(team_work_size);
738
739 int_view_t int_row_mapC(Kokkos::ViewAllocateWithoutInitializing("int_row_mapC"), AnumRows + 1);
740
741 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
742 auto Bint = irph.getIntRowptrMatrix(Bmerged);
743 {
744 Tpetra::Details::ProfilingRegion MM2("TpetraExt: Jacobi: Newmatrix KokkosKernels symbolic int");
745
746 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
747 Aint.graph.row_map, Aint.graph.entries, false,
748 Bint.graph.row_map, Bint.graph.entries, false,
749 int_row_mapC);
750 }
751
752 Tpetra::Details::ProfilingRegion MM2("TpetraExt: Jacobi: Newmatrix KokkosKernels numeric int");
753
754 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
755 if (c_nnz_size) {
756 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
757 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
758 }
759
760 // even though there is no TPL for this, we have to use the same handle that was used in the symbolic phase,
761 // so need to have a special int-typed call for this as well.
762 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
763 Aint.graph.row_map, Aint.graph.entries, Amat.values, false,
764 Bint.graph.row_map, Bint.graph.entries, Bint.values, false,
765 int_row_mapC, entriesC, valuesC,
766 jacobiOmega, Dinv.getLocalViewDevice(Access::ReadOnly));
767 // transfer the integer rowptrs back to the correct rowptr type
768 Kokkos::parallel_for(
769 int_row_mapC.size(), KOKKOS_LAMBDA(int i) { row_mapC(i) = int_row_mapC(i); });
770 kh.destroy_spgemm_handle();
771 } else {
772 handle_t kh;
773 kh.create_spgemm_handle(alg_enum);
774 kh.set_team_work_size(team_work_size);
775
776 {
777 Tpetra::Details::ProfilingRegion MM2("TpetraExt: Jacobi: Newmatrix KokkosKernels symbolic non-int");
778
779 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
780 Amat.graph.row_map, Amat.graph.entries, false,
781 Bmerged.graph.row_map, Bmerged.graph.entries, false,
782 row_mapC);
783 }
784
785 Tpetra::Details::ProfilingRegion MM2("TpetraExt: Jacobi: Newmatrix KokkosKernels numeric non-int");
786
787 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
788 if (c_nnz_size) {
789 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
790 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
791 }
792 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
793 Amat.graph.row_map, Amat.graph.entries, Amat.values, false,
794 Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false,
795 row_mapC, entriesC, valuesC,
796 jacobiOmega, Dinv.getLocalViewDevice(Access::ReadOnly));
797 kh.destroy_spgemm_handle();
798 }
799
800 MM = Teuchos::null;
801 MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Newmatrix HIPSort"));
802
803 // Sort & set values
804 if (params.is_null() || params->get("sort entries", true))
805 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
806 C.setAllValues(row_mapC, entriesC, valuesC);
807
808 MM = Teuchos::null;
809 MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Newmatrix HIPESFC"));
810
811 // Final Fillcomplete
812 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
813 labelList->set("Timer Label", label);
814 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
815 Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
816 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
817}
818
819} // namespace MMdetails
820} // namespace Tpetra
821
822#endif // HIP
823
824#endif
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Struct that holds views of the contents of a CrsMatrix.
Teuchos::RCP< const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > origMatrix
The original matrix.
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.