Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
TpetraExt_MatrixMatrix_HIP.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_MATRIXMATRIX_HIP_DEF_HPP
11#define TPETRA_MATRIXMATRIX_HIP_DEF_HPP
12
13#include "Tpetra_Details_IntRowPtrHelper.hpp"
14
15#ifdef HAVE_TPETRA_INST_HIP
16namespace Tpetra {
17namespace MMdetails {
18
19/*********************************************************************************************************/
20// MMM KernelWrappers for Partial Specialization to HIP
21template <class Scalar,
22 class LocalOrdinal,
23 class GlobalOrdinal,
24 class LocalOrdinalViewType>
25struct KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType> {
26 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
27 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
28 const LocalOrdinalViewType& Acol2Brow,
29 const LocalOrdinalViewType& Acol2Irow,
30 const LocalOrdinalViewType& Bcol2Ccol,
31 const LocalOrdinalViewType& Icol2Ccol,
32 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
33 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
34 const std::string& label = std::string(),
35 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
36
37 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
38 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
39 const LocalOrdinalViewType& Acol2Brow,
40 const LocalOrdinalViewType& Acol2Irow,
41 const LocalOrdinalViewType& Bcol2Ccol,
42 const LocalOrdinalViewType& Icol2Ccol,
43 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
44 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
45 const std::string& label = std::string(),
46 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
47};
48
49// Jacobi KernelWrappers for Partial Specialization to HIP
50template <class Scalar,
51 class LocalOrdinal,
52 class GlobalOrdinal, class LocalOrdinalViewType>
53struct KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType> {
54 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
55 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
56 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
57 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
58 const LocalOrdinalViewType& Acol2Brow,
59 const LocalOrdinalViewType& Acol2Irow,
60 const LocalOrdinalViewType& Bcol2Ccol,
61 const LocalOrdinalViewType& Icol2Ccol,
62 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
63 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
64 const std::string& label = std::string(),
65 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
66
67 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
68 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
69 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
70 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
71 const LocalOrdinalViewType& Acol2Brow,
72 const LocalOrdinalViewType& Acol2Irow,
73 const LocalOrdinalViewType& Bcol2Ccol,
74 const LocalOrdinalViewType& Icol2Ccol,
75 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
76 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
77 const std::string& label = std::string(),
78 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
79
80 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
81 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
82 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
83 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
84 const LocalOrdinalViewType& Acol2Brow,
85 const LocalOrdinalViewType& Acol2Irow,
86 const LocalOrdinalViewType& Bcol2Ccol,
87 const LocalOrdinalViewType& Icol2Ccol,
88 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
89 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
90 const std::string& label = std::string(),
91 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
92};
93
94/*********************************************************************************************************/
95// AB NewMatrix Kernel wrappers (KokkosKernels/HIP Version)
96template <class Scalar,
97 class LocalOrdinal,
98 class GlobalOrdinal,
99 class LocalOrdinalViewType>
100void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
101 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
102 const LocalOrdinalViewType& Acol2Brow,
103 const LocalOrdinalViewType& Acol2Irow,
104 const LocalOrdinalViewType& Bcol2Ccol,
105 const LocalOrdinalViewType& Icol2Ccol,
106 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
107 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
108 const std::string& label,
109 const Teuchos::RCP<Teuchos::ParameterList>& params) {
110 // Node-specific code
111 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
112 std::string nodename("HIP");
113
114 // Lots and lots of typedefs
115 using Teuchos::RCP;
116 using Teuchos::rcp;
118 typedef typename KCRS::device_type device_t;
119 typedef typename KCRS::StaticCrsGraphType graph_t;
120 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
121 using int_view_t = Kokkos::View<int*, typename lno_view_t::array_layout, typename lno_view_t::memory_space, typename lno_view_t::memory_traits>;
122 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
123 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
124 typedef typename KCRS::values_type::non_const_type scalar_view_t;
125 // typedef typename graph_t::row_map_type::const_type lno_view_t_const;
126
127 RCP<Tpetra::Details::ProfilingRegion> MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix HIPWrapper"));
128
129 // Options
130 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
131 std::string myalg("SPGEMM_KK_MEMORY");
132 if (!params.is_null()) {
133 if (params->isParameter("hip: algorithm"))
134 myalg = params->get("hip: algorithm", myalg);
135 if (params->isParameter("hip: team work size"))
136 team_work_size = params->get("hip: team work size", team_work_size);
137 }
138
139 // KokkosKernelsHandle
140 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
141 typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
142 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>
143 KernelHandle;
144 using IntKernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<
145 typename int_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
146 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
147
148 // Grab the Kokkos::SparseCrsMatrices
149 const KCRS& Amat = Aview.origMatrix->getLocalMatrixDevice();
150 const KCRS& Bmat = Bview.origMatrix->getLocalMatrixDevice();
151
152 c_lno_view_t Arowptr = Amat.graph.row_map,
153 Browptr = Bmat.graph.row_map;
154 const lno_nnz_view_t Acolind = Amat.graph.entries,
155 Bcolind = Bmat.graph.entries;
156 const scalar_view_t Avals = Amat.values,
157 Bvals = Bmat.values;
158
159 // Get the algorithm mode
160 std::string alg = nodename + std::string(" algorithm");
161 // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
162 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
163 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
164
165 // Merge the B and Bimport matrices
166 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
167
168 MM = Teuchos::null;
169 MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix HIP Core"));
170
171 // Do the multiply on whatever we've got
172 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
173 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
174 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
175
176 // regardless of whether integer row ptrs are used, need to ultimately produce the row pointer, entries, and values of the expected types
177 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_lno_row"), AnumRows + 1);
178 lno_nnz_view_t entriesC;
179 scalar_view_t valuesC;
180
181 Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
182 const bool useIntRowptrs =
183 irph.shouldUseIntRowptrs() &&
184 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
185
186 if (useIntRowptrs) {
187 IntKernelHandle kh;
188 kh.create_spgemm_handle(alg_enum);
189 kh.set_team_work_size(team_work_size);
190
191 int_view_t int_row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_int_row"), AnumRows + 1);
192
193 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
194 auto Bint = irph.getIntRowptrMatrix(Bmerged);
195
196 {
197 Tpetra::Details::ProfilingRegion MM2("TpetraExt: MMM: Newmatrix KokkosKernels symbolic int");
198 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Aint.graph.row_map, Aint.graph.entries, false, Bint.graph.row_map, Bint.graph.entries, false, int_row_mapC);
199 }
200 Tpetra::Details::ProfilingRegion MM2("TpetraExt: MMM: Newmatrix KokkosKernels numeric int");
201
202 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
203 if (c_nnz_size) {
204 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
205 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
206 }
207 KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Aint.graph.row_map, Aint.graph.entries, Aint.values, false, Bint.graph.row_map, Bint.graph.entries, Bint.values, false, int_row_mapC, entriesC, valuesC);
208 // transfer the integer rowptrs back to the correct rowptr type
209 Kokkos::parallel_for(
210 int_row_mapC.size(), KOKKOS_LAMBDA(int i) { row_mapC(i) = int_row_mapC(i); });
211 kh.destroy_spgemm_handle();
212 } else {
213 KernelHandle kh;
214 kh.create_spgemm_handle(alg_enum);
215 kh.set_team_work_size(team_work_size);
216
217 {
218 Tpetra::Details::ProfilingRegion MM2("TpetraExt: MMM: Newmatrix KokkosKernels symbolic non-int");
219 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Amat.graph.row_map, Amat.graph.entries, false, Bmerged.graph.row_map, Bmerged.graph.entries, false, row_mapC);
220 }
221
222 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
223 if (c_nnz_size) {
224 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
225 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
226 }
227
228 Tpetra::Details::ProfilingRegion MM2("TpetraExt: MMM: Newmatrix KokkosKernels numeric non-int");
229 KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Amat.graph.row_map, Amat.graph.entries, Amat.values, false, Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false, row_mapC, entriesC, valuesC);
230 kh.destroy_spgemm_handle();
231 }
232
233 MM = Teuchos::null;
234 MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix HIP Sort"));
235
236 // Sort & set values
237 if (params.is_null() || params->get("sort entries", true))
238 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
239 C.setAllValues(row_mapC, entriesC, valuesC);
240
241 MM = Teuchos::null;
242 MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix HIP ESFC"));
243
244 // Final Fillcomplete
245 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
246 labelList->set("Timer Label", label);
247 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
248 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
249 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
250}
251
252/*********************************************************************************************************/
253template <class Scalar,
254 class LocalOrdinal,
255 class GlobalOrdinal,
256 class LocalOrdinalViewType>
257void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
258 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
259 const LocalOrdinalViewType& targetMapToOrigRow_dev,
260 const LocalOrdinalViewType& targetMapToImportRow_dev,
261 const LocalOrdinalViewType& Bcol2Ccol_dev,
262 const LocalOrdinalViewType& Icol2Ccol_dev,
263 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
264 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
265 const std::string& label,
266 const Teuchos::RCP<Teuchos::ParameterList>& params) {
267 // FIXME: Right now, this is a cut-and-paste of the serial kernel
268 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
269 using Teuchos::RCP;
270 using Teuchos::rcp;
271
272 RCP<Tpetra::Details::ProfilingRegion> MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Reuse SerialCore"));
273
274 // Lots and lots of typedefs
275 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
276 typedef typename KCRS::StaticCrsGraphType graph_t;
277 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
278 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
279 typedef typename KCRS::values_type::non_const_type scalar_view_t;
280
281 typedef Scalar SC;
282 typedef LocalOrdinal LO;
283 typedef GlobalOrdinal GO;
284 typedef Node NO;
285 typedef Map<LO, GO, NO> map_type;
286 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
287 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
288 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
289
290 // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
291 // KDDKDD UVM Ideally, this function would run on device and use
292 // KDDKDD UVM KokkosKernels instead of this host implementation.
293 auto targetMapToOrigRow =
294 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
295 targetMapToOrigRow_dev);
296 auto targetMapToImportRow =
297 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
298 targetMapToImportRow_dev);
299 auto Bcol2Ccol =
300 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
301 Bcol2Ccol_dev);
302 auto Icol2Ccol =
303 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
304 Icol2Ccol_dev);
305
306 // Sizes
307 RCP<const map_type> Ccolmap = C.getColMap();
308 size_t m = Aview.origMatrix->getLocalNumRows();
309 size_t n = Ccolmap->getLocalNumElements();
310
311 // Grab the Kokkos::SparseCrsMatrices & inner stuff
312 const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
313 const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
314 const KCRS& Cmat = C.getLocalMatrixHost();
315
316 c_lno_view_t Arowptr = Amat.graph.row_map,
317 Browptr = Bmat.graph.row_map,
318 Crowptr = Cmat.graph.row_map;
319 const lno_nnz_view_t Acolind = Amat.graph.entries,
320 Bcolind = Bmat.graph.entries,
321 Ccolind = Cmat.graph.entries;
322 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
323 scalar_view_t Cvals = Cmat.values;
324
325 c_lno_view_t Irowptr;
326 lno_nnz_view_t Icolind;
327 scalar_view_t Ivals;
328 if (!Bview.importMatrix.is_null()) {
329 auto lclB = Bview.importMatrix->getLocalMatrixHost();
330 Irowptr = lclB.graph.row_map;
331 Icolind = lclB.graph.entries;
332 Ivals = lclB.values;
333 }
334
335 auto MM2 = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix SerialCore - Compare"));
336
337 // Classic csr assembly (low memory edition)
338 // mfh 27 Sep 2016: The c_status array is an implementation detail
339 // of the local sparse matrix-matrix multiply routine.
340
341 // The status array will contain the index into colind where this entry was last deposited.
342 // c_status[i] < CSR_ip - not in the row yet
343 // c_status[i] >= CSR_ip - this is the entry where you can find the data
344 // We start with this filled with INVALID's indicating that there are no entries yet.
345 // Sadly, this complicates the code due to the fact that size_t's are unsigned.
346 std::vector<size_t> c_status(n, ST_INVALID);
347
348 // For each row of A/C
349 size_t CSR_ip = 0, OLD_ip = 0;
350 for (size_t i = 0; i < m; i++) {
351 // First fill the c_status array w/ locations where we're allowed to
352 // generate nonzeros for this row
353 OLD_ip = Crowptr[i];
354 CSR_ip = Crowptr[i + 1];
355 for (size_t k = OLD_ip; k < CSR_ip; k++) {
356 c_status[Ccolind[k]] = k;
357
358 // Reset values in the row of C
359 Cvals[k] = SC_ZERO;
360 }
361
362 for (size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
363 LO Aik = Acolind[k];
364 const SC Aval = Avals[k];
365 if (Aval == SC_ZERO)
366 continue;
367
368 if (targetMapToOrigRow[Aik] != LO_INVALID) {
369 // Local matrix
370 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
371
372 for (size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
373 LO Bkj = Bcolind[j];
374 LO Cij = Bcol2Ccol[Bkj];
375
376 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
377 std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph "
378 << "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
379
380 Cvals[c_status[Cij]] += Aval * Bvals[j];
381 }
382
383 } else {
384 // Remote matrix
385 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
386 for (size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
387 LO Ikj = Icolind[j];
388 LO Cij = Icol2Ccol[Ikj];
389
390 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
391 std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph "
392 << "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
393
394 Cvals[c_status[Cij]] += Aval * Ivals[j];
395 }
396 }
397 }
398 }
399
400 C.fillComplete(C.getDomainMap(), C.getRangeMap());
401}
402
403/*********************************************************************************************************/
404template <class Scalar,
405 class LocalOrdinal,
406 class GlobalOrdinal,
407 class LocalOrdinalViewType>
408void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
409 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
410 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
411 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
412 const LocalOrdinalViewType& Acol2Brow,
413 const LocalOrdinalViewType& Acol2Irow,
414 const LocalOrdinalViewType& Bcol2Ccol,
415 const LocalOrdinalViewType& Icol2Ccol,
416 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
417 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
418 const std::string& label,
419 const Teuchos::RCP<Teuchos::ParameterList>& params) {
420 // Node-specific code
421 using Teuchos::RCP;
422
423 // Options
424 // int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer // unreferenced
425 std::string myalg("KK");
426 if (!params.is_null()) {
427 if (params->isParameter("hip: jacobi algorithm"))
428 myalg = params->get("hip: jacobi algorithm", myalg);
429 }
430
431 if (myalg == "MSAK") {
432 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
433 } else if (myalg == "KK") {
434 jacobi_A_B_newmatrix_KokkosKernels(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
435 } else {
436 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
437 }
438
439 Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Newmatrix HIPESFC");
440
441 // Final Fillcomplete
442 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
443 labelList->set("Timer Label", label);
444 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
445
446 // NOTE: MSAK already fillCompletes, so we have to check here
447 if (!C.isFillComplete()) {
448 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
449 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
450 }
451}
452
453/*********************************************************************************************************/
454template <class Scalar,
455 class LocalOrdinal,
456 class GlobalOrdinal,
457 class LocalOrdinalViewType>
458void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
459 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
460 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
461 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
462 const LocalOrdinalViewType& targetMapToOrigRow_dev,
463 const LocalOrdinalViewType& targetMapToImportRow_dev,
464 const LocalOrdinalViewType& Bcol2Ccol_dev,
465 const LocalOrdinalViewType& Icol2Ccol_dev,
466 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
467 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
468 const std::string& label,
469 const Teuchos::RCP<Teuchos::ParameterList>& params) {
470 // FIXME: Right now, this is a cut-and-paste of the serial kernel
471 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
472
473 using Teuchos::RCP;
474 using Teuchos::rcp;
475 RCP<Tpetra::Details::ProfilingRegion> MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Reuse HIPCore"));
476
477 // Lots and lots of typedefs
478 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
479 typedef typename KCRS::StaticCrsGraphType graph_t;
480 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
481 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
482 typedef typename KCRS::values_type::non_const_type scalar_view_t;
483 typedef typename scalar_view_t::memory_space scalar_memory_space;
484
485 typedef Scalar SC;
486 typedef LocalOrdinal LO;
487 typedef GlobalOrdinal GO;
488 typedef Node NO;
489 typedef Map<LO, GO, NO> map_type;
490 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
491 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
492 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
493
494 // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
495 // KDDKDD UVM Ideally, this function would run on device and use
496 // KDDKDD UVM KokkosKernels instead of this host implementation.
497 auto targetMapToOrigRow =
498 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
499 targetMapToOrigRow_dev);
500 auto targetMapToImportRow =
501 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
502 targetMapToImportRow_dev);
503 auto Bcol2Ccol =
504 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
505 Bcol2Ccol_dev);
506 auto Icol2Ccol =
507 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
508 Icol2Ccol_dev);
509
510 // Sizes
511 RCP<const map_type> Ccolmap = C.getColMap();
512 size_t m = Aview.origMatrix->getLocalNumRows();
513 size_t n = Ccolmap->getLocalNumElements();
514
515 // Grab the Kokkos::SparseCrsMatrices & inner stuff
516 const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
517 const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
518 const KCRS& Cmat = C.getLocalMatrixHost();
519
520 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
521 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
522 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
523 scalar_view_t Cvals = Cmat.values;
524
525 c_lno_view_t Irowptr;
526 lno_nnz_view_t Icolind;
527 scalar_view_t Ivals;
528 if (!Bview.importMatrix.is_null()) {
529 auto lclB = Bview.importMatrix->getLocalMatrixHost();
530 Irowptr = lclB.graph.row_map;
531 Icolind = lclB.graph.entries;
532 Ivals = lclB.values;
533 }
534
535 // Jacobi-specific inner stuff
536 auto Dvals =
537 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
538
539 RCP<Tpetra::Details::ProfilingRegion> MM2 = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Reuse HIPCore - Compare"));
540
541 // The status array will contain the index into colind where this entry was last deposited.
542 // c_status[i] < CSR_ip - not in the row yet
543 // c_status[i] >= CSR_ip - this is the entry where you can find the data
544 // We start with this filled with INVALID's indicating that there are no entries yet.
545 // Sadly, this complicates the code due to the fact that size_t's are unsigned.
546 std::vector<size_t> c_status(n, ST_INVALID);
547
548 // For each row of A/C
549 size_t CSR_ip = 0, OLD_ip = 0;
550 for (size_t i = 0; i < m; i++) {
551 // First fill the c_status array w/ locations where we're allowed to
552 // generate nonzeros for this row
553 OLD_ip = Crowptr[i];
554 CSR_ip = Crowptr[i + 1];
555 for (size_t k = OLD_ip; k < CSR_ip; k++) {
556 c_status[Ccolind[k]] = k;
557
558 // Reset values in the row of C
559 Cvals[k] = SC_ZERO;
560 }
561
562 SC minusOmegaDval = -omega * Dvals(i, 0);
563
564 // Entries of B
565 for (size_t j = Browptr[i]; j < Browptr[i + 1]; j++) {
566 Scalar Bval = Bvals[j];
567 if (Bval == SC_ZERO)
568 continue;
569 LO Bij = Bcolind[j];
570 LO Cij = Bcol2Ccol[Bij];
571
572 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
573 std::runtime_error, "Trying to insert a new entry into a static graph");
574
575 Cvals[c_status[Cij]] = Bvals[j];
576 }
577
578 // Entries of -omega * Dinv * A * B
579 for (size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
580 LO Aik = Acolind[k];
581 const SC Aval = Avals[k];
582 if (Aval == SC_ZERO)
583 continue;
584
585 if (targetMapToOrigRow[Aik] != LO_INVALID) {
586 // Local matrix
587 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
588
589 for (size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
590 LO Bkj = Bcolind[j];
591 LO Cij = Bcol2Ccol[Bkj];
592
593 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
594 std::runtime_error, "Trying to insert a new entry into a static graph");
595
596 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
597 }
598
599 } else {
600 // Remote matrix
601 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
602 for (size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
603 LO Ikj = Icolind[j];
604 LO Cij = Icol2Ccol[Ikj];
605
606 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
607 std::runtime_error, "Trying to insert a new entry into a static graph");
608
609 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
610 }
611 }
612 }
613 }
614
615 MM = Teuchos::null;
616 MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Reuse ESFC"));
617
618 C.fillComplete(C.getDomainMap(), C.getRangeMap());
619}
620
621/*********************************************************************************************************/
622template <class Scalar,
623 class LocalOrdinal,
624 class GlobalOrdinal,
625 class LocalOrdinalViewType>
626void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
627 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
628 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
629 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
630 const LocalOrdinalViewType& Acol2Brow,
631 const LocalOrdinalViewType& Acol2Irow,
632 const LocalOrdinalViewType& Bcol2Ccol,
633 const LocalOrdinalViewType& Icol2Ccol,
634 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
635 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
636 const std::string& label,
637 const Teuchos::RCP<Teuchos::ParameterList>& params) {
638 // Check if the diagonal entries exist in debug mode
639 const bool debug = Tpetra::Details::Behavior::debug();
640 if (debug) {
641 auto rowMap = Aview.origMatrix->getRowMap();
642 Tpetra::Vector<Scalar> diags(rowMap);
643 Aview.origMatrix->getLocalDiagCopy(diags);
644 size_t diagLength = rowMap->getLocalNumElements();
645 Teuchos::Array<Scalar> diagonal(diagLength);
646 diags.get1dCopy(diagonal());
647
648 for (size_t i = 0; i < diagLength; ++i) {
649 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
650 std::runtime_error,
651 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl
652 << "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
653 }
654 }
655
656 using Teuchos::RCP;
657 using Teuchos::rcp;
658 RCP<Tpetra::Details::ProfilingRegion> MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Newmatrix KokkosKernels"));
659
660 // Usings
661 using device_t = typename Tpetra::KokkosCompat::KokkosHIPWrapperNode::device_type;
663 using graph_t = typename matrix_t::StaticCrsGraphType;
664 using lno_view_t = typename graph_t::row_map_type::non_const_type;
665 using int_view_t = Kokkos::View<int*,
666 typename lno_view_t::array_layout,
667 typename lno_view_t::memory_space,
668 typename lno_view_t::memory_traits>;
669 using c_lno_view_t = typename graph_t::row_map_type::const_type;
670 using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
671 using scalar_view_t = typename matrix_t::values_type::non_const_type;
672
673 // KokkosKernels handle
674 using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
675 typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
676 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
677 using int_handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
678 typename int_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
679 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
680
681 // Merge the B and Bimport matrices
682 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
683
684 // Get the properties and arrays of input matrices
685 const matrix_t& Amat = Aview.origMatrix->getLocalMatrixDevice();
686 const matrix_t& Bmat = Bview.origMatrix->getLocalMatrixDevice();
687
688 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
689 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
690 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
691
692 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
693 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
694 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
695
696 // Arrays of the output matrix
697 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("row_mapC"), AnumRows + 1);
698 lno_nnz_view_t entriesC;
699 scalar_view_t valuesC;
700
701 // Options
702 int team_work_size = 16;
703 std::string myalg("SPGEMM_KK_MEMORY");
704 if (!params.is_null()) {
705 if (params->isParameter("hip: algorithm"))
706 myalg = params->get("hip: algorithm", myalg);
707 if (params->isParameter("hip: team work size"))
708 team_work_size = params->get("hip: team work size", team_work_size);
709 }
710
711 // Get the algorithm mode
712 std::string nodename("HIP");
713 std::string alg = nodename + std::string(" algorithm");
714 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
715 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
716
717 Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
718 const bool useIntRowptrs =
719 irph.shouldUseIntRowptrs() &&
720 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
721
722 if (useIntRowptrs) {
723 int_handle_t kh;
724 kh.create_spgemm_handle(alg_enum);
725 kh.set_team_work_size(team_work_size);
726
727 int_view_t int_row_mapC(Kokkos::ViewAllocateWithoutInitializing("int_row_mapC"), AnumRows + 1);
728
729 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
730 auto Bint = irph.getIntRowptrMatrix(Bmerged);
731 {
732 Tpetra::Details::ProfilingRegion MM2("TpetraExt: Jacobi: Newmatrix KokkosKernels symbolic int");
733
734 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
735 Aint.graph.row_map, Aint.graph.entries, false,
736 Bint.graph.row_map, Bint.graph.entries, false,
737 int_row_mapC);
738 }
739
740 Tpetra::Details::ProfilingRegion MM2("TpetraExt: Jacobi: Newmatrix KokkosKernels numeric int");
741
742 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
743 if (c_nnz_size) {
744 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
745 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
746 }
747
748 // even though there is no TPL for this, we have to use the same handle that was used in the symbolic phase,
749 // so need to have a special int-typed call for this as well.
750 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
751 Aint.graph.row_map, Aint.graph.entries, Amat.values, false,
752 Bint.graph.row_map, Bint.graph.entries, Bint.values, false,
753 int_row_mapC, entriesC, valuesC,
754 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
755 // transfer the integer rowptrs back to the correct rowptr type
756 Kokkos::parallel_for(
757 int_row_mapC.size(), KOKKOS_LAMBDA(int i) { row_mapC(i) = int_row_mapC(i); });
758 kh.destroy_spgemm_handle();
759 } else {
760 handle_t kh;
761 kh.create_spgemm_handle(alg_enum);
762 kh.set_team_work_size(team_work_size);
763
764 {
765 Tpetra::Details::ProfilingRegion MM2("TpetraExt: Jacobi: Newmatrix KokkosKernels symbolic non-int");
766
767 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
768 Amat.graph.row_map, Amat.graph.entries, false,
769 Bmerged.graph.row_map, Bmerged.graph.entries, false,
770 row_mapC);
771 }
772
773 Tpetra::Details::ProfilingRegion MM2("TpetraExt: Jacobi: Newmatrix KokkosKernels numeric non-int");
774
775 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
776 if (c_nnz_size) {
777 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
778 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
779 }
780 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
781 Amat.graph.row_map, Amat.graph.entries, Amat.values, false,
782 Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false,
783 row_mapC, entriesC, valuesC,
784 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
785 kh.destroy_spgemm_handle();
786 }
787
788 MM = Teuchos::null;
789 MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Newmatrix HIPSort"));
790
791 // Sort & set values
792 if (params.is_null() || params->get("sort entries", true))
793 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
794 C.setAllValues(row_mapC, entriesC, valuesC);
795
796 MM = Teuchos::null;
797 MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Newmatrix HIPESFC"));
798
799 // Final Fillcomplete
800 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
801 labelList->set("Timer Label", label);
802 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
803 Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
804 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
805}
806
807} // namespace MMdetails
808} // namespace Tpetra
809
810#endif // HIP
811
812#endif
Struct that holds views of the contents of a CrsMatrix.
Teuchos::RCP< const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > origMatrix
The original matrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.