Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
TpetraExt_MatrixMatrix_OpenMP.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
11#define TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
12
13#ifdef HAVE_TPETRA_INST_OPENMP
14namespace Tpetra {
15namespace MMdetails {
16
17/*********************************************************************************************************/
18// MMM KernelWrappers for Partial Specialization to OpenMP
19template <class Scalar,
20 class LocalOrdinal,
21 class GlobalOrdinal, class LocalOrdinalViewType>
22struct KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
23 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
24 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
25 const LocalOrdinalViewType& Acol2Brow,
26 const LocalOrdinalViewType& Acol2Irow,
27 const LocalOrdinalViewType& Bcol2Ccol,
28 const LocalOrdinalViewType& Icol2Ccol,
29 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
30 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
31 const std::string& label = std::string(),
32 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
33
34 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
35 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
36 const LocalOrdinalViewType& Acol2Brow,
37 const LocalOrdinalViewType& Acol2Irow,
38 const LocalOrdinalViewType& Bcol2Ccol,
39 const LocalOrdinalViewType& Icol2Ccol,
40 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
41 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
42 const std::string& label = std::string(),
43 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
44};
45
46// Jacobi KernelWrappers for Partial Specialization to OpenMP
47template <class Scalar,
48 class LocalOrdinal,
49 class GlobalOrdinal, class LocalOrdinalViewType>
50struct KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
51 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
52 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
53 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
54 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
55 const LocalOrdinalViewType& Acol2Brow,
56 const LocalOrdinalViewType& Acol2Irow,
57 const LocalOrdinalViewType& Bcol2Ccol,
58 const LocalOrdinalViewType& Icol2Ccol,
59 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
60 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
61 const std::string& label = std::string(),
62 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
63
64 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
65 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
66 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
67 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
68 const LocalOrdinalViewType& Acol2Brow,
69 const LocalOrdinalViewType& Acol2Irow,
70 const LocalOrdinalViewType& Bcol2Ccol,
71 const LocalOrdinalViewType& Icol2Ccol,
72 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
73 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
74 const std::string& label = std::string(),
75 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
76
77 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
78 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
79 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
80 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
81 const LocalOrdinalViewType& Acol2Brow,
82 const LocalOrdinalViewType& Acol2Irow,
83 const LocalOrdinalViewType& Bcol2Ccol,
84 const LocalOrdinalViewType& Icol2Ccol,
85 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
86 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
87 const std::string& label = std::string(),
88 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
89};
90
91// Triple-Product KernelWrappers for Partial Specialization to OpenMP
92template <class Scalar,
93 class LocalOrdinal,
94 class GlobalOrdinal, class LocalOrdinalViewType>
95struct KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
96 static inline void mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
97 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
98 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
99 const LocalOrdinalViewType& Acol2Prow,
100 const LocalOrdinalViewType& Acol2PIrow,
101 const LocalOrdinalViewType& Pcol2Ccol,
102 const LocalOrdinalViewType& PIcol2Ccol,
103 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
104 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
105 const std::string& label = std::string(),
106 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
107
108 static inline void mult_R_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
109 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
110 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
111 const LocalOrdinalViewType& Acol2Prow,
112 const LocalOrdinalViewType& Acol2PIrow,
113 const LocalOrdinalViewType& Pcol2Ccol,
114 const LocalOrdinalViewType& PIcol2Ccol,
115 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
116 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
117 const std::string& label = std::string(),
118 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
119
120 static inline void mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
121 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
122 const LocalOrdinalViewType& Acol2Prow,
123 const LocalOrdinalViewType& Acol2PIrow,
124 const LocalOrdinalViewType& Pcol2Ccol,
125 const LocalOrdinalViewType& PIcol2Ccol,
126 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
127 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
128 const std::string& label = std::string(),
129 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
130
131 static inline void mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
132 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
133 const LocalOrdinalViewType& Acol2Prow,
134 const LocalOrdinalViewType& Acol2PIrow,
135 const LocalOrdinalViewType& Pcol2Ccol,
136 const LocalOrdinalViewType& PIcol2Ccol,
137 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
138 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
139 const std::string& label = std::string(),
140 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
141};
142
143/*********************************************************************************************************/
144template <class Scalar,
145 class LocalOrdinal,
146 class GlobalOrdinal,
147 class LocalOrdinalViewType>
148void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
149 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
150 const LocalOrdinalViewType& Acol2Brow,
151 const LocalOrdinalViewType& Acol2Irow,
152 const LocalOrdinalViewType& Bcol2Ccol,
153 const LocalOrdinalViewType& Icol2Ccol,
154 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
155 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
156 const std::string& label,
157 const Teuchos::RCP<Teuchos::ParameterList>& params) {
158#ifdef HAVE_TPETRA_MMM_TIMINGS
159 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
160 using Teuchos::TimeMonitor;
161 Teuchos::RCP<TimeMonitor> MM;
162#endif
163
164 // Node-specific code
165 std::string nodename("OpenMP");
166
167 // Lots and lots of typedefs
168 using Teuchos::RCP;
170 typedef typename KCRS::device_type device_t;
171 typedef typename KCRS::StaticCrsGraphType graph_t;
172 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
173 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
174 typedef typename KCRS::values_type::non_const_type scalar_view_t;
175
176 // Options
177 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
178 std::string myalg("SPGEMM_KK_MEMORY");
179
180 if (!params.is_null()) {
181 if (params->isParameter("openmp: algorithm"))
182 myalg = params->get("openmp: algorithm", myalg);
183 if (params->isParameter("openmp: team work size"))
184 team_work_size = params->get("openmp: team work size", team_work_size);
185 }
186
187 if (myalg == "LTG") {
188 // Use the LTG kernel if requested
189 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_newmatrix_LowThreadGustavsonKernel(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
190 } else {
191 // Use the Kokkos-Kernels OpenMP Kernel
192#ifdef HAVE_TPETRA_MMM_TIMINGS
193 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPWrapper"))));
194#endif
195 // KokkosKernelsHandle
196 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
197 typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
198 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>
199 KernelHandle;
200
201 // Grab the Kokkos::SparseCrsMatrices
202 const KCRS Ak = Aview.origMatrix->getLocalMatrixDevice();
203 // const KCRS & Bk = Bview.origMatrix->getLocalMatrixDevice();
204
205 // Get the algorithm mode
206 std::string alg = nodename + std::string(" algorithm");
207 // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
208 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
209 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
210
211 // Merge the B and Bimport matrices
212 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
213
214#ifdef HAVE_TPETRA_MMM_TIMINGS
215 MM = Teuchos::null;
216 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPCore"))));
217#endif
218
219 // Do the multiply on whatever we've got
220 typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
221 // typename KernelHandle::nnz_lno_t BnumRows = Bmerged->numRows();
222 // typename KernelHandle::nnz_lno_t BnumCols = Bmerged->numCols();
223 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
224 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
225
226 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
227 lno_nnz_view_t entriesC;
228 scalar_view_t valuesC;
229 KernelHandle kh;
230 kh.create_spgemm_handle(alg_enum);
231 kh.set_team_work_size(team_work_size);
232 // KokkosSparse::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged->graph.row_map,Bmerged->graph.entries,false,row_mapC);
233 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Ak.graph.row_map, Ak.graph.entries, false, Bmerged.graph.row_map, Bmerged.graph.entries, false, row_mapC);
234
235 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
236 if (c_nnz_size) {
237 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
238 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
239 }
240 // KokkosSparse::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged->graph.row_map,Bmerged->graph.entries,Bmerged->values,false,row_mapC,entriesC,valuesC);
241 KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Ak.graph.row_map, Ak.graph.entries, Ak.values, false, Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false, row_mapC, entriesC, valuesC);
242 kh.destroy_spgemm_handle();
243
244#ifdef HAVE_TPETRA_MMM_TIMINGS
245 MM = Teuchos::null;
246 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPSort"))));
247#endif
248 // Sort & set values
249 if (params.is_null() || params->get("sort entries", true))
250 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
251 C.setAllValues(row_mapC, entriesC, valuesC);
252
253 } // end OMP KokkosKernels loop
254
255#ifdef HAVE_TPETRA_MMM_TIMINGS
256 MM = Teuchos::null;
257 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPESFC"))));
258#endif
259
260 // Final Fillcomplete
261 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
262 labelList->set("Timer Label", label);
263 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
264 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
265 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
266
267#if 0
268 {
269 Teuchos::ArrayRCP< const size_t > Crowptr;
270 Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
271 Teuchos::ArrayRCP< const Scalar > Cvalues;
272 C.getAllValues(Crowptr,Ccolind,Cvalues);
273
274 // DEBUG
275 int MyPID = C->getComm()->getRank();
276 printf("[%d] Crowptr = ",MyPID);
277 for(size_t i=0; i<(size_t) Crowptr.size(); i++) {
278 printf("%3d ",(int)Crowptr.getConst()[i]);
279 }
280 printf("\n");
281 printf("[%d] Ccolind = ",MyPID);
282 for(size_t i=0; i<(size_t)Ccolind.size(); i++) {
283 printf("%3d ",(int)Ccolind.getConst()[i]);
284 }
285 printf("\n");
286 fflush(stdout);
287 // END DEBUG
288 }
289#endif
290}
291
292/*********************************************************************************************************/
293template <class Scalar,
294 class LocalOrdinal,
295 class GlobalOrdinal,
296 class LocalOrdinalViewType>
297void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
298 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
299 const LocalOrdinalViewType& Acol2Brow,
300 const LocalOrdinalViewType& Acol2Irow,
301 const LocalOrdinalViewType& Bcol2Ccol,
302 const LocalOrdinalViewType& Icol2Ccol,
303 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
304 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
305 const std::string& label,
306 const Teuchos::RCP<Teuchos::ParameterList>& params) {
307#ifdef HAVE_TPETRA_MMM_TIMINGS
308 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
309 using Teuchos::TimeMonitor;
310 Teuchos::RCP<TimeMonitor> MM;
311#endif
312
313 // Lots and lots of typedefs
314 using Teuchos::RCP;
315
316 // Options
317 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
318 std::string myalg("LTG");
319 if (!params.is_null()) {
320 if (params->isParameter("openmp: algorithm"))
321 myalg = params->get("openmp: algorithm", myalg);
322 if (params->isParameter("openmp: team work size"))
323 team_work_size = params->get("openmp: team work size", team_work_size);
324 }
325
326 if (myalg == "LTG") {
327 // Use the LTG kernel if requested
328 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_reuse_LowThreadGustavsonKernel(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
329 } else {
330 throw std::runtime_error("Tpetra::MatrixMatrix::MMM reuse unknown kernel");
331 }
332
333#ifdef HAVE_TPETRA_MMM_TIMINGS
334 MM = Teuchos::null;
335 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse OpenMPESFC"))));
336#endif
337 C.fillComplete(C.getDomainMap(), C.getRangeMap());
338}
339
340/*********************************************************************************************************/
341template <class Scalar,
342 class LocalOrdinal,
343 class GlobalOrdinal,
344 class LocalOrdinalViewType>
345void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
346 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
347 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
348 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
349 const LocalOrdinalViewType& Acol2Brow,
350 const LocalOrdinalViewType& Acol2Irow,
351 const LocalOrdinalViewType& Bcol2Ccol,
352 const LocalOrdinalViewType& Icol2Ccol,
353 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
354 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
355 const std::string& label,
356 const Teuchos::RCP<Teuchos::ParameterList>& params) {
357#ifdef HAVE_TPETRA_MMM_TIMINGS
358 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
359 using Teuchos::TimeMonitor;
360 Teuchos::RCP<TimeMonitor> MM;
361#endif
362
363 // Node-specific code
364 using Teuchos::RCP;
365
366 // Options
367 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
368 std::string myalg("LTG");
369 if (!params.is_null()) {
370 if (params->isParameter("openmp: jacobi algorithm"))
371 myalg = params->get("openmp: jacobi algorithm", myalg);
372 if (params->isParameter("openmp: team work size"))
373 team_work_size = params->get("openmp: team work size", team_work_size);
374 }
375
376 if (myalg == "LTG") {
377 // Use the LTG kernel if requested
378 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_LowThreadGustavsonKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
379 } else if (myalg == "MSAK") {
380 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
381 } else if (myalg == "KK") {
382 jacobi_A_B_newmatrix_KokkosKernels(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
383 } else {
384 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
385 }
386
387#ifdef HAVE_TPETRA_MMM_TIMINGS
388 MM = Teuchos::null;
389 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
390#endif
391
392 // Final Fillcomplete
393 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
394 labelList->set("Timer Label", label);
395 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
396
397 // NOTE: MSAK already fillCompletes, so we have to check here
398 if (!C.isFillComplete()) {
399 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
400 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
401 }
402}
403
404/*********************************************************************************************************/
405template <class Scalar,
406 class LocalOrdinal,
407 class GlobalOrdinal,
408 class LocalOrdinalViewType>
409void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
410 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
411 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
412 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
413 const LocalOrdinalViewType& Acol2Brow,
414 const LocalOrdinalViewType& Acol2Irow,
415 const LocalOrdinalViewType& Bcol2Ccol,
416 const LocalOrdinalViewType& Icol2Ccol,
417 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
418 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
419 const std::string& label,
420 const Teuchos::RCP<Teuchos::ParameterList>& params) {
421#ifdef HAVE_TPETRA_MMM_TIMINGS
422 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
423 using Teuchos::TimeMonitor;
424 Teuchos::RCP<TimeMonitor> MM;
425#endif
426
427 // Lots and lots of typedefs
428 using Teuchos::RCP;
429
430 // Options
431 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
432 std::string myalg("LTG");
433 if (!params.is_null()) {
434 if (params->isParameter("openmp: jacobi algorithm"))
435 myalg = params->get("openmp: jacobi algorithm", myalg);
436 if (params->isParameter("openmp: team work size"))
437 team_work_size = params->get("openmp: team work size", team_work_size);
438 }
439
440 if (myalg == "LTG") {
441 // Use the LTG kernel if requested
442 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_reuse_LowThreadGustavsonKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
443 } else {
444 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi reuse unknown kernel");
445 }
446
447#ifdef HAVE_TPETRA_MMM_TIMINGS
448 MM = Teuchos::null;
449 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse OpenMPESFC"))));
450#endif
451 C.fillComplete(C.getDomainMap(), C.getRangeMap());
452}
453
454/*********************************************************************************************************/
455template <class Scalar,
456 class LocalOrdinal,
457 class GlobalOrdinal,
458 class LocalOrdinalViewType>
459void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
460 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
461 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
462 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
463 const LocalOrdinalViewType& Acol2Brow,
464 const LocalOrdinalViewType& Acol2Irow,
465 const LocalOrdinalViewType& Bcol2Ccol,
466 const LocalOrdinalViewType& Icol2Ccol,
467 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
468 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
469 const std::string& label,
470 const Teuchos::RCP<Teuchos::ParameterList>& params) {
471#ifdef HAVE_TPETRA_MMM_TIMINGS
472 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
473 using Teuchos::TimeMonitor;
474 Teuchos::RCP<TimeMonitor> MM;
475#endif
476
477 // Check if the diagonal entries exist in debug mode
478 const bool debug = Tpetra::Details::Behavior::debug();
479 if (debug) {
480 auto rowMap = Aview.origMatrix->getRowMap();
482 Aview.origMatrix->getLocalDiagCopy(diags);
483 size_t diagLength = rowMap->getLocalNumElements();
484 Teuchos::Array<Scalar> diagonal(diagLength);
485 diags.get1dCopy(diagonal());
486
487 for (size_t i = 0; i < diagLength; ++i) {
488 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
489 std::runtime_error,
490 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl
491 << "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
492 }
493 }
494
495 // Usings
496 using device_t = typename Tpetra::KokkosCompat::KokkosOpenMPWrapperNode::device_type;
498 using graph_t = typename matrix_t::StaticCrsGraphType;
499 using lno_view_t = typename graph_t::row_map_type::non_const_type;
500 using c_lno_view_t = typename graph_t::row_map_type::const_type;
501 using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
502 using scalar_view_t = typename matrix_t::values_type::non_const_type;
503
504 // KokkosKernels handle
505 using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
506 typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
507 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
508
509 // Get the rowPtr, colInd and vals of importMatrix
510 c_lno_view_t Irowptr;
511 lno_nnz_view_t Icolind;
512 scalar_view_t Ivals;
513 if (!Bview.importMatrix.is_null()) {
514 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
515 Irowptr = lclB.graph.row_map;
516 Icolind = lclB.graph.entries;
517 Ivals = lclB.values;
518 }
519
520 // Merge the B and Bimport matrices
521 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
522
523 // Get the properties and arrays of input matrices
524 const matrix_t Amat = Aview.origMatrix->getLocalMatrixDevice();
525 const matrix_t Bmat = Bview.origMatrix->getLocalMatrixDevice();
526
527 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
528 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
529 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
530
531 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
532 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
533 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
534
535 // Arrays of the output matrix
536 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
537 lno_nnz_view_t entriesC;
538 scalar_view_t valuesC;
539
540 // Options
541 int team_work_size = 16;
542 std::string myalg("SPGEMM_KK_MEMORY");
543 if (!params.is_null()) {
544 if (params->isParameter("cuda: algorithm"))
545 myalg = params->get("cuda: algorithm", myalg);
546 if (params->isParameter("cuda: team work size"))
547 team_work_size = params->get("cuda: team work size", team_work_size);
548 }
549
550 // Get the algorithm mode
551 std::string nodename("OpenMP");
552 std::string alg = nodename + std::string(" algorithm");
553 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
554 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
555
556 // KokkosKernels call
557 handle_t kh;
558 kh.create_spgemm_handle(alg_enum);
559 kh.set_team_work_size(team_work_size);
560
561 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
562 Arowptr, Acolind, false,
563 Browptr, Bcolind, false,
564 row_mapC);
565
566 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
567 if (c_nnz_size) {
568 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
569 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
570 }
571
572 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
573 Arowptr, Acolind, Avals, false,
574 Browptr, Bcolind, Bvals, false,
575 row_mapC, entriesC, valuesC,
576 omega, Dinv.getLocalViewDevice(Tpetra::Access::ReadOnly));
577 kh.destroy_spgemm_handle();
578
579#ifdef HAVE_TPETRA_MMM_TIMINGS
580 MM = Teuchos::null;
581 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPSort"))));
582#endif
583
584 // Sort & set values
585 if (params.is_null() || params->get("sort entries", true))
586 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
587 C.setAllValues(row_mapC, entriesC, valuesC);
588
589#ifdef HAVE_TPETRA_MMM_TIMINGS
590 MM = Teuchos::null;
591 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
592#endif
593
594 // Final Fillcomplete
595 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
596 labelList->set("Timer Label", label);
597 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
598 Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
599 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
600}
601
602/*********************************************************************************************************/
603template <class Scalar,
604 class LocalOrdinal,
605 class GlobalOrdinal,
606 class LocalOrdinalViewType>
607void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
608 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
609 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
610 const LocalOrdinalViewType& Acol2Prow,
611 const LocalOrdinalViewType& Acol2PIrow,
612 const LocalOrdinalViewType& Pcol2Accol,
613 const LocalOrdinalViewType& PIcol2Accol,
614 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
615 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
616 const std::string& label,
617 const Teuchos::RCP<Teuchos::ParameterList>& params) {
618#ifdef HAVE_TPETRA_MMM_TIMINGS
619 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
620 using Teuchos::TimeMonitor;
621 Teuchos::RCP<TimeMonitor> MM;
622#endif
623
624 // Node-specific code
625 std::string nodename("OpenMP");
626
627 // Options
628 std::string myalg("LTG");
629
630 if (!params.is_null()) {
631 if (params->isParameter("openmp: rap algorithm"))
632 myalg = params->get("openmp: rap algorithm", myalg);
633 }
634
635 if (myalg == "LTG") {
636 // Use the LTG kernel if requested
637 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol, PIcol2Accol, Ac, Acimport, label, params);
638 } else {
639 throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
640 }
641}
642
643/*********************************************************************************************************/
644template <class Scalar,
645 class LocalOrdinal,
646 class GlobalOrdinal,
647 class LocalOrdinalViewType>
648void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_R_A_P_reuse_kernel_wrapper(
649 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
650 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
651 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
652
653 const LocalOrdinalViewType& Acol2Prow,
654 const LocalOrdinalViewType& Acol2Irow,
655 const LocalOrdinalViewType& Pcol2Ccol,
656 const LocalOrdinalViewType& Icol2Ccol,
657 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
658 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
659 const std::string& label,
660 const Teuchos::RCP<Teuchos::ParameterList>& params) {
661#ifdef HAVE_TPETRA_MMM_TIMINGS
662 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
663 using Teuchos::TimeMonitor;
664 Teuchos::RCP<TimeMonitor> MM;
665#endif
666
667 // Lots and lots of typedefs
668 using Teuchos::RCP;
669
670 // Options
671 std::string myalg("LTG");
672 if (!params.is_null()) {
673 if (params->isParameter("openmp: rap algorithm"))
674 myalg = params->get("openmp: rap algorithm", myalg);
675 }
676
677 if (myalg == "LTG") {
678 // Use the LTG kernel if requested
679 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2Irow, Pcol2Ccol, Icol2Ccol, C, Cimport, label, params);
680 } else {
681 throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
682 }
683
684#ifdef HAVE_TPETRA_MMM_TIMINGS
685 MM = Teuchos::null;
686 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("RAP Reuse OpenMPESFC"))));
687#endif
688 C.fillComplete(C.getDomainMap(), C.getRangeMap());
689}
690
691/*********************************************************************************************************/
692template <class Scalar,
693 class LocalOrdinal,
694 class GlobalOrdinal,
695 class LocalOrdinalViewType>
696void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
697
698 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
699 const LocalOrdinalViewType& Acol2Prow,
700 const LocalOrdinalViewType& Acol2PIrow,
701 const LocalOrdinalViewType& Pcol2Accol,
702 const LocalOrdinalViewType& PIcol2Accol,
703 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
704 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
705 const std::string& label,
706 const Teuchos::RCP<Teuchos::ParameterList>& params) {
707#ifdef HAVE_TPETRA_MMM_TIMINGS
708 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
709 using Teuchos::TimeMonitor;
710 Teuchos::RCP<TimeMonitor> MM;
711#endif
712
713 // Node-specific code
714 std::string nodename("OpenMP");
715
716 // Options
717 std::string myalg("LTG");
718
719 if (!params.is_null()) {
720 if (params->isParameter("openmp: ptap algorithm"))
721 myalg = params->get("openmp: ptap algorithm", myalg);
722 }
723
724 if (myalg == "LTG") {
725#ifdef HAVE_TPETRA_MMM_TIMINGS
726 MM = Teuchos::null;
727 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
728#endif
729
730 using Teuchos::ParameterList;
731 using Teuchos::RCP;
732 using LO = LocalOrdinal;
733 using GO = GlobalOrdinal;
734 using SC = Scalar;
735
736 // We don't need a kernel-level PTAP, we just transpose here
737 using transposer_type =
738 RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
739 transposer_type transposer(Pview.origMatrix, label + "XP: ");
740 RCP<ParameterList> transposeParams(new ParameterList);
741 if (!params.is_null()) {
742 transposeParams->set("compute global constants",
743 params->get("compute global constants: temporaries",
744 false));
745 }
746 transposeParams->set("sort", false);
747 RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
748 transposer.createTransposeLocal(transposeParams);
749 CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
750 Rview.origMatrix = Ptrans;
751
752 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel;
753 mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
754 PIcol2Accol, Ac, Acimport, label, params);
755 } else {
756 throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P newmatrix unknown kernel");
757 }
758}
759
760/*********************************************************************************************************/
761template <class Scalar,
762 class LocalOrdinal,
763 class GlobalOrdinal,
764 class LocalOrdinalViewType>
765void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
766
767 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
768 const LocalOrdinalViewType& Acol2Prow,
769 const LocalOrdinalViewType& Acol2PIrow,
770 const LocalOrdinalViewType& Pcol2Accol,
771 const LocalOrdinalViewType& PIcol2Accol,
772 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
773 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
774 const std::string& label,
775 const Teuchos::RCP<Teuchos::ParameterList>& params) {
776#ifdef HAVE_TPETRA_MMM_TIMINGS
777 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
778 using Teuchos::TimeMonitor;
779 Teuchos::RCP<TimeMonitor> MM;
780#endif
781
782 // Node-specific code
783 std::string nodename("OpenMP");
784
785 // Options
786 std::string myalg("LTG");
787
788 if (!params.is_null()) {
789 if (params->isParameter("openmp: ptap algorithm"))
790 myalg = params->get("openmp: ptap algorithm", myalg);
791 }
792
793 if (myalg == "LTG") {
794#ifdef HAVE_TPETRA_MMM_TIMINGS
795 MM = Teuchos::null;
796 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
797#endif
798
799 using Teuchos::ParameterList;
800 using Teuchos::RCP;
801 using LO = LocalOrdinal;
802 using GO = GlobalOrdinal;
803 using SC = Scalar;
804
805 // We don't need a kernel-level PTAP, we just transpose here
806 using transposer_type =
807 RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
808 transposer_type transposer(Pview.origMatrix, label + "XP: ");
809 RCP<ParameterList> transposeParams(new ParameterList);
810 if (!params.is_null()) {
811 transposeParams->set("compute global constants",
812 params->get("compute global constants: temporaries",
813 false));
814 }
815 transposeParams->set("sort", false);
816 RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
817 transposer.createTransposeLocal(transposeParams);
818 CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
819 Rview.origMatrix = Ptrans;
820
821 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel;
822 mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
823 PIcol2Accol, Ac, Acimport, label, params);
824 } else {
825 throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P reuse unknown kernel");
826 }
827 Ac.fillComplete(Ac.getDomainMap(), Ac.getRangeMap());
828}
829
830} // namespace MMdetails
831} // namespace Tpetra
832
833#endif // OpenMP
834
835#endif
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Struct that holds views of the contents of a CrsMatrix.
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.