Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
TpetraExt_MatrixMatrix_OpenMP.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
11#define TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
12
13#ifdef HAVE_TPETRA_INST_OPENMP
14namespace Tpetra {
15namespace MMdetails {
16
17/*********************************************************************************************************/
18// MMM KernelWrappers for Partial Specialization to OpenMP
19template <class Scalar,
20 class LocalOrdinal,
21 class GlobalOrdinal, class LocalOrdinalViewType>
22struct KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
23 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
24 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
25 const LocalOrdinalViewType& Acol2Brow,
26 const LocalOrdinalViewType& Acol2Irow,
27 const LocalOrdinalViewType& Bcol2Ccol,
28 const LocalOrdinalViewType& Icol2Ccol,
29 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
30 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
31 const std::string& label = std::string(),
32 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
33
34 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
35 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
36 const LocalOrdinalViewType& Acol2Brow,
37 const LocalOrdinalViewType& Acol2Irow,
38 const LocalOrdinalViewType& Bcol2Ccol,
39 const LocalOrdinalViewType& Icol2Ccol,
40 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
41 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
42 const std::string& label = std::string(),
43 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
44};
45
46// Jacobi KernelWrappers for Partial Specialization to OpenMP
47template <class Scalar,
48 class LocalOrdinal,
49 class GlobalOrdinal, class LocalOrdinalViewType>
50struct KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
51 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
52 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
53 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
54 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
55 const LocalOrdinalViewType& Acol2Brow,
56 const LocalOrdinalViewType& Acol2Irow,
57 const LocalOrdinalViewType& Bcol2Ccol,
58 const LocalOrdinalViewType& Icol2Ccol,
59 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
60 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
61 const std::string& label = std::string(),
62 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
63
64 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
65 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
66 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
67 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
68 const LocalOrdinalViewType& Acol2Brow,
69 const LocalOrdinalViewType& Acol2Irow,
70 const LocalOrdinalViewType& Bcol2Ccol,
71 const LocalOrdinalViewType& Icol2Ccol,
72 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
73 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
74 const std::string& label = std::string(),
75 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
76
77 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
78 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
79 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
80 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
81 const LocalOrdinalViewType& Acol2Brow,
82 const LocalOrdinalViewType& Acol2Irow,
83 const LocalOrdinalViewType& Bcol2Ccol,
84 const LocalOrdinalViewType& Icol2Ccol,
85 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
86 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
87 const std::string& label = std::string(),
88 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
89};
90
91// Triple-Product KernelWrappers for Partial Specialization to OpenMP
92template <class Scalar,
93 class LocalOrdinal,
94 class GlobalOrdinal, class LocalOrdinalViewType>
95struct KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
96 static inline void mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
97 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
98 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
99 const LocalOrdinalViewType& Acol2Prow,
100 const LocalOrdinalViewType& Acol2PIrow,
101 const LocalOrdinalViewType& Pcol2Ccol,
102 const LocalOrdinalViewType& PIcol2Ccol,
103 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
104 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
105 const std::string& label = std::string(),
106 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
107
108 static inline void mult_R_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
109 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
110 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
111 const LocalOrdinalViewType& Acol2Prow,
112 const LocalOrdinalViewType& Acol2PIrow,
113 const LocalOrdinalViewType& Pcol2Ccol,
114 const LocalOrdinalViewType& PIcol2Ccol,
115 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
116 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
117 const std::string& label = std::string(),
118 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
119
120 static inline void mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
121 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
122 const LocalOrdinalViewType& Acol2Prow,
123 const LocalOrdinalViewType& Acol2PIrow,
124 const LocalOrdinalViewType& Pcol2Ccol,
125 const LocalOrdinalViewType& PIcol2Ccol,
126 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
127 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
128 const std::string& label = std::string(),
129 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
130
131 static inline void mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
132 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
133 const LocalOrdinalViewType& Acol2Prow,
134 const LocalOrdinalViewType& Acol2PIrow,
135 const LocalOrdinalViewType& Pcol2Ccol,
136 const LocalOrdinalViewType& PIcol2Ccol,
137 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
138 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
139 const std::string& label = std::string(),
140 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
141};
142
143/*********************************************************************************************************/
144template <class Scalar,
145 class LocalOrdinal,
146 class GlobalOrdinal,
147 class LocalOrdinalViewType>
148void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
149 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
150 const LocalOrdinalViewType& Acol2Brow,
151 const LocalOrdinalViewType& Acol2Irow,
152 const LocalOrdinalViewType& Bcol2Ccol,
153 const LocalOrdinalViewType& Icol2Ccol,
154 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
155 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
156 const std::string& label,
157 const Teuchos::RCP<Teuchos::ParameterList>& params) {
158#ifdef HAVE_TPETRA_MMM_TIMINGS
159 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
160 using Teuchos::TimeMonitor;
161 Teuchos::RCP<TimeMonitor> MM;
162#endif
163
164 // Node-specific code
165 std::string nodename("OpenMP");
166
167 // Lots and lots of typedefs
168 using Teuchos::RCP;
170 typedef typename KCRS::device_type device_t;
171 typedef typename KCRS::StaticCrsGraphType graph_t;
172 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
173 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
174 typedef typename KCRS::values_type::non_const_type scalar_view_t;
175
176 // Options
177 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
178 std::string myalg("SPGEMM_KK_MEMORY");
179
180 if (!params.is_null()) {
181 if (params->isParameter("openmp: algorithm"))
182 myalg = params->get("openmp: algorithm", myalg);
183 if (params->isParameter("openmp: team work size"))
184 team_work_size = params->get("openmp: team work size", team_work_size);
185 }
186
187 if (myalg == "LTG") {
188 // Use the LTG kernel if requested
189 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_newmatrix_LowThreadGustavsonKernel(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
190 } else {
191 // Use the Kokkos-Kernels OpenMP Kernel
192#ifdef HAVE_TPETRA_MMM_TIMINGS
193 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPWrapper"))));
194#endif
195 // KokkosKernelsHandle
196 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
197 typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
198 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>
199 KernelHandle;
200
201 // Grab the Kokkos::SparseCrsMatrices
202 const KCRS& Ak = Aview.origMatrix->getLocalMatrixDevice();
203 // const KCRS & Bk = Bview.origMatrix->getLocalMatrixDevice();
204
205 // Get the algorithm mode
206 std::string alg = nodename + std::string(" algorithm");
207 // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
208 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
209 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
210
211 // Merge the B and Bimport matrices
212 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
213
214#ifdef HAVE_TPETRA_MMM_TIMINGS
215 MM = Teuchos::null;
216 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPCore"))));
217#endif
218
219 // Do the multiply on whatever we've got
220 typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
221 // typename KernelHandle::nnz_lno_t BnumRows = Bmerged->numRows();
222 // typename KernelHandle::nnz_lno_t BnumCols = Bmerged->numCols();
223 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
224 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
225
226 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
227 lno_nnz_view_t entriesC;
228 scalar_view_t valuesC;
229 KernelHandle kh;
230 kh.create_spgemm_handle(alg_enum);
231 kh.set_team_work_size(team_work_size);
232 // KokkosSparse::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged->graph.row_map,Bmerged->graph.entries,false,row_mapC);
233 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Ak.graph.row_map, Ak.graph.entries, false, Bmerged.graph.row_map, Bmerged.graph.entries, false, row_mapC);
234
235 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
236 if (c_nnz_size) {
237 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
238 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
239 }
240 // KokkosSparse::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged->graph.row_map,Bmerged->graph.entries,Bmerged->values,false,row_mapC,entriesC,valuesC);
241 KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Ak.graph.row_map, Ak.graph.entries, Ak.values, false, Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false, row_mapC, entriesC, valuesC);
242 kh.destroy_spgemm_handle();
243
244#ifdef HAVE_TPETRA_MMM_TIMINGS
245 MM = Teuchos::null;
246 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPSort"))));
247#endif
248 // Sort & set values
249 if (params.is_null() || params->get("sort entries", true))
250 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
251 C.setAllValues(row_mapC, entriesC, valuesC);
252
253 } // end OMP KokkosKernels loop
254
255#ifdef HAVE_TPETRA_MMM_TIMINGS
256 MM = Teuchos::null;
257 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPESFC"))));
258#endif
259
260 // Final Fillcomplete
261 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
262 labelList->set("Timer Label", label);
263 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
264 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
265 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
266
267#if 0
268 {
269 Teuchos::ArrayRCP< const size_t > Crowptr;
270 Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
271 Teuchos::ArrayRCP< const Scalar > Cvalues;
272 C.getAllValues(Crowptr,Ccolind,Cvalues);
273
274 // DEBUG
275 int MyPID = C->getComm()->getRank();
276 printf("[%d] Crowptr = ",MyPID);
277 for(size_t i=0; i<(size_t) Crowptr.size(); i++) {
278 printf("%3d ",(int)Crowptr.getConst()[i]);
279 }
280 printf("\n");
281 printf("[%d] Ccolind = ",MyPID);
282 for(size_t i=0; i<(size_t)Ccolind.size(); i++) {
283 printf("%3d ",(int)Ccolind.getConst()[i]);
284 }
285 printf("\n");
286 fflush(stdout);
287 // END DEBUG
288 }
289#endif
290}
291
292/*********************************************************************************************************/
293template <class Scalar,
294 class LocalOrdinal,
295 class GlobalOrdinal,
296 class LocalOrdinalViewType>
297void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
298 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
299 const LocalOrdinalViewType& Acol2Brow,
300 const LocalOrdinalViewType& Acol2Irow,
301 const LocalOrdinalViewType& Bcol2Ccol,
302 const LocalOrdinalViewType& Icol2Ccol,
303 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
304 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
305 const std::string& label,
306 const Teuchos::RCP<Teuchos::ParameterList>& params) {
307#ifdef HAVE_TPETRA_MMM_TIMINGS
308 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
309 using Teuchos::TimeMonitor;
310 Teuchos::RCP<TimeMonitor> MM;
311#endif
312
313 // Lots and lots of typedefs
314 using Teuchos::RCP;
315
316 // Options
317 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
318 std::string myalg("LTG");
319 if (!params.is_null()) {
320 if (params->isParameter("openmp: algorithm"))
321 myalg = params->get("openmp: algorithm", myalg);
322 if (params->isParameter("openmp: team work size"))
323 team_work_size = params->get("openmp: team work size", team_work_size);
324 }
325
326 if (myalg == "LTG") {
327 // Use the LTG kernel if requested
328 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_reuse_LowThreadGustavsonKernel(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
329 } else {
330 throw std::runtime_error("Tpetra::MatrixMatrix::MMM reuse unknown kernel");
331 }
332
333#ifdef HAVE_TPETRA_MMM_TIMINGS
334 MM = Teuchos::null;
335 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse OpenMPESFC"))));
336#endif
337 C.fillComplete(C.getDomainMap(), C.getRangeMap());
338}
339
340/*********************************************************************************************************/
341template <class Scalar,
342 class LocalOrdinal,
343 class GlobalOrdinal,
344 class LocalOrdinalViewType>
345void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
346 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
347 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
348 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
349 const LocalOrdinalViewType& Acol2Brow,
350 const LocalOrdinalViewType& Acol2Irow,
351 const LocalOrdinalViewType& Bcol2Ccol,
352 const LocalOrdinalViewType& Icol2Ccol,
353 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
354 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
355 const std::string& label,
356 const Teuchos::RCP<Teuchos::ParameterList>& params) {
357#ifdef HAVE_TPETRA_MMM_TIMINGS
358 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
359 using Teuchos::TimeMonitor;
360 Teuchos::RCP<TimeMonitor> MM;
361#endif
362
363 // Node-specific code
364 using Teuchos::RCP;
365
366 // Options
367 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
368 std::string myalg("LTG");
369 if (!params.is_null()) {
370 if (params->isParameter("openmp: jacobi algorithm"))
371 myalg = params->get("openmp: jacobi algorithm", myalg);
372 if (params->isParameter("openmp: team work size"))
373 team_work_size = params->get("openmp: team work size", team_work_size);
374 }
375
376 if (myalg == "LTG") {
377 // Use the LTG kernel if requested
378 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_LowThreadGustavsonKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
379 } else if (myalg == "MSAK") {
380 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
381 } else if (myalg == "KK") {
382 jacobi_A_B_newmatrix_KokkosKernels(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
383 } else {
384 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
385 }
386
387#ifdef HAVE_TPETRA_MMM_TIMINGS
388 MM = Teuchos::null;
389 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
390#endif
391
392 // Final Fillcomplete
393 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
394 labelList->set("Timer Label", label);
395 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
396
397 // NOTE: MSAK already fillCompletes, so we have to check here
398 if (!C.isFillComplete()) {
399 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
400 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
401 }
402}
403
404/*********************************************************************************************************/
405template <class Scalar,
406 class LocalOrdinal,
407 class GlobalOrdinal,
408 class LocalOrdinalViewType>
409void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
410 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
411 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
412 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
413 const LocalOrdinalViewType& Acol2Brow,
414 const LocalOrdinalViewType& Acol2Irow,
415 const LocalOrdinalViewType& Bcol2Ccol,
416 const LocalOrdinalViewType& Icol2Ccol,
417 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
418 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
419 const std::string& label,
420 const Teuchos::RCP<Teuchos::ParameterList>& params) {
421#ifdef HAVE_TPETRA_MMM_TIMINGS
422 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
423 using Teuchos::TimeMonitor;
424 Teuchos::RCP<TimeMonitor> MM;
425#endif
426
427 // Lots and lots of typedefs
428 using Teuchos::RCP;
429
430 // Options
431 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
432 std::string myalg("LTG");
433 if (!params.is_null()) {
434 if (params->isParameter("openmp: jacobi algorithm"))
435 myalg = params->get("openmp: jacobi algorithm", myalg);
436 if (params->isParameter("openmp: team work size"))
437 team_work_size = params->get("openmp: team work size", team_work_size);
438 }
439
440 if (myalg == "LTG") {
441 // Use the LTG kernel if requested
442 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_reuse_LowThreadGustavsonKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
443 } else {
444 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi reuse unknown kernel");
445 }
446
447#ifdef HAVE_TPETRA_MMM_TIMINGS
448 MM = Teuchos::null;
449 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse OpenMPESFC"))));
450#endif
451 C.fillComplete(C.getDomainMap(), C.getRangeMap());
452}
453
454/*********************************************************************************************************/
455template <class Scalar,
456 class LocalOrdinal,
457 class GlobalOrdinal,
458 class LocalOrdinalViewType>
459void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
460 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
461 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
462 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
463 const LocalOrdinalViewType& Acol2Brow,
464 const LocalOrdinalViewType& Acol2Irow,
465 const LocalOrdinalViewType& Bcol2Ccol,
466 const LocalOrdinalViewType& Icol2Ccol,
467 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
468 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
469 const std::string& label,
470 const Teuchos::RCP<Teuchos::ParameterList>& params) {
471#ifdef HAVE_TPETRA_MMM_TIMINGS
472 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
473 using Teuchos::TimeMonitor;
474 Teuchos::RCP<TimeMonitor> MM;
475#endif
476
477 // Check if the diagonal entries exist in debug mode
478 const bool debug = Tpetra::Details::Behavior::debug();
479 if (debug) {
480 auto rowMap = Aview.origMatrix->getRowMap();
482 Aview.origMatrix->getLocalDiagCopy(diags);
483 size_t diagLength = rowMap->getLocalNumElements();
484 Teuchos::Array<Scalar> diagonal(diagLength);
485 diags.get1dCopy(diagonal());
486
487 for (size_t i = 0; i < diagLength; ++i) {
488 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
489 std::runtime_error,
490 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl
491 << "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
492 }
493 }
494
495 // Usings
496 using device_t = typename Tpetra::KokkosCompat::KokkosOpenMPWrapperNode::device_type;
498 using graph_t = typename matrix_t::StaticCrsGraphType;
499 using lno_view_t = typename graph_t::row_map_type::non_const_type;
500 using c_lno_view_t = typename graph_t::row_map_type::const_type;
501 using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
502 using scalar_view_t = typename matrix_t::values_type::non_const_type;
503
504 // KokkosKernels handle
505 using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
506 typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
507 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
508
509 // Get the rowPtr, colInd and vals of importMatrix
510 c_lno_view_t Irowptr;
511 lno_nnz_view_t Icolind;
512 scalar_view_t Ivals;
513 if (!Bview.importMatrix.is_null()) {
514 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
515 Irowptr = lclB.graph.row_map;
516 Icolind = lclB.graph.entries;
517 Ivals = lclB.values;
518 }
519
520 // Merge the B and Bimport matrices
521 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
522
523 // Get the properties and arrays of input matrices
524 const matrix_t& Amat = Aview.origMatrix->getLocalMatrixDevice();
525 const matrix_t& Bmat = Bview.origMatrix->getLocalMatrixDevice();
526
527 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
528 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
529 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
530
531 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
532 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
533 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
534
535 // Arrays of the output matrix
536 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
537 lno_nnz_view_t entriesC;
538 scalar_view_t valuesC;
539
540 // Options
541 int team_work_size = 16;
542 std::string myalg("SPGEMM_KK_MEMORY");
543 if (!params.is_null()) {
544 if (params->isParameter("cuda: algorithm"))
545 myalg = params->get("cuda: algorithm", myalg);
546 if (params->isParameter("cuda: team work size"))
547 team_work_size = params->get("cuda: team work size", team_work_size);
548 }
549
550 // Get the algorithm mode
551 std::string nodename("OpenMP");
552 std::string alg = nodename + std::string(" algorithm");
553 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
554 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
555
556 // KokkosKernels call
557 handle_t kh;
558 kh.create_spgemm_handle(alg_enum);
559 kh.set_team_work_size(team_work_size);
560
561 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
562 Arowptr, Acolind, false,
563 Browptr, Bcolind, false,
564 row_mapC);
565
566 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
567 if (c_nnz_size) {
568 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
569 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
570 }
571
572 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
573 Arowptr, Acolind, Avals, false,
574 Browptr, Bcolind, Bvals, false,
575 row_mapC, entriesC, valuesC,
576 omega, Dinv.getLocalViewDevice(Tpetra::Access::ReadOnly));
577 kh.destroy_spgemm_handle();
578
579#ifdef HAVE_TPETRA_MMM_TIMINGS
580 MM = Teuchos::null;
581 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPSort"))));
582#endif
583
584 // Sort & set values
585 if (params.is_null() || params->get("sort entries", true))
586 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
587 C.setAllValues(row_mapC, entriesC, valuesC);
588
589#ifdef HAVE_TPETRA_MMM_TIMINGS
590 MM = Teuchos::null;
591 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
592#endif
593
594 // Final Fillcomplete
595 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
596 labelList->set("Timer Label", label);
597 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
598 Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
599 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
600}
601
602/*********************************************************************************************************/
603template <class Scalar,
604 class LocalOrdinal,
605 class GlobalOrdinal,
606 class LocalOrdinalViewType>
607void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
608 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
609 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
610 const LocalOrdinalViewType& Acol2Prow,
611 const LocalOrdinalViewType& Acol2PIrow,
612 const LocalOrdinalViewType& Pcol2Accol,
613 const LocalOrdinalViewType& PIcol2Accol,
614 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
615 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
616 const std::string& label,
617 const Teuchos::RCP<Teuchos::ParameterList>& params) {
618#ifdef HAVE_TPETRA_MMM_TIMINGS
619 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
620 using Teuchos::TimeMonitor;
621 Teuchos::RCP<TimeMonitor> MM;
622#endif
623
624 // Node-specific code
625 std::string nodename("OpenMP");
626
627 // Options
628 std::string myalg("LTG");
629
630 if (!params.is_null()) {
631 if (params->isParameter("openmp: rap algorithm"))
632 myalg = params->get("openmp: rap algorithm", myalg);
633 }
634
635 if (myalg == "LTG") {
636 // Use the LTG kernel if requested
637 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol, PIcol2Accol, Ac, Acimport, label, params);
638 } else {
639 throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
640 }
641}
642
643/*********************************************************************************************************/
644template <class Scalar,
645 class LocalOrdinal,
646 class GlobalOrdinal,
647 class LocalOrdinalViewType>
648void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_R_A_P_reuse_kernel_wrapper(
649 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
650 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
651 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
652
653 const LocalOrdinalViewType& Acol2Prow,
654 const LocalOrdinalViewType& Acol2Irow,
655 const LocalOrdinalViewType& Pcol2Ccol,
656 const LocalOrdinalViewType& Icol2Ccol,
657 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
658 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
659 const std::string& label,
660 const Teuchos::RCP<Teuchos::ParameterList>& params) {
661#ifdef HAVE_TPETRA_MMM_TIMINGS
662 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
663 using Teuchos::TimeMonitor;
664 Teuchos::RCP<TimeMonitor> MM;
665#endif
666
667 // Lots and lots of typedefs
668 using Teuchos::RCP;
669
670 // Options
671 std::string myalg("LTG");
672 if (!params.is_null()) {
673 if (params->isParameter("openmp: rap algorithm"))
674 myalg = params->get("openmp: rap algorithm", myalg);
675 }
676
677 if (myalg == "LTG") {
678 // Use the LTG kernel if requested
679 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2Irow, Pcol2Ccol, Icol2Ccol, C, Cimport, label, params);
680 } else {
681 throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
682 }
683
684#ifdef HAVE_TPETRA_MMM_TIMINGS
685 MM = Teuchos::null;
686 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("RAP Reuse OpenMPESFC"))));
687#endif
688 C.fillComplete(C.getDomainMap(), C.getRangeMap());
689}
690
691/*********************************************************************************************************/
692template <class Scalar,
693 class LocalOrdinal,
694 class GlobalOrdinal,
695 class LocalOrdinalViewType>
696void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
697
698 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
699 const LocalOrdinalViewType& Acol2Prow,
700 const LocalOrdinalViewType& Acol2PIrow,
701 const LocalOrdinalViewType& Pcol2Accol,
702 const LocalOrdinalViewType& PIcol2Accol,
703 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
704 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
705 const std::string& label,
706 const Teuchos::RCP<Teuchos::ParameterList>& params) {
707#ifdef HAVE_TPETRA_MMM_TIMINGS
708 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
709 using Teuchos::TimeMonitor;
710 Teuchos::RCP<TimeMonitor> MM;
711#endif
712
713 // Node-specific code
714 std::string nodename("OpenMP");
715
716 // Options
717 std::string myalg("LTG");
718
719 if (!params.is_null()) {
720 if (params->isParameter("openmp: ptap algorithm"))
721 myalg = params->get("openmp: ptap algorithm", myalg);
722 }
723
724 if (myalg == "LTG") {
725#ifdef HAVE_TPETRA_MMM_TIMINGS
726 MM = Teuchos::null;
727 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
728#endif
729
730 using Teuchos::ParameterList;
731 using Teuchos::RCP;
732 using LO = LocalOrdinal;
733 using GO = GlobalOrdinal;
734 using SC = Scalar;
735
736 // We don't need a kernel-level PTAP, we just transpose here
737 using transposer_type =
738 RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
739 transposer_type transposer(Pview.origMatrix, label + "XP: ");
740 RCP<ParameterList> transposeParams(new ParameterList);
741 if (!params.is_null()) {
742 transposeParams->set("compute global constants",
743 params->get("compute global constants: temporaries",
744 false));
745 }
746 transposeParams->set("sort", false);
747 RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
748 transposer.createTransposeLocal(transposeParams);
749 CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
750 Rview.origMatrix = Ptrans;
751
752 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel;
753 mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
754 PIcol2Accol, Ac, Acimport, label, params);
755 } else {
756 throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P newmatrix unknown kernel");
757 }
758}
759
760/*********************************************************************************************************/
761template <class Scalar,
762 class LocalOrdinal,
763 class GlobalOrdinal,
764 class LocalOrdinalViewType>
765void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
766
767 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
768 const LocalOrdinalViewType& Acol2Prow,
769 const LocalOrdinalViewType& Acol2PIrow,
770 const LocalOrdinalViewType& Pcol2Accol,
771 const LocalOrdinalViewType& PIcol2Accol,
772 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
773 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
774 const std::string& label,
775 const Teuchos::RCP<Teuchos::ParameterList>& params) {
776#ifdef HAVE_TPETRA_MMM_TIMINGS
777 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
778 using Teuchos::TimeMonitor;
779 Teuchos::RCP<TimeMonitor> MM;
780#endif
781
782 // Node-specific code
783 std::string nodename("OpenMP");
784
785 // Options
786 std::string myalg("LTG");
787
788 if (!params.is_null()) {
789 if (params->isParameter("openmp: ptap algorithm"))
790 myalg = params->get("openmp: ptap algorithm", myalg);
791 }
792
793 if (myalg == "LTG") {
794#ifdef HAVE_TPETRA_MMM_TIMINGS
795 MM = Teuchos::null;
796 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
797#endif
798
799 using Teuchos::ParameterList;
800 using Teuchos::RCP;
801 using LO = LocalOrdinal;
802 using GO = GlobalOrdinal;
803 using SC = Scalar;
804
805 // We don't need a kernel-level PTAP, we just transpose here
806 using transposer_type =
807 RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
808 transposer_type transposer(Pview.origMatrix, label + "XP: ");
809 RCP<ParameterList> transposeParams(new ParameterList);
810 if (!params.is_null()) {
811 transposeParams->set("compute global constants",
812 params->get("compute global constants: temporaries",
813 false));
814 }
815 transposeParams->set("sort", false);
816 RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
817 transposer.createTransposeLocal(transposeParams);
818 CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
819 Rview.origMatrix = Ptrans;
820
821 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel;
822 mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
823 PIcol2Accol, Ac, Acimport, label, params);
824 } else {
825 throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P reuse unknown kernel");
826 }
827 Ac.fillComplete(Ac.getDomainMap(), Ac.getRangeMap());
828}
829
830} // namespace MMdetails
831} // namespace Tpetra
832
833#endif // OpenMP
834
835#endif
Struct that holds views of the contents of a CrsMatrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.