Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
TpetraExt_MatrixMatrix_OpenMP.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
11#define TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
12
13#ifdef HAVE_TPETRA_INST_OPENMP
14namespace Tpetra {
15namespace MMdetails {
16
17/*********************************************************************************************************/
18// MMM KernelWrappers for Partial Specialization to OpenMP
19template <class Scalar,
20 class LocalOrdinal,
21 class GlobalOrdinal, class LocalOrdinalViewType>
22struct KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
23 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
24 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
25 const LocalOrdinalViewType& Acol2Brow,
26 const LocalOrdinalViewType& Acol2Irow,
27 const LocalOrdinalViewType& Bcol2Ccol,
28 const LocalOrdinalViewType& Icol2Ccol,
29 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
30 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
31 const std::string& label = std::string(),
32 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
33
34 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
35 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
36 const LocalOrdinalViewType& Acol2Brow,
37 const LocalOrdinalViewType& Acol2Irow,
38 const LocalOrdinalViewType& Bcol2Ccol,
39 const LocalOrdinalViewType& Icol2Ccol,
40 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
41 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
42 const std::string& label = std::string(),
43 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
44};
45
46// Jacobi KernelWrappers for Partial Specialization to OpenMP
47template <class Scalar,
48 class LocalOrdinal,
49 class GlobalOrdinal, class LocalOrdinalViewType>
50struct KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
51 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
52 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
53 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
54 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
55 const LocalOrdinalViewType& Acol2Brow,
56 const LocalOrdinalViewType& Acol2Irow,
57 const LocalOrdinalViewType& Bcol2Ccol,
58 const LocalOrdinalViewType& Icol2Ccol,
59 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
60 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
61 const std::string& label = std::string(),
62 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
63
64 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
65 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
66 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
67 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
68 const LocalOrdinalViewType& Acol2Brow,
69 const LocalOrdinalViewType& Acol2Irow,
70 const LocalOrdinalViewType& Bcol2Ccol,
71 const LocalOrdinalViewType& Icol2Ccol,
72 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
73 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
74 const std::string& label = std::string(),
75 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
76
77 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
78 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
79 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
80 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
81 const LocalOrdinalViewType& Acol2Brow,
82 const LocalOrdinalViewType& Acol2Irow,
83 const LocalOrdinalViewType& Bcol2Ccol,
84 const LocalOrdinalViewType& Icol2Ccol,
85 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
86 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
87 const std::string& label = std::string(),
88 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
89};
90
91// Triple-Product KernelWrappers for Partial Specialization to OpenMP
92template <class Scalar,
93 class LocalOrdinal,
94 class GlobalOrdinal, class LocalOrdinalViewType>
95struct KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
96 static inline void mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
97 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
98 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
99 const LocalOrdinalViewType& Acol2Prow,
100 const LocalOrdinalViewType& Acol2PIrow,
101 const LocalOrdinalViewType& Pcol2Ccol,
102 const LocalOrdinalViewType& PIcol2Ccol,
103 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
104 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
105 const std::string& label = std::string(),
106 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
107
108 static inline void mult_R_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
109 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
110 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
111 const LocalOrdinalViewType& Acol2Prow,
112 const LocalOrdinalViewType& Acol2PIrow,
113 const LocalOrdinalViewType& Pcol2Ccol,
114 const LocalOrdinalViewType& PIcol2Ccol,
115 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
116 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
117 const std::string& label = std::string(),
118 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
119
120 static inline void mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
121 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
122 const LocalOrdinalViewType& Acol2Prow,
123 const LocalOrdinalViewType& Acol2PIrow,
124 const LocalOrdinalViewType& Pcol2Ccol,
125 const LocalOrdinalViewType& PIcol2Ccol,
126 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
127 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
128 const std::string& label = std::string(),
129 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
130
131 static inline void mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
132 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
133 const LocalOrdinalViewType& Acol2Prow,
134 const LocalOrdinalViewType& Acol2PIrow,
135 const LocalOrdinalViewType& Pcol2Ccol,
136 const LocalOrdinalViewType& PIcol2Ccol,
137 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
138 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
139 const std::string& label = std::string(),
140 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
141};
142
143/*********************************************************************************************************/
144template <class Scalar,
145 class LocalOrdinal,
146 class GlobalOrdinal,
147 class LocalOrdinalViewType>
148void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
149 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
150 const LocalOrdinalViewType& Acol2Brow,
151 const LocalOrdinalViewType& Acol2Irow,
152 const LocalOrdinalViewType& Bcol2Ccol,
153 const LocalOrdinalViewType& Icol2Ccol,
154 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
155 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
156 const std::string& label,
157 const Teuchos::RCP<Teuchos::ParameterList>& params) {
158#ifdef HAVE_TPETRA_MMM_TIMINGS
159 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
160 using Teuchos::TimeMonitor;
161 Teuchos::RCP<TimeMonitor> MM;
162#endif
163
164 // Node-specific code
165 std::string nodename("OpenMP");
166
167 // Lots and lots of typedefs
168 using Teuchos::RCP;
170 typedef typename KCRS::device_type device_t;
171 typedef typename KCRS::StaticCrsGraphType graph_t;
172 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
173 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
174 typedef typename KCRS::values_type::non_const_type scalar_view_t;
175
176 // Options
177 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
178 std::string myalg("SPGEMM_KK_MEMORY");
179
180 if (!params.is_null()) {
181 if (params->isParameter("openmp: algorithm"))
182 myalg = params->get("openmp: algorithm", myalg);
183 if (params->isParameter("openmp: team work size"))
184 team_work_size = params->get("openmp: team work size", team_work_size);
185 }
186
187 if (myalg == "LTG") {
188 // Use the LTG kernel if requested
189 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_newmatrix_LowThreadGustavsonKernel(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
190 } else {
191 // Use the Kokkos-Kernels OpenMP Kernel
192#ifdef HAVE_TPETRA_MMM_TIMINGS
193 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPWrapper"))));
194#endif
195 // KokkosKernelsHandle
196 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
197 typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
198 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>
199 KernelHandle;
200
201 // Grab the Kokkos::SparseCrsMatrices
202 const KCRS Ak = Aview.origMatrix->getLocalMatrixDevice();
203 // const KCRS & Bk = Bview.origMatrix->getLocalMatrixDevice();
204
205 // Get the algorithm mode
206 std::string alg = nodename + std::string(" algorithm");
207 // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
208 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
209 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
210
211 // Merge the B and Bimport matrices
212 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
213
214#ifdef HAVE_TPETRA_MMM_TIMINGS
215 MM = Teuchos::null;
216 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPCore"))));
217#endif
218
219 // Do the multiply on whatever we've got
220 typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
221 // typename KernelHandle::nnz_lno_t BnumRows = Bmerged->numRows();
222 // typename KernelHandle::nnz_lno_t BnumCols = Bmerged->numCols();
223 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
224 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
225
226 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
227 lno_nnz_view_t entriesC;
228 scalar_view_t valuesC;
229 KernelHandle kh;
230 kh.create_spgemm_handle(alg_enum);
231 kh.set_team_work_size(team_work_size);
232 // KokkosSparse::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged->graph.row_map,Bmerged->graph.entries,false,row_mapC);
233 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Ak.graph.row_map, Ak.graph.entries, false, Bmerged.graph.row_map, Bmerged.graph.entries, false, row_mapC);
234
235 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
236 if (c_nnz_size) {
237 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
238 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
239 }
240 // KokkosSparse::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged->graph.row_map,Bmerged->graph.entries,Bmerged->values,false,row_mapC,entriesC,valuesC);
241 KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Ak.graph.row_map, Ak.graph.entries, Ak.values, false, Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false, row_mapC, entriesC, valuesC);
242 kh.destroy_spgemm_handle();
243
244#ifdef HAVE_TPETRA_MMM_TIMINGS
245 MM = Teuchos::null;
246 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPSort"))));
247#endif
248 // Sort & set values
249 if (params.is_null() || params->get("sort entries", true)) {
250 // Tpetra's OpenMP SpGEMM results in almost sorted matrices. Use shell sort.
251 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC, ::KokkosSparse::SortAlgorithm::SHELL);
252 }
253 C.setAllValues(row_mapC, entriesC, valuesC);
254
255 } // end OMP KokkosKernels loop
256
257#ifdef HAVE_TPETRA_MMM_TIMINGS
258 MM = Teuchos::null;
259 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPESFC"))));
260#endif
261
262 // Final Fillcomplete
263 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
264 labelList->set("Timer Label", label);
265 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
266 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
267 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
268
269#if 0
270 {
271 Teuchos::ArrayRCP< const size_t > Crowptr;
272 Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
273 Teuchos::ArrayRCP< const Scalar > Cvalues;
274 C.getAllValues(Crowptr,Ccolind,Cvalues);
275
276 // DEBUG
277 int MyPID = C->getComm()->getRank();
278 printf("[%d] Crowptr = ",MyPID);
279 for(size_t i=0; i<(size_t) Crowptr.size(); i++) {
280 printf("%3d ",(int)Crowptr.getConst()[i]);
281 }
282 printf("\n");
283 printf("[%d] Ccolind = ",MyPID);
284 for(size_t i=0; i<(size_t)Ccolind.size(); i++) {
285 printf("%3d ",(int)Ccolind.getConst()[i]);
286 }
287 printf("\n");
288 fflush(stdout);
289 // END DEBUG
290 }
291#endif
292}
293
294/*********************************************************************************************************/
295template <class Scalar,
296 class LocalOrdinal,
297 class GlobalOrdinal,
298 class LocalOrdinalViewType>
299void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
300 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
301 const LocalOrdinalViewType& Acol2Brow,
302 const LocalOrdinalViewType& Acol2Irow,
303 const LocalOrdinalViewType& Bcol2Ccol,
304 const LocalOrdinalViewType& Icol2Ccol,
305 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
306 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
307 const std::string& label,
308 const Teuchos::RCP<Teuchos::ParameterList>& params) {
309#ifdef HAVE_TPETRA_MMM_TIMINGS
310 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
311 using Teuchos::TimeMonitor;
312 Teuchos::RCP<TimeMonitor> MM;
313#endif
314
315 // Lots and lots of typedefs
316 using Teuchos::RCP;
317
318 // Options
319 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
320 std::string myalg("LTG");
321 if (!params.is_null()) {
322 if (params->isParameter("openmp: algorithm"))
323 myalg = params->get("openmp: algorithm", myalg);
324 if (params->isParameter("openmp: team work size"))
325 team_work_size = params->get("openmp: team work size", team_work_size);
326 }
327
328 if (myalg == "LTG") {
329 // Use the LTG kernel if requested
330 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_reuse_LowThreadGustavsonKernel(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
331 } else {
332 throw std::runtime_error("Tpetra::MatrixMatrix::MMM reuse unknown kernel");
333 }
334
335#ifdef HAVE_TPETRA_MMM_TIMINGS
336 MM = Teuchos::null;
337 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse OpenMPESFC"))));
338#endif
339 C.fillComplete(C.getDomainMap(), C.getRangeMap());
340}
341
342/*********************************************************************************************************/
343template <class Scalar,
344 class LocalOrdinal,
345 class GlobalOrdinal,
346 class LocalOrdinalViewType>
347void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
348 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
349 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
350 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
351 const LocalOrdinalViewType& Acol2Brow,
352 const LocalOrdinalViewType& Acol2Irow,
353 const LocalOrdinalViewType& Bcol2Ccol,
354 const LocalOrdinalViewType& Icol2Ccol,
355 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
356 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
357 const std::string& label,
358 const Teuchos::RCP<Teuchos::ParameterList>& params) {
359#ifdef HAVE_TPETRA_MMM_TIMINGS
360 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
361 using Teuchos::TimeMonitor;
362 Teuchos::RCP<TimeMonitor> MM;
363#endif
364
365 // Node-specific code
366 using Teuchos::RCP;
367
368 // Options
369 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
370 std::string myalg("LTG");
371 if (!params.is_null()) {
372 if (params->isParameter("openmp: jacobi algorithm"))
373 myalg = params->get("openmp: jacobi algorithm", myalg);
374 if (params->isParameter("openmp: team work size"))
375 team_work_size = params->get("openmp: team work size", team_work_size);
376 }
377
378 if (myalg == "LTG") {
379 // Use the LTG kernel if requested
380 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_LowThreadGustavsonKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
381 } else if (myalg == "MSAK") {
382 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
383 } else if (myalg == "KK") {
384 jacobi_A_B_newmatrix_KokkosKernels(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
385 } else {
386 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
387 }
388
389#ifdef HAVE_TPETRA_MMM_TIMINGS
390 MM = Teuchos::null;
391 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
392#endif
393
394 // Final Fillcomplete
395 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
396 labelList->set("Timer Label", label);
397 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
398
399 // NOTE: MSAK already fillCompletes, so we have to check here
400 if (!C.isFillComplete()) {
401 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
402 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
403 }
404}
405
406/*********************************************************************************************************/
407template <class Scalar,
408 class LocalOrdinal,
409 class GlobalOrdinal,
410 class LocalOrdinalViewType>
411void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
412 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
413 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
414 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
415 const LocalOrdinalViewType& Acol2Brow,
416 const LocalOrdinalViewType& Acol2Irow,
417 const LocalOrdinalViewType& Bcol2Ccol,
418 const LocalOrdinalViewType& Icol2Ccol,
419 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
420 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
421 const std::string& label,
422 const Teuchos::RCP<Teuchos::ParameterList>& params) {
423#ifdef HAVE_TPETRA_MMM_TIMINGS
424 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
425 using Teuchos::TimeMonitor;
426 Teuchos::RCP<TimeMonitor> MM;
427#endif
428
429 // Lots and lots of typedefs
430 using Teuchos::RCP;
431
432 // Options
433 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
434 std::string myalg("LTG");
435 if (!params.is_null()) {
436 if (params->isParameter("openmp: jacobi algorithm"))
437 myalg = params->get("openmp: jacobi algorithm", myalg);
438 if (params->isParameter("openmp: team work size"))
439 team_work_size = params->get("openmp: team work size", team_work_size);
440 }
441
442 if (myalg == "LTG") {
443 // Use the LTG kernel if requested
444 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_reuse_LowThreadGustavsonKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
445 } else {
446 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi reuse unknown kernel");
447 }
448
449#ifdef HAVE_TPETRA_MMM_TIMINGS
450 MM = Teuchos::null;
451 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse OpenMPESFC"))));
452#endif
453 C.fillComplete(C.getDomainMap(), C.getRangeMap());
454}
455
456/*********************************************************************************************************/
457template <class Scalar,
458 class LocalOrdinal,
459 class GlobalOrdinal,
460 class LocalOrdinalViewType>
461void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
462 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
463 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
464 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
465 const LocalOrdinalViewType& Acol2Brow,
466 const LocalOrdinalViewType& Acol2Irow,
467 const LocalOrdinalViewType& Bcol2Ccol,
468 const LocalOrdinalViewType& Icol2Ccol,
469 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
470 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
471 const std::string& label,
472 const Teuchos::RCP<Teuchos::ParameterList>& params) {
473#ifdef HAVE_TPETRA_MMM_TIMINGS
474 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
475 using Teuchos::TimeMonitor;
476 Teuchos::RCP<TimeMonitor> MM;
477#endif
478
479 // Check if the diagonal entries exist in debug mode
480 const bool debug = Tpetra::Details::Behavior::debug();
481 if (debug) {
482 auto rowMap = Aview.origMatrix->getRowMap();
484 Aview.origMatrix->getLocalDiagCopy(diags);
485 size_t diagLength = rowMap->getLocalNumElements();
486 Teuchos::Array<Scalar> diagonal(diagLength);
487 diags.get1dCopy(diagonal());
488
489 for (size_t i = 0; i < diagLength; ++i) {
490 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
491 std::runtime_error,
492 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl
493 << "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
494 }
495 }
496
497 // Usings
498 using device_t = typename Tpetra::KokkosCompat::KokkosOpenMPWrapperNode::device_type;
500 using graph_t = typename matrix_t::StaticCrsGraphType;
501 using lno_view_t = typename graph_t::row_map_type::non_const_type;
502 using c_lno_view_t = typename graph_t::row_map_type::const_type;
503 using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
504 using scalar_view_t = typename matrix_t::values_type::non_const_type;
505
506 // KokkosKernels handle
507 using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
508 typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
509 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
510
511 // Get the rowPtr, colInd and vals of importMatrix
512 c_lno_view_t Irowptr;
513 lno_nnz_view_t Icolind;
514 scalar_view_t Ivals;
515 if (!Bview.importMatrix.is_null()) {
516 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
517 Irowptr = lclB.graph.row_map;
518 Icolind = lclB.graph.entries;
519 Ivals = lclB.values;
520 }
521
522 // Merge the B and Bimport matrices
523 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
524
525 // Get the properties and arrays of input matrices
526 const matrix_t Amat = Aview.origMatrix->getLocalMatrixDevice();
527 const matrix_t Bmat = Bview.origMatrix->getLocalMatrixDevice();
528
529 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
530 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
531 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
532
533 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
534 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
535 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
536
537 // Arrays of the output matrix
538 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
539 lno_nnz_view_t entriesC;
540 scalar_view_t valuesC;
541
542 // Options
543 int team_work_size = 16;
544 std::string myalg("SPGEMM_KK_MEMORY");
545 if (!params.is_null()) {
546 if (params->isParameter("cuda: algorithm"))
547 myalg = params->get("cuda: algorithm", myalg);
548 if (params->isParameter("cuda: team work size"))
549 team_work_size = params->get("cuda: team work size", team_work_size);
550 }
551
552 // Get the algorithm mode
553 std::string nodename("OpenMP");
554 std::string alg = nodename + std::string(" algorithm");
555 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
556 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
557
558 // KokkosKernels call
559 handle_t kh;
560 kh.create_spgemm_handle(alg_enum);
561 kh.set_team_work_size(team_work_size);
562
563 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
564 Arowptr, Acolind, false,
565 Browptr, Bcolind, false,
566 row_mapC);
567
568 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
569 if (c_nnz_size) {
570 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
571 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
572 }
573
574 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
575 Arowptr, Acolind, Avals, false,
576 Browptr, Bcolind, Bvals, false,
577 row_mapC, entriesC, valuesC,
578 omega, Dinv.getLocalViewDevice(Tpetra::Access::ReadOnly));
579 kh.destroy_spgemm_handle();
580
581#ifdef HAVE_TPETRA_MMM_TIMINGS
582 MM = Teuchos::null;
583 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPSort"))));
584#endif
585
586 // Sort & set values
587 if (params.is_null() || params->get("sort entries", true)) {
588 // Tpetra's OpenMP SpGEMM results in almost sorted matrices. Use shell sort.
589 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC, ::KokkosSparse::SortAlgorithm::SHELL);
590 }
591 C.setAllValues(row_mapC, entriesC, valuesC);
592
593#ifdef HAVE_TPETRA_MMM_TIMINGS
594 MM = Teuchos::null;
595 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
596#endif
597
598 // Final Fillcomplete
599 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
600 labelList->set("Timer Label", label);
601 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
602 Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
603 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
604}
605
606/*********************************************************************************************************/
607template <class Scalar,
608 class LocalOrdinal,
609 class GlobalOrdinal,
610 class LocalOrdinalViewType>
611void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
612 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
613 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
614 const LocalOrdinalViewType& Acol2Prow,
615 const LocalOrdinalViewType& Acol2PIrow,
616 const LocalOrdinalViewType& Pcol2Accol,
617 const LocalOrdinalViewType& PIcol2Accol,
618 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
619 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
620 const std::string& label,
621 const Teuchos::RCP<Teuchos::ParameterList>& params) {
622#ifdef HAVE_TPETRA_MMM_TIMINGS
623 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
624 using Teuchos::TimeMonitor;
625 Teuchos::RCP<TimeMonitor> MM;
626#endif
627
628 // Node-specific code
629 std::string nodename("OpenMP");
630
631 // Options
632 std::string myalg("LTG");
633
634 if (!params.is_null()) {
635 if (params->isParameter("openmp: rap algorithm"))
636 myalg = params->get("openmp: rap algorithm", myalg);
637 }
638
639 if (myalg == "LTG") {
640 // Use the LTG kernel if requested
641 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol, PIcol2Accol, Ac, Acimport, label, params);
642 } else {
643 throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
644 }
645}
646
647/*********************************************************************************************************/
648template <class Scalar,
649 class LocalOrdinal,
650 class GlobalOrdinal,
651 class LocalOrdinalViewType>
652void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_R_A_P_reuse_kernel_wrapper(
653 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
654 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
655 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
656
657 const LocalOrdinalViewType& Acol2Prow,
658 const LocalOrdinalViewType& Acol2Irow,
659 const LocalOrdinalViewType& Pcol2Ccol,
660 const LocalOrdinalViewType& Icol2Ccol,
661 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
662 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
663 const std::string& label,
664 const Teuchos::RCP<Teuchos::ParameterList>& params) {
665#ifdef HAVE_TPETRA_MMM_TIMINGS
666 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
667 using Teuchos::TimeMonitor;
668 Teuchos::RCP<TimeMonitor> MM;
669#endif
670
671 // Lots and lots of typedefs
672 using Teuchos::RCP;
673
674 // Options
675 std::string myalg("LTG");
676 if (!params.is_null()) {
677 if (params->isParameter("openmp: rap algorithm"))
678 myalg = params->get("openmp: rap algorithm", myalg);
679 }
680
681 if (myalg == "LTG") {
682 // Use the LTG kernel if requested
683 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2Irow, Pcol2Ccol, Icol2Ccol, C, Cimport, label, params);
684 } else {
685 throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
686 }
687
688#ifdef HAVE_TPETRA_MMM_TIMINGS
689 MM = Teuchos::null;
690 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("RAP Reuse OpenMPESFC"))));
691#endif
692 C.fillComplete(C.getDomainMap(), C.getRangeMap());
693}
694
695/*********************************************************************************************************/
696template <class Scalar,
697 class LocalOrdinal,
698 class GlobalOrdinal,
699 class LocalOrdinalViewType>
700void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
701
702 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
703 const LocalOrdinalViewType& Acol2Prow,
704 const LocalOrdinalViewType& Acol2PIrow,
705 const LocalOrdinalViewType& Pcol2Accol,
706 const LocalOrdinalViewType& PIcol2Accol,
707 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
708 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
709 const std::string& label,
710 const Teuchos::RCP<Teuchos::ParameterList>& params) {
711#ifdef HAVE_TPETRA_MMM_TIMINGS
712 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
713 using Teuchos::TimeMonitor;
714 Teuchos::RCP<TimeMonitor> MM;
715#endif
716
717 // Node-specific code
718 std::string nodename("OpenMP");
719
720 // Options
721 std::string myalg("LTG");
722
723 if (!params.is_null()) {
724 if (params->isParameter("openmp: ptap algorithm"))
725 myalg = params->get("openmp: ptap algorithm", myalg);
726 }
727
728 if (myalg == "LTG") {
729#ifdef HAVE_TPETRA_MMM_TIMINGS
730 MM = Teuchos::null;
731 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
732#endif
733
734 using Teuchos::ParameterList;
735 using Teuchos::RCP;
736 using LO = LocalOrdinal;
737 using GO = GlobalOrdinal;
738 using SC = Scalar;
739
740 // We don't need a kernel-level PTAP, we just transpose here
741 using transposer_type =
742 RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
743 transposer_type transposer(Pview.origMatrix, label + "XP: ");
744 RCP<ParameterList> transposeParams(new ParameterList);
745 if (!params.is_null()) {
746 transposeParams->set("compute global constants",
747 params->get("compute global constants: temporaries",
748 false));
749 }
750 transposeParams->set("sort", false);
751 RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
752 transposer.createTransposeLocal(transposeParams);
753 CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
754 Rview.origMatrix = Ptrans;
755
756 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel;
757 mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
758 PIcol2Accol, Ac, Acimport, label, params);
759 } else {
760 throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P newmatrix unknown kernel");
761 }
762}
763
764/*********************************************************************************************************/
765template <class Scalar,
766 class LocalOrdinal,
767 class GlobalOrdinal,
768 class LocalOrdinalViewType>
769void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
770
771 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
772 const LocalOrdinalViewType& Acol2Prow,
773 const LocalOrdinalViewType& Acol2PIrow,
774 const LocalOrdinalViewType& Pcol2Accol,
775 const LocalOrdinalViewType& PIcol2Accol,
776 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
777 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
778 const std::string& label,
779 const Teuchos::RCP<Teuchos::ParameterList>& params) {
780#ifdef HAVE_TPETRA_MMM_TIMINGS
781 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
782 using Teuchos::TimeMonitor;
783 Teuchos::RCP<TimeMonitor> MM;
784#endif
785
786 // Node-specific code
787 std::string nodename("OpenMP");
788
789 // Options
790 std::string myalg("LTG");
791
792 if (!params.is_null()) {
793 if (params->isParameter("openmp: ptap algorithm"))
794 myalg = params->get("openmp: ptap algorithm", myalg);
795 }
796
797 if (myalg == "LTG") {
798#ifdef HAVE_TPETRA_MMM_TIMINGS
799 MM = Teuchos::null;
800 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
801#endif
802
803 using Teuchos::ParameterList;
804 using Teuchos::RCP;
805 using LO = LocalOrdinal;
806 using GO = GlobalOrdinal;
807 using SC = Scalar;
808
809 // We don't need a kernel-level PTAP, we just transpose here
810 using transposer_type =
811 RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
812 transposer_type transposer(Pview.origMatrix, label + "XP: ");
813 RCP<ParameterList> transposeParams(new ParameterList);
814 if (!params.is_null()) {
815 transposeParams->set("compute global constants",
816 params->get("compute global constants: temporaries",
817 false));
818 }
819 transposeParams->set("sort", false);
820 RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
821 transposer.createTransposeLocal(transposeParams);
822 CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
823 Rview.origMatrix = Ptrans;
824
825 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel;
826 mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
827 PIcol2Accol, Ac, Acimport, label, params);
828 } else {
829 throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P reuse unknown kernel");
830 }
831 Ac.fillComplete(Ac.getDomainMap(), Ac.getRangeMap());
832}
833
834} // namespace MMdetails
835} // namespace Tpetra
836
837#endif // OpenMP
838
839#endif
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Struct that holds views of the contents of a CrsMatrix.
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.