Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
TpetraExt_MatrixMatrix_OpenMP.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
11#define TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
12
13#ifdef HAVE_TPETRA_INST_OPENMP
14namespace Tpetra {
15namespace MMdetails {
16
17/*********************************************************************************************************/
18// MMM KernelWrappers for Partial Specialization to OpenMP
19template <class Scalar,
20 class LocalOrdinal,
21 class GlobalOrdinal, class LocalOrdinalViewType>
22struct KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
23 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
24 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
25 const LocalOrdinalViewType& Acol2Brow,
26 const LocalOrdinalViewType& Acol2Irow,
27 const LocalOrdinalViewType& Bcol2Ccol,
28 const LocalOrdinalViewType& Icol2Ccol,
29 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
30 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
31 const std::string& label = std::string(),
32 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
33
34 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
35 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
36 const LocalOrdinalViewType& Acol2Brow,
37 const LocalOrdinalViewType& Acol2Irow,
38 const LocalOrdinalViewType& Bcol2Ccol,
39 const LocalOrdinalViewType& Icol2Ccol,
40 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
41 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
42 const std::string& label = std::string(),
43 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
44};
45
46// Jacobi KernelWrappers for Partial Specialization to OpenMP
47template <class Scalar,
48 class LocalOrdinal,
49 class GlobalOrdinal, class LocalOrdinalViewType>
50struct KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
51 static inline void jacobi_A_B_newmatrix_kernel_wrapper(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
52 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
53 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
54 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
55 const LocalOrdinalViewType& Acol2Brow,
56 const LocalOrdinalViewType& Acol2Irow,
57 const LocalOrdinalViewType& Bcol2Ccol,
58 const LocalOrdinalViewType& Icol2Ccol,
59 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
60 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
61 const std::string& label = std::string(),
62 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
63
64 static inline void jacobi_A_B_reuse_kernel_wrapper(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
65 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
66 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
67 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
68 const LocalOrdinalViewType& Acol2Brow,
69 const LocalOrdinalViewType& Acol2Irow,
70 const LocalOrdinalViewType& Bcol2Ccol,
71 const LocalOrdinalViewType& Icol2Ccol,
72 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
73 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
74 const std::string& label = std::string(),
75 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
76
77 static inline void jacobi_A_B_newmatrix_KokkosKernels(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
78 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
79 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
80 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
81 const LocalOrdinalViewType& Acol2Brow,
82 const LocalOrdinalViewType& Acol2Irow,
83 const LocalOrdinalViewType& Bcol2Ccol,
84 const LocalOrdinalViewType& Icol2Ccol,
85 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
86 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
87 const std::string& label = std::string(),
88 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
89};
90
91// Triple-Product KernelWrappers for Partial Specialization to OpenMP
92template <class Scalar,
93 class LocalOrdinal,
94 class GlobalOrdinal, class LocalOrdinalViewType>
95struct KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
96 static inline void mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
97 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
98 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
99 const LocalOrdinalViewType& Acol2Prow,
100 const LocalOrdinalViewType& Acol2PIrow,
101 const LocalOrdinalViewType& Pcol2Ccol,
102 const LocalOrdinalViewType& PIcol2Ccol,
103 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
104 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
105 const std::string& label = std::string(),
106 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
107
108 static inline void mult_R_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
109 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
110 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
111 const LocalOrdinalViewType& Acol2Prow,
112 const LocalOrdinalViewType& Acol2PIrow,
113 const LocalOrdinalViewType& Pcol2Ccol,
114 const LocalOrdinalViewType& PIcol2Ccol,
115 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
116 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
117 const std::string& label = std::string(),
118 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
119
120 static inline void mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
121 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
122 const LocalOrdinalViewType& Acol2Prow,
123 const LocalOrdinalViewType& Acol2PIrow,
124 const LocalOrdinalViewType& Pcol2Ccol,
125 const LocalOrdinalViewType& PIcol2Ccol,
126 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
127 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
128 const std::string& label = std::string(),
129 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
130
131 static inline void mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
132 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
133 const LocalOrdinalViewType& Acol2Prow,
134 const LocalOrdinalViewType& Acol2PIrow,
135 const LocalOrdinalViewType& Pcol2Ccol,
136 const LocalOrdinalViewType& PIcol2Ccol,
137 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
138 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
139 const std::string& label = std::string(),
140 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
141};
142
143/*********************************************************************************************************/
144template <class Scalar,
145 class LocalOrdinal,
146 class GlobalOrdinal,
147 class LocalOrdinalViewType>
148void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
149 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
150 const LocalOrdinalViewType& Acol2Brow,
151 const LocalOrdinalViewType& Acol2Irow,
152 const LocalOrdinalViewType& Bcol2Ccol,
153 const LocalOrdinalViewType& Icol2Ccol,
154 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
155 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
156 const std::string& label,
157 const Teuchos::RCP<Teuchos::ParameterList>& params) {
158#ifdef HAVE_TPETRA_MMM_TIMINGS
159 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
160 using Teuchos::TimeMonitor;
161 Teuchos::RCP<TimeMonitor> MM;
162#endif
163
164 // Node-specific code
165 std::string nodename("OpenMP");
166
167 // Lots and lots of typedefs
168 using Teuchos::RCP;
170 typedef typename KCRS::device_type device_t;
171 typedef typename KCRS::StaticCrsGraphType graph_t;
172 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
173 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
174 typedef typename KCRS::values_type::non_const_type scalar_view_t;
175
176 // Options
177 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
178 std::string myalg("SPGEMM_KK_MEMORY");
179
180 if (!params.is_null()) {
181 if (params->isParameter("openmp: algorithm"))
182 myalg = params->get("openmp: algorithm", myalg);
183 if (params->isParameter("openmp: team work size"))
184 team_work_size = params->get("openmp: team work size", team_work_size);
185 }
186
187 if (myalg == "LTG") {
188 // Use the LTG kernel if requested
189 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_newmatrix_LowThreadGustavsonKernel(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
190 } else {
191 // Use the Kokkos-Kernels OpenMP Kernel
192#ifdef HAVE_TPETRA_MMM_TIMINGS
193 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPWrapper"))));
194#endif
195 // KokkosKernelsHandle
196 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
197 typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
198 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>
199 KernelHandle;
200
201 // Grab the Kokkos::SparseCrsMatrices
202 const KCRS Ak = Aview.origMatrix->getLocalMatrixDevice();
203 // const KCRS & Bk = Bview.origMatrix->getLocalMatrixDevice();
204
205 // Get the algorithm mode
206 std::string alg = nodename + std::string(" algorithm");
207 // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
208 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
209 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
210
211 // Merge the B and Bimport matrices
212 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
213
214#ifdef HAVE_TPETRA_MMM_TIMINGS
215 MM = Teuchos::null;
216 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPCore"))));
217#endif
218
219 // Do the multiply on whatever we've got
220 typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
221 // typename KernelHandle::nnz_lno_t BnumRows = Bmerged->numRows();
222 // typename KernelHandle::nnz_lno_t BnumCols = Bmerged->numCols();
223 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
224 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
225
226 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
227 lno_nnz_view_t entriesC;
228 scalar_view_t valuesC;
229 KernelHandle kh;
230 kh.create_spgemm_handle(alg_enum);
231 kh.set_team_work_size(team_work_size);
232 // KokkosSparse::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged->graph.row_map,Bmerged->graph.entries,false,row_mapC);
233 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Ak.graph.row_map, Ak.graph.entries, false, Bmerged.graph.row_map, Bmerged.graph.entries, false, row_mapC);
234
235 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
236 if (c_nnz_size) {
237 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
238 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
239 }
240 // KokkosSparse::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged->graph.row_map,Bmerged->graph.entries,Bmerged->values,false,row_mapC,entriesC,valuesC);
241 KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Ak.graph.row_map, Ak.graph.entries, Ak.values, false, Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false, row_mapC, entriesC, valuesC);
242 kh.destroy_spgemm_handle();
243
244#ifdef HAVE_TPETRA_MMM_TIMINGS
245 MM = Teuchos::null;
246 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPSort"))));
247#endif
248 // Sort & set values
249 if (params.is_null() || params->get("sort entries", true)) {
250 // Tpetra's OpenMP SpGEMM results in almost sorted matrices. Use shell sort.
251 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
252 }
253 C.setAllValues(row_mapC, entriesC, valuesC);
254
255 } // end OMP KokkosKernels loop
256
257#ifdef HAVE_TPETRA_MMM_TIMINGS
258 MM = Teuchos::null;
259 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPESFC"))));
260#endif
261
262 // Final Fillcomplete
263 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
264 labelList->set("Timer Label", label);
265 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
266 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
267 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
268
269#if 0
270 {
271 Teuchos::ArrayRCP< const size_t > Crowptr;
272 Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
273 Teuchos::ArrayRCP< const Scalar > Cvalues;
274 C.getAllValues(Crowptr,Ccolind,Cvalues);
275
276 // DEBUG
277 int MyPID = C->getComm()->getRank();
278 printf("[%d] Crowptr = ",MyPID);
279 for(size_t i=0; i<(size_t) Crowptr.size(); i++) {
280 printf("%3d ",(int)Crowptr.getConst()[i]);
281 }
282 printf("\n");
283 printf("[%d] Ccolind = ",MyPID);
284 for(size_t i=0; i<(size_t)Ccolind.size(); i++) {
285 printf("%3d ",(int)Ccolind.getConst()[i]);
286 }
287 printf("\n");
288 fflush(stdout);
289 // END DEBUG
290 }
291#endif
292}
293
294/*********************************************************************************************************/
295template <class Scalar,
296 class LocalOrdinal,
297 class GlobalOrdinal,
298 class LocalOrdinalViewType>
299void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
300 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
301 const LocalOrdinalViewType& Acol2Brow,
302 const LocalOrdinalViewType& Acol2Irow,
303 const LocalOrdinalViewType& Bcol2Ccol,
304 const LocalOrdinalViewType& Icol2Ccol,
305 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
306 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
307 const std::string& label,
308 const Teuchos::RCP<Teuchos::ParameterList>& params) {
309#ifdef HAVE_TPETRA_MMM_TIMINGS
310 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
311 using Teuchos::TimeMonitor;
312 Teuchos::RCP<TimeMonitor> MM;
313#endif
314
315 // Lots and lots of typedefs
316 using Teuchos::RCP;
317
318 // Options
319 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
320 std::string myalg("LTG");
321 if (!params.is_null()) {
322 if (params->isParameter("openmp: algorithm"))
323 myalg = params->get("openmp: algorithm", myalg);
324 if (params->isParameter("openmp: team work size"))
325 team_work_size = params->get("openmp: team work size", team_work_size);
326 }
327
328 if (myalg == "LTG") {
329 // Use the LTG kernel if requested
330 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_reuse_LowThreadGustavsonKernel(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
331 } else {
332 throw std::runtime_error("Tpetra::MatrixMatrix::MMM reuse unknown kernel");
333 }
334
335#ifdef HAVE_TPETRA_MMM_TIMINGS
336 MM = Teuchos::null;
337 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse OpenMPESFC"))));
338#endif
339 C.fillComplete(C.getDomainMap(), C.getRangeMap());
340}
341
342/*********************************************************************************************************/
343template <class Scalar,
344 class LocalOrdinal,
345 class GlobalOrdinal,
346 class LocalOrdinalViewType>
347void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
348 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
349 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
350 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
351 const LocalOrdinalViewType& Acol2Brow,
352 const LocalOrdinalViewType& Acol2Irow,
353 const LocalOrdinalViewType& Bcol2Ccol,
354 const LocalOrdinalViewType& Icol2Ccol,
355 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
356 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
357 const std::string& label,
358 const Teuchos::RCP<Teuchos::ParameterList>& params) {
359#ifdef HAVE_TPETRA_MMM_TIMINGS
360 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
361 using Teuchos::TimeMonitor;
362 Teuchos::RCP<TimeMonitor> MM;
363#endif
364
365 // Node-specific code
366 using Teuchos::RCP;
367
368 // Options
369 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
370 std::string myalg("LTG");
371 if (!params.is_null()) {
372 if (params->isParameter("openmp: jacobi algorithm"))
373 myalg = params->get("openmp: jacobi algorithm", myalg);
374 if (params->isParameter("openmp: team work size"))
375 team_work_size = params->get("openmp: team work size", team_work_size);
376 }
377
378 if (myalg == "LTG") {
379 // Use the LTG kernel if requested
380 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_LowThreadGustavsonKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
381 } else if (myalg == "MSAK") {
382 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
383 } else if (myalg == "KK") {
384 jacobi_A_B_newmatrix_KokkosKernels(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
385 } else {
386 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
387 }
388
389#ifdef HAVE_TPETRA_MMM_TIMINGS
390 MM = Teuchos::null;
391 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
392#endif
393
394 // Final Fillcomplete
395 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
396 labelList->set("Timer Label", label);
397 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
398
399 // NOTE: MSAK already fillCompletes, so we have to check here
400 if (!C.isFillComplete()) {
401 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
402 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
403 }
404}
405
406/*********************************************************************************************************/
407template <class Scalar,
408 class LocalOrdinal,
409 class GlobalOrdinal,
410 class LocalOrdinalViewType>
411void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
412 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
413 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
414 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
415 const LocalOrdinalViewType& Acol2Brow,
416 const LocalOrdinalViewType& Acol2Irow,
417 const LocalOrdinalViewType& Bcol2Ccol,
418 const LocalOrdinalViewType& Icol2Ccol,
419 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
420 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
421 const std::string& label,
422 const Teuchos::RCP<Teuchos::ParameterList>& params) {
423#ifdef HAVE_TPETRA_MMM_TIMINGS
424 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
425 using Teuchos::TimeMonitor;
426 Teuchos::RCP<TimeMonitor> MM;
427#endif
428
429 // Lots and lots of typedefs
430 using Teuchos::RCP;
431
432 // Options
433 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
434 std::string myalg("LTG");
435 if (!params.is_null()) {
436 if (params->isParameter("openmp: jacobi algorithm"))
437 myalg = params->get("openmp: jacobi algorithm", myalg);
438 if (params->isParameter("openmp: team work size"))
439 team_work_size = params->get("openmp: team work size", team_work_size);
440 }
441
442 if (myalg == "LTG") {
443 // Use the LTG kernel if requested
444 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_reuse_LowThreadGustavsonKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
445 } else {
446 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi reuse unknown kernel");
447 }
448
449#ifdef HAVE_TPETRA_MMM_TIMINGS
450 MM = Teuchos::null;
451 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse OpenMPESFC"))));
452#endif
453 C.fillComplete(C.getDomainMap(), C.getRangeMap());
454}
455
456/*********************************************************************************************************/
457template <class Scalar,
458 class LocalOrdinal,
459 class GlobalOrdinal,
460 class LocalOrdinalViewType>
461void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
462 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
463 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
464 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
465 const LocalOrdinalViewType& Acol2Brow,
466 const LocalOrdinalViewType& Acol2Irow,
467 const LocalOrdinalViewType& Bcol2Ccol,
468 const LocalOrdinalViewType& Icol2Ccol,
469 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
470 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
471 const std::string& label,
472 const Teuchos::RCP<Teuchos::ParameterList>& params) {
473#ifdef HAVE_TPETRA_MMM_TIMINGS
474 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
475 using Teuchos::TimeMonitor;
476 Teuchos::RCP<TimeMonitor> MM;
477#endif
478
479 // Check if the diagonal entries exist in debug mode
480 const bool debug = Tpetra::Details::Behavior::debug();
481 if (debug) {
482 auto rowMap = Aview.origMatrix->getRowMap();
484 Aview.origMatrix->getLocalDiagCopy(diags);
485 size_t diagLength = rowMap->getLocalNumElements();
486 Teuchos::Array<Scalar> diagonal(diagLength);
487 diags.get1dCopy(diagonal());
488
489 for (size_t i = 0; i < diagLength; ++i) {
490 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
491 std::runtime_error,
492 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl
493 << "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
494 }
495 }
496
497 // Usings
498 using device_t = typename Tpetra::KokkosCompat::KokkosOpenMPWrapperNode::device_type;
500 using graph_t = typename matrix_t::StaticCrsGraphType;
501 using lno_view_t = typename graph_t::row_map_type::non_const_type;
502 using c_lno_view_t = typename graph_t::row_map_type::const_type;
503 using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
504 using scalar_view_t = typename matrix_t::values_type::non_const_type;
505
506 // KokkosKernels handle
507 using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
508 typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
509 typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
510
511 // Get the rowPtr, colInd and vals of importMatrix
512 c_lno_view_t Irowptr;
513 lno_nnz_view_t Icolind;
514 scalar_view_t Ivals;
515 if (!Bview.importMatrix.is_null()) {
516 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
517 Irowptr = lclB.graph.row_map;
518 Icolind = lclB.graph.entries;
519 Ivals = lclB.values;
520 }
521
522 // Merge the B and Bimport matrices
523 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
524
525 // Get the properties and arrays of input matrices
526 const matrix_t Amat = Aview.origMatrix->getLocalMatrixDevice();
527 const matrix_t Bmat = Bview.origMatrix->getLocalMatrixDevice();
528
529 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
530 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
531 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
532
533 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
534 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
535 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
536
537 // Arrays of the output matrix
538 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
539 lno_nnz_view_t entriesC;
540 scalar_view_t valuesC;
541
542 // Options
543 int team_work_size = 16;
544 std::string myalg("SPGEMM_KK_MEMORY");
545 if (!params.is_null()) {
546 if (params->isParameter("cuda: algorithm"))
547 myalg = params->get("cuda: algorithm", myalg);
548 if (params->isParameter("cuda: team work size"))
549 team_work_size = params->get("cuda: team work size", team_work_size);
550 }
551
552 // Get the algorithm mode
553 std::string nodename("OpenMP");
554 std::string alg = nodename + std::string(" algorithm");
555 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
556 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
557
558 // KokkosKernels call
559 handle_t kh;
560 kh.create_spgemm_handle(alg_enum);
561 kh.set_team_work_size(team_work_size);
562
563 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
564 Arowptr, Acolind, false,
565 Browptr, Bcolind, false,
566 row_mapC);
567
568 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
569 if (c_nnz_size) {
570 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
571 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
572 }
573
574 const Scalar jacobiOmega = omega * Teuchos::ScalarTraits<Scalar>::one();
575 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
576 Arowptr, Acolind, Avals, false,
577 Browptr, Bcolind, Bvals, false,
578 row_mapC, entriesC, valuesC,
579 jacobiOmega, Dinv.getLocalViewDevice(Tpetra::Access::ReadOnly));
580 kh.destroy_spgemm_handle();
581
582#ifdef HAVE_TPETRA_MMM_TIMINGS
583 MM = Teuchos::null;
584 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPSort"))));
585#endif
586
587 // Sort & set values
588 if (params.is_null() || params->get("sort entries", true)) {
589 // Tpetra's OpenMP SpGEMM results in almost sorted matrices. Use shell sort.
590 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
591 }
592 C.setAllValues(row_mapC, entriesC, valuesC);
593
594#ifdef HAVE_TPETRA_MMM_TIMINGS
595 MM = Teuchos::null;
596 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
597#endif
598
599 // Final Fillcomplete
600 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
601 labelList->set("Timer Label", label);
602 if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
603 Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
604 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
605}
606
607/*********************************************************************************************************/
608template <class Scalar,
609 class LocalOrdinal,
610 class GlobalOrdinal,
611 class LocalOrdinalViewType>
612void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
613 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
614 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
615 const LocalOrdinalViewType& Acol2Prow,
616 const LocalOrdinalViewType& Acol2PIrow,
617 const LocalOrdinalViewType& Pcol2Accol,
618 const LocalOrdinalViewType& PIcol2Accol,
619 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
620 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
621 const std::string& label,
622 const Teuchos::RCP<Teuchos::ParameterList>& params) {
623#ifdef HAVE_TPETRA_MMM_TIMINGS
624 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
625 using Teuchos::TimeMonitor;
626 Teuchos::RCP<TimeMonitor> MM;
627#endif
628
629 // Node-specific code
630 std::string nodename("OpenMP");
631
632 // Options
633 std::string myalg("LTG");
634
635 if (!params.is_null()) {
636 if (params->isParameter("openmp: rap algorithm"))
637 myalg = params->get("openmp: rap algorithm", myalg);
638 }
639
640 if (myalg == "LTG") {
641 // Use the LTG kernel if requested
642 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol, PIcol2Accol, Ac, Acimport, label, params);
643 } else {
644 throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
645 }
646}
647
648/*********************************************************************************************************/
649template <class Scalar,
650 class LocalOrdinal,
651 class GlobalOrdinal,
652 class LocalOrdinalViewType>
653void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_R_A_P_reuse_kernel_wrapper(
654 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
655 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
656 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
657
658 const LocalOrdinalViewType& Acol2Prow,
659 const LocalOrdinalViewType& Acol2Irow,
660 const LocalOrdinalViewType& Pcol2Ccol,
661 const LocalOrdinalViewType& Icol2Ccol,
662 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
663 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
664 const std::string& label,
665 const Teuchos::RCP<Teuchos::ParameterList>& params) {
666#ifdef HAVE_TPETRA_MMM_TIMINGS
667 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
668 using Teuchos::TimeMonitor;
669 Teuchos::RCP<TimeMonitor> MM;
670#endif
671
672 // Lots and lots of typedefs
673 using Teuchos::RCP;
674
675 // Options
676 std::string myalg("LTG");
677 if (!params.is_null()) {
678 if (params->isParameter("openmp: rap algorithm"))
679 myalg = params->get("openmp: rap algorithm", myalg);
680 }
681
682 if (myalg == "LTG") {
683 // Use the LTG kernel if requested
684 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2Irow, Pcol2Ccol, Icol2Ccol, C, Cimport, label, params);
685 } else {
686 throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
687 }
688
689#ifdef HAVE_TPETRA_MMM_TIMINGS
690 MM = Teuchos::null;
691 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("RAP Reuse OpenMPESFC"))));
692#endif
693 C.fillComplete(C.getDomainMap(), C.getRangeMap());
694}
695
696/*********************************************************************************************************/
697template <class Scalar,
698 class LocalOrdinal,
699 class GlobalOrdinal,
700 class LocalOrdinalViewType>
701void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
702
703 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
704 const LocalOrdinalViewType& Acol2Prow,
705 const LocalOrdinalViewType& Acol2PIrow,
706 const LocalOrdinalViewType& Pcol2Accol,
707 const LocalOrdinalViewType& PIcol2Accol,
708 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
709 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
710 const std::string& label,
711 const Teuchos::RCP<Teuchos::ParameterList>& params) {
712#ifdef HAVE_TPETRA_MMM_TIMINGS
713 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
714 using Teuchos::TimeMonitor;
715 Teuchos::RCP<TimeMonitor> MM;
716#endif
717
718 // Node-specific code
719 std::string nodename("OpenMP");
720
721 // Options
722 std::string myalg("LTG");
723
724 if (!params.is_null()) {
725 if (params->isParameter("openmp: ptap algorithm"))
726 myalg = params->get("openmp: ptap algorithm", myalg);
727 }
728
729 if (myalg == "LTG") {
730#ifdef HAVE_TPETRA_MMM_TIMINGS
731 MM = Teuchos::null;
732 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
733#endif
734
735 using Teuchos::ParameterList;
736 using Teuchos::RCP;
737 using LO = LocalOrdinal;
738 using GO = GlobalOrdinal;
739 using SC = Scalar;
740
741 // We don't need a kernel-level PTAP, we just transpose here
742 using transposer_type =
743 RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
744 transposer_type transposer(Pview.origMatrix, label + "XP: ");
745 RCP<ParameterList> transposeParams(new ParameterList);
746 if (!params.is_null()) {
747 transposeParams->set("compute global constants",
748 params->get("compute global constants: temporaries",
749 false));
750 }
751 transposeParams->set("sort", false);
752 RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
753 transposer.createTransposeLocal(transposeParams);
754 CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
755 Rview.origMatrix = Ptrans;
756
757 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel;
758 mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
759 PIcol2Accol, Ac, Acimport, label, params);
760 } else {
761 throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P newmatrix unknown kernel");
762 }
763}
764
765/*********************************************************************************************************/
766template <class Scalar,
767 class LocalOrdinal,
768 class GlobalOrdinal,
769 class LocalOrdinalViewType>
770void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
771
772 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
773 const LocalOrdinalViewType& Acol2Prow,
774 const LocalOrdinalViewType& Acol2PIrow,
775 const LocalOrdinalViewType& Pcol2Accol,
776 const LocalOrdinalViewType& PIcol2Accol,
777 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
778 Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
779 const std::string& label,
780 const Teuchos::RCP<Teuchos::ParameterList>& params) {
781#ifdef HAVE_TPETRA_MMM_TIMINGS
782 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
783 using Teuchos::TimeMonitor;
784 Teuchos::RCP<TimeMonitor> MM;
785#endif
786
787 // Node-specific code
788 std::string nodename("OpenMP");
789
790 // Options
791 std::string myalg("LTG");
792
793 if (!params.is_null()) {
794 if (params->isParameter("openmp: ptap algorithm"))
795 myalg = params->get("openmp: ptap algorithm", myalg);
796 }
797
798 if (myalg == "LTG") {
799#ifdef HAVE_TPETRA_MMM_TIMINGS
800 MM = Teuchos::null;
801 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
802#endif
803
804 using Teuchos::ParameterList;
805 using Teuchos::RCP;
806 using LO = LocalOrdinal;
807 using GO = GlobalOrdinal;
808 using SC = Scalar;
809
810 // We don't need a kernel-level PTAP, we just transpose here
811 using transposer_type =
812 RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
813 transposer_type transposer(Pview.origMatrix, label + "XP: ");
814 RCP<ParameterList> transposeParams(new ParameterList);
815 if (!params.is_null()) {
816 transposeParams->set("compute global constants",
817 params->get("compute global constants: temporaries",
818 false));
819 }
820 transposeParams->set("sort", false);
821 RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
822 transposer.createTransposeLocal(transposeParams);
823 CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
824 Rview.origMatrix = Ptrans;
825
826 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel;
827 mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
828 PIcol2Accol, Ac, Acimport, label, params);
829 } else {
830 throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P reuse unknown kernel");
831 }
832 Ac.fillComplete(Ac.getDomainMap(), Ac.getRangeMap());
833}
834
835} // namespace MMdetails
836} // namespace Tpetra
837
838#endif // OpenMP
839
840#endif
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Struct that holds views of the contents of a CrsMatrix.
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.