Panzer Version of the Day
Loading...
Searching...
No Matches
Panzer_Interpolation.cpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Panzer: A partial differential equation assembly
4// engine for strongly coupled complex multiphysics systems
5//
6// Copyright 2011 NTESS and the Panzer contributors.
7// SPDX-License-Identifier: BSD-3-Clause
8// *****************************************************************************
9// @HEADER
10
14#include "Intrepid2_OrientationTools.hpp"
15#include "Intrepid2_LagrangianInterpolation.hpp"
16#ifdef PANZER_HAVE_EPETRA_STACK
17#include "Epetra_MpiComm.h"
18#endif
19
20// #define PANZER_INTERPOLATION_DEBUG_OUTPUT = 1
21
22namespace panzer {
23
24template <class Scalar,
25 class LocalOrdinal,
26 class GlobalOrdinal,
27 class Node>
28Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
29removeSmallEntries(Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& A,
30 typename Teuchos::ScalarTraits<Scalar>::magnitudeType tol) {
31 using crs_matrix = Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
32 using row_ptr_type = typename crs_matrix::local_graph_device_type::row_map_type::non_const_type;
33 using col_idx_type = typename crs_matrix::local_graph_device_type::entries_type::non_const_type;
34 using vals_type = typename crs_matrix::local_matrix_device_type::values_type;
35
36 using ATS = KokkosKernels::ArithTraits<Scalar>;
37 using impl_SC = typename ATS::val_type;
38 using impl_ATS = KokkosKernels::ArithTraits<impl_SC>;
39
40 auto lclA = A->getLocalMatrixDevice();
41
42 auto rowptr = row_ptr_type("rowptr", lclA.numRows() + 1);
43
44 Kokkos::parallel_for(
45 "removeSmallEntries::rowptr1",
46 Kokkos::RangePolicy<LocalOrdinal>(0, lclA.numRows()),
47 KOKKOS_LAMBDA(const LocalOrdinal rlid) {
48 auto row = lclA.row(rlid);
49 for (LocalOrdinal k = 0; k < row.length; ++k) {
50 if (impl_ATS::magnitude(row.value(k)) > tol) {
51 rowptr(rlid + 1) += 1;
52 }
53 }
54 });
55 LocalOrdinal nnz;
56 Kokkos::parallel_scan(
57 "removeSmallEntries::rowptr2",
58 Kokkos::RangePolicy<LocalOrdinal>(0, lclA.numRows()),
59 KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) {
60 partial_nnz += rowptr(rlid + 1);
61 if (is_final)
62 rowptr(rlid + 1) = partial_nnz;
63 },
64 nnz);
65
66 auto idx = col_idx_type("idx", nnz);
67 auto vals = vals_type("vals", nnz);
68
69 Kokkos::parallel_for(
70 "removeSmallEntries::indicesValues",
71 Kokkos::RangePolicy<LocalOrdinal>(0, lclA.numRows()),
72 KOKKOS_LAMBDA(const LocalOrdinal rlid) {
73 auto row = lclA.row(rlid);
74 auto I = rowptr(rlid);
75 for (LocalOrdinal k = 0; k < row.length; ++k) {
76 if (impl_ATS::magnitude(row.value(k)) > tol) {
77 idx(I) = row.colidx(k);
78 vals(I) = row.value(k);
79 I += 1;
80 }
81 }
82 });
83 Kokkos::fence();
84
85 auto newA = Teuchos::rcp(new crs_matrix(A->getRowMap(), A->getColMap(), rowptr, idx, vals));
86 newA->fillComplete(A->getDomainMap(),
87 A->getRangeMap());
88 return newA;
89}
90
91
92Teuchos::RCP<Thyra::LinearOpBase<double>> buildInterpolation(const Teuchos::RCP<const panzer::LinearObjFactory<panzer::Traits>> &linObjFactory,
93 const std::string &domain_basis_name, const std::string &range_basis_name,
94 Intrepid2::EOperator op, size_t worksetSize,
95 const bool matrixFree)
96{
97 using Teuchos::RCP;
98 using Teuchos::rcp;
99 using Teuchos::rcp_dynamic_cast;
100
101 using Scalar = double;
102
104#ifdef PANZER_HAVE_EPETRA_STACK
105 using epetraBlockedLinObjFactory = typename panzer::BlockedEpetraLinearObjFactory<panzer::Traits, LocalOrdinal>;
106#endif
107 using UGI = panzer::GlobalIndexer;
108
109 // must be able to cast to a block linear object factory
110 RCP<const tpetraBlockedLinObjFactory > tblof = rcp_dynamic_cast<const tpetraBlockedLinObjFactory >(linObjFactory);
111#ifdef PANZER_HAVE_EPETRA_STACK
112 RCP<const epetraBlockedLinObjFactory > eblof = rcp_dynamic_cast<const epetraBlockedLinObjFactory >(linObjFactory);
113#endif
114
115 RCP<const panzer::BlockedDOFManager> blockedDOFMngr;
116 if (tblof != Teuchos::null) {
117 blockedDOFMngr = tblof->getGlobalIndexer();
118#ifdef PANZER_HAVE_EPETRA_STACK
119 } else if (eblof != Teuchos::null) {
120 blockedDOFMngr = eblof->getGlobalIndexer();
121#endif
122 } else {
123 TEUCHOS_ASSERT(false);
124 }
125
126 // get global indexers for domain and range dofs
127 std::vector<RCP<UGI> > fieldDOFMngrs = blockedDOFMngr->getFieldDOFManagers();
128 int domainFieldNum = blockedDOFMngr->getFieldNum(domain_basis_name);
129 int rangeFieldNum = blockedDOFMngr->getFieldNum(range_basis_name);
130 int domainBlockIndex = blockedDOFMngr->getFieldBlock(domainFieldNum);
131 int rangeBlockIndex = blockedDOFMngr->getFieldBlock(rangeFieldNum);
132 RCP<panzer::DOFManager> domain_ugi = rcp_dynamic_cast<panzer::DOFManager>(blockedDOFMngr->getFieldDOFManagers()[domainBlockIndex], true);
133 RCP<panzer::DOFManager> range_ugi = rcp_dynamic_cast<panzer::DOFManager>(blockedDOFMngr->getFieldDOFManagers()[rangeBlockIndex], true);
134
135 RCP<const panzer::ConnManager> conn = blockedDOFMngr->getConnManager();
136
137 return buildInterpolation(conn, domain_ugi, range_ugi, domain_basis_name, range_basis_name, op, worksetSize,
138 /*forceVectorial=*/false,
139 /*useTpetra=*/tblof != Teuchos::null,
140 matrixFree);
141}
142
143
144Teuchos::RCP<Thyra::LinearOpBase<double> > buildInterpolation(const Teuchos::RCP<const panzer::ConnManager> &conn,
145 const Teuchos::RCP<panzer::DOFManager> &domain_ugi,
146 const Teuchos::RCP<panzer::DOFManager> &range_ugi,
147 const std::string& domain_basis_name,
148 const std::string& range_basis_name,
149 Intrepid2::EOperator op,
150 size_t worksetSize,
151 const bool force_vectorial,
152 const bool useTpetra,
153 const bool matrixFree)
154{
155 using Teuchos::RCP;
156 using Teuchos::rcp;
157 using Teuchos::rcp_dynamic_cast;
158
159 using Scalar = double;
160
161 using STS = Teuchos::ScalarTraits<Scalar>;
162 using KAT = KokkosKernels::ArithTraits<Scalar>;
163 using OT = Teuchos::OrdinalTraits<GlobalOrdinal>;
164
165 using DeviceSpace = PHX::Device;
166 using HostSpace = Kokkos::HostSpace;
167 using ots = Intrepid2::OrientationTools<DeviceSpace>;
168 using li = Intrepid2::LagrangianInterpolation<DeviceSpace>;
169 using DynRankDeviceView = Kokkos::DynRankView<double, DeviceSpace>;
170
171 using tp_graph = Tpetra::CrsGraph<LocalOrdinal, GlobalOrdinal>;
172 using tp_matrix = Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal>;
173 using tp_map = Tpetra::Map<LocalOrdinal, GlobalOrdinal>;
174#ifdef PANZER_HAVE_EPETRA_STACK
175 using ep_matrix = Epetra_CrsMatrix;
176 using ep_map = Epetra_Map;
177#endif
178
179 if (matrixFree) {
180 TEUCHOS_ASSERT(useTpetra);
181 TEUCHOS_ASSERT(!force_vectorial);
182 auto mfOp = rcp(new MatrixFreeInterpolationOp<Scalar,LocalOrdinal,GlobalOrdinal>(conn, domain_ugi, range_ugi, domain_basis_name, range_basis_name, op, worksetSize));
183 return Thyra::tpetraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,typename tp_matrix::node_type>(Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(mfOp->getRangeMap()),
184 Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(mfOp->getDomainMap()),
185 mfOp);
186 }
187
188 // get the domain and range bases
189 auto domain_fieldPattern = domain_ugi->getFieldPattern(domain_basis_name);
190 auto domain_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(domain_fieldPattern,true)->getIntrepidBasis();
191 auto range_fieldPattern = range_ugi->getFieldPattern(range_basis_name);
192 auto range_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(range_fieldPattern,true)->getIntrepidBasis();
193
194 // cardinalities
195 const size_t domainCardinality = domain_basis->getCardinality();
196 const size_t rangeCardinality = range_basis->getCardinality();
197
198 const int dim = range_basis->getBaseCellTopology().getDimension();
199
200 if (op == Intrepid2::OPERATOR_VALUE) {
201 TEUCHOS_ASSERT(rangeCardinality >= domainCardinality);
202 TEUCHOS_ASSERT_EQUALITY(domain_basis->getFunctionSpace(), range_basis->getFunctionSpace());
203 }
204
205 // Create the global interp matrix.
206 RCP<const tp_map> tp_rangemap;
207 RCP<const tp_map> tp_domainmap;
208 RCP<const tp_map> tp_rowmap;
209 RCP<const tp_map> tp_colmap;
210 RCP<tp_matrix> tp_interp_matrix;
211 typename tp_matrix::local_matrix_device_type lcl_tp_interp_matrix;
212#ifdef PANZER_HAVE_EPETRA_STACK
213 RCP<const ep_map> ep_rangemap;
214 RCP<const ep_map> ep_domainmap;
215 RCP<const ep_map> ep_rowmap;
216 RCP<const ep_map> ep_colmap;
217 RCP<ep_matrix> ep_interp_matrix;
218#endif
219
220 auto rangeElementLIDs_d = range_ugi->getLIDs();
221 auto domainElementLIDs_d = domain_ugi->getLIDs();
222
223 RCP<Thyra::LinearOpBase<Scalar> > thyra_interp;
224 size_t maxNumElementsPerBlock = 0;
225 LocalOrdinal minLocalIndex = 0;
226 LocalOrdinal maxLocalIndex = 0;
227 if (useTpetra) {
228 // build maps
229 std::vector<GlobalOrdinal> gids;
230 range_ugi->getOwnedIndices(gids);
231 tp_rowmap = rcp(new tp_map(OT::invalid(), gids.data(), static_cast<LocalOrdinal>(gids.size()), OT::zero(), range_ugi->getComm()));
232 tp_rangemap = tp_rowmap;
233 domain_ugi->getOwnedIndices(gids);
234 tp_domainmap = rcp(new tp_map(OT::invalid(), gids.data(), static_cast<LocalOrdinal>(gids.size()), OT::zero(), domain_ugi->getComm()));
235 domain_ugi->getOwnedAndGhostedIndices(gids);
236 tp_colmap = rcp(new tp_map(OT::invalid(), gids.data(), static_cast<LocalOrdinal>(gids.size()), OT::zero(), domain_ugi->getComm()));
237
238 minLocalIndex = tp_rowmap->getMinLocalIndex();
239 maxLocalIndex = tp_rowmap->getMaxLocalIndex();
240
241 // estimate number of entries per row
242 // This is an upper bound, as we are counting dofs that are on shared nodes, edges, faces more than once.
243 using dv = Kokkos::DualView<size_t*, typename tp_graph::device_type>;
244 dv numEntriesPerRow("numEntriesPerRow", tp_rowmap->getLocalNumElements());
245 {
246 auto numEntriesPerRow_d = numEntriesPerRow.view_device();
247
248 // loop over element blocks
249 std::vector<std::string> elementBlockIds;
250 range_ugi->getElementBlockIds(elementBlockIds);
251 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
252
253 // loop over elements
254 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
255 Kokkos::View<int *, HostSpace> elementIds_h(elementIds.data(), elementIds.size());
256 Kokkos::View<int *, DeviceSpace> elementIds_d("elementIds_d", elementIds_h.extent(0));
257 Kokkos::deep_copy(elementIds_d, elementIds_h);
258 maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
259
260 Kokkos::parallel_for("MiniEM_Interpolation::numEntriesPerRow",
261 Kokkos::RangePolicy<size_t, typename tp_matrix::node_type::execution_space>(0, elementIds.size()),
262 KOKKOS_LAMBDA(const size_t elemIter) {
263 auto elemId = elementIds_d(elemIter);
264
265 // get IDs for range dofs
266 auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
267
268 // loop over range LIDs
269 for(size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
270 const LocalOrdinal range_row = rangeLIDs_d(rangeIter);
271 const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
272 if (isOwned)
273 Kokkos::atomic_add(&numEntriesPerRow_d(range_row), domainCardinality);
274 } //end range LID loop
275 });
276 } // blocks loop
277 numEntriesPerRow.template modify<typename dv::t_dev>();
278 numEntriesPerRow.template sync<typename dv::t_host>();
279 }
280
281 // Set up graph
282 auto tp_interp_graph = rcp(new tp_graph(tp_rowmap, tp_colmap, numEntriesPerRow));
283
284 { // This runs on host
285 Kokkos::View<LocalOrdinal**, HostSpace> rangeElementLIDs_h("rangeElementLIDs_h", rangeElementLIDs_d.extent(0), rangeCardinality);
286 Kokkos::View<LocalOrdinal**, HostSpace> domainElementLIDs_h("domainElementLIDs_h", domainElementLIDs_d.extent(0), domainCardinality);
287 Kokkos::deep_copy(rangeElementLIDs_h, rangeElementLIDs_d);
288 Kokkos::deep_copy(domainElementLIDs_h, domainElementLIDs_d);
289
290 // loop over element blocks
291 std::vector<std::string> elementBlockIds;
292 range_ugi->getElementBlockIds(elementBlockIds);
293 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
294
295 // loop over elements
296 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
297 maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
298 for(std::size_t elemIter = 0; elemIter < elementIds.size(); ++elemIter) {
299 auto elemId = elementIds[elemIter];
300
301 // get IDs for range dofs
302 auto rangeLIDs_h = Kokkos::subview(rangeElementLIDs_h, elemId, Kokkos::ALL());
303 auto domainLIDs_h = Kokkos::subview(domainElementLIDs_h, elemId, Kokkos::ALL());
304
305 // loop over range LIDs
306 for(size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
307 const LocalOrdinal range_row = rangeLIDs_h(rangeIter);
308 const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
309 if (isOwned) {
310 Teuchos::ArrayView<LocalOrdinal> domainLIDs_av = Teuchos::ArrayView<LocalOrdinal>(domainLIDs_h.data(), domainLIDs_h.extent_int(0));
311 tp_interp_graph->insertLocalIndices(range_row, domainLIDs_av);
312 }
313 } //end range LID loop
314 } // elements loop
315 } // blocks loop
316 }
317
318 RCP<Teuchos::ParameterList> pl = rcp(new Teuchos::ParameterList());
319 pl->set("Optimize Storage", true);
320 tp_interp_graph->fillComplete(tp_domainmap, tp_rangemap, pl);
321
322 tp_interp_matrix = rcp(new tp_matrix(tp_interp_graph));
323 lcl_tp_interp_matrix = tp_interp_matrix->getLocalMatrixDevice();
324 }
325#ifdef PANZER_HAVE_EPETRA_STACK
326 else {
327
328 const RCP<const Teuchos::MpiComm<int> > mpiComm = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(range_ugi->getComm());
329 auto ep_comm = Teuchos::rcp(new Epetra_MpiComm(*mpiComm->getRawMpiComm()));
330 std::vector<GlobalOrdinal> gids;
331 range_ugi->getOwnedIndices(gids);
332 ep_rowmap = rcp(new ep_map(OT::invalid(), static_cast<LocalOrdinal>(gids.size()), gids.data(), OT::zero(), *ep_comm));
333 ep_rangemap = ep_rowmap;
334 domain_ugi->getOwnedIndices(gids);
335 ep_domainmap = rcp(new ep_map(OT::invalid(), static_cast<LocalOrdinal>(gids.size()), gids.data(), OT::zero(), *ep_comm));
336 domain_ugi->getOwnedAndGhostedIndices(gids);
337 ep_colmap = rcp(new ep_map(OT::invalid(), static_cast<LocalOrdinal>(gids.size()), gids.data(), OT::zero(), *ep_comm));
338
339 {
340 // loop over element blocks
341 std::vector<std::string> elementBlockIds;
342 range_ugi->getElementBlockIds(elementBlockIds);
343 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
344
345 // loop over elements
346 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
347 maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
348 }
349 }
350
351 // TODO: Fix this.
352 size_t nnzPerRowEstimate = 25*domainCardinality;
353
354 ep_interp_matrix = rcp(new ep_matrix(Copy, *ep_rowmap, *ep_colmap, static_cast<int>(nnzPerRowEstimate), /*StaticProfile=*/true));
355
356 RCP<const Thyra::LinearOpBase<double> > th_ep_interp = Thyra::epetraLinearOp(ep_interp_matrix,
357 Thyra::NOTRANS,
358 Thyra::EPETRA_OP_APPLY_APPLY,
359 Thyra::EPETRA_OP_ADJOINT_SUPPORTED,
360 Thyra::create_VectorSpace(ep_rangemap),
361 Thyra::create_VectorSpace(ep_domainmap));
362 thyra_interp = Teuchos::rcp_const_cast<Thyra::LinearOpBase<double> >(th_ep_interp);
363 }
364#endif
365
366 // allocate some views
367 int numCells;
368 if (maxNumElementsPerBlock > 0)
369 numCells = static_cast<int>(std::min(maxNumElementsPerBlock, worksetSize));
370 else
371 numCells = static_cast<int>(worksetSize);
372
373 DynRankDeviceView range_dofCoords_d("range_dofCoords_d", rangeCardinality, dim);
374 DynRankDeviceView basisCoeffsLIOriented_d("basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
375
376 // the ranks of these depend on dimension
377 DynRankDeviceView valuesAtDofCoordsNonOriented_d;
378 DynRankDeviceView valuesAtDofCoordsOriented_d;
379
380 if (!force_vectorial) {
381 // Let Intrepid2 give us the correctly dimensioned view, then build one with +1 ranks and extent(0) == numCells
382 auto temp = domain_basis->allocateOutputView(static_cast<int>(rangeCardinality), op);
383
384 // These view have dimensions
385 // numCells, numFields=domainCardinality, numPoints=rangeCardinality, (spatialDim)
386 //
387 if (temp.rank() == 3) {
388 valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
389 valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
390 } else {
391 valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
392 valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
393 }
394 } else {
395 valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", domainCardinality, rangeCardinality, dim);
396 valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, domainCardinality, rangeCardinality, dim);
397 }
398
399 int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
400 TEUCHOS_ASSERT((fieldRank == 0) || (fieldRank == 1));
401
402 auto entryFilterTol = 1e5*Teuchos::ScalarTraits<typename STS::magnitudeType>::eps();
403
404 // range dof coordinates
405 range_basis->getDofCoords(range_dofCoords_d);
406
407 // compute values of op * (domain basis) at range dof coords on reference element
408 domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
409
410 // get block ids
411 std::vector<std::string> elementBlockIds;
412 range_ugi->getElementBlockIds(elementBlockIds);
413
414 // get orientations for all blocks
415 std::map<std::string, std::vector<Intrepid2::Orientation> > orientations;
416 buildIntrepidOrientations(elementBlockIds, *conn, orientations);
417
418 // loop over element blocks
419 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
420
421 auto rangeOffsets_d = range_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
422 auto domainOffsets_d = domain_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
423#ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
424 std::cout << "rangeOffsets_d" << std::endl;
425 for (int i = 0; i < rangeCardinality; i++)
426 std::cout << rangeOffsets_d(i) << " ";
427 std::cout << std::endl;
428 std::cout << "domainOffsets_d" << std::endl;
429 for (int i = 0; i < domainCardinality; i++)
430 std::cout << domainOffsets_d(i) << " ";
431 std::cout << std::endl;
432#endif
433
434 // get element ids
435 Kokkos::View<int *, DeviceSpace> elementIds_d;
436 {
437 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
438 Kokkos::View<int *, HostSpace> elementIds_h(elementIds.data(), elementIds.size());
439 elementIds_d = Kokkos::View<int *, DeviceSpace>("elementIds_d", elementIds_h.extent(0));
440 Kokkos::deep_copy(elementIds_d, elementIds_h);
441 }
442
443 // get element orientations
444 typename Kokkos::DynRankView<Intrepid2::Orientation,DeviceSpace> elemOrts_d ("elemOrts_d", elementIds_d.extent(0));
445 {
446 // copy orientations to device
447 auto blockOrientations = orientations[elementBlockIds[blockIter]];
448 Kokkos::View<Intrepid2::Orientation*, HostSpace> elemOrts_h(blockOrientations.data(), blockOrientations.size());
449 Kokkos::deep_copy(elemOrts_d, elemOrts_h);
450 }
451
452 // loop over element worksets
453 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
454
455 int endCellRange =
456 std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
457 Teuchos::as<int>(elemIter));
458
459 // get subviews on workset
460 auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
461 auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
462 // Last workset might be shorter.
463 auto worksetRange = Kokkos::make_pair(0, endCellRange);
464 auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
465 auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
466
467 // apply orientations for domain basis
468 // shuffles things in the second dimension, i.e. wrt domain basis
469 ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
470 valuesAtDofCoordsNonOriented_d,
471 elemOrtsWorkset_d,
472 domain_basis.get());
473
474 // get basis coefficients of domain basis functions wrt range basis
475 for(size_t domainIter=0; domainIter<domainCardinality; domainIter++)
476 // Get basis coeffs wrt range basis on reference element.
477 // basisCoeffsLI has dimensions (numCells, numFields=rangeCardinality, domainCardinality)
478 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), domainIter),
479 Kokkos::subview(valuesAtDofCoordsOrientedWorkset_d, Kokkos::ALL(), domainIter, Kokkos::ALL(), Kokkos::ALL()),
480 range_basis.get(),
481 elemOrtsWorkset_d);
482
483#ifdef PANZER_HAVE_EPETRA_STACK
484 if (!useTpetra) { // Epetra fill
485
486 Kokkos::View<LocalOrdinal*,DeviceSpace> indices_d("indices", domainCardinality);
487 Kokkos::View<Scalar*, DeviceSpace> values_d ("values", domainCardinality);
488
489
490 for (int cellNo = 0; cellNo < endCellRange; cellNo++) {
491 auto elemId = elementIds_d(elemIter+cellNo);
492
493 // get IDs for range and domain dofs
494 auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
495 auto domainLIDs_d = Kokkos::subview(domainElementLIDs_d, elemId, Kokkos::ALL());
496
497 // loop over range LIDs
498 for(size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
499 const LocalOrdinal range_row = rangeLIDs_d(rangeOffsets_d(rangeIter));
500 const bool isOwned = ep_rowmap->MyLID(range_row);
501 if (isOwned) {
502 // filter entries for zeros
503 LocalOrdinal rowNNZ = 0;
504 for(size_t domainIter = 0; domainIter < domainCardinality; domainIter++) {
505 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
506 if (KAT::magnitude(val) > entryFilterTol) {
507 indices_d(rowNNZ) = domainLIDs_d(domainOffsets_d(domainIter));
508 values_d(rowNNZ) = val;
509 ++rowNNZ;
510 }
511 }
512
513 int ret = ep_interp_matrix->ReplaceMyValues(range_row, rowNNZ, values_d.data(), indices_d.data());
514 if (ret != 0) {
515 ret = ep_interp_matrix->InsertMyValues(range_row, rowNNZ, values_d.data(), indices_d.data());
516 TEUCHOS_ASSERT(ret == 0);
517 }
518 } //end if owned
519 } // end range LID loop
520 } //end workset loop
521 } // Epetra fill
522 else
523#endif
524 { // Tpetra fill
525 Kokkos::parallel_for(
526 "MiniEM_Interpolation::worksetLoop",
527 Kokkos::RangePolicy<int, typename tp_matrix::node_type::execution_space>(0, endCellRange),
528 KOKKOS_LAMBDA(const int cellNo) {
529 auto elemId = elementIds_d(elemIter+cellNo);
530
531 // get IDs for range and domain dofs
532 auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
533 auto domainLIDs_d = Kokkos::subview(domainElementLIDs_d, elemId, Kokkos::ALL());
534
535#ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
536 std::cout << "\n" << elemOrts_d(elemIter+cellNo).to_string() << std::endl;
537 std::cout << "rangeLIDs" << std::endl;
538 for (int i = 0; i < rangeCardinality; i++)
539 std::cout << rangeLIDs_d(i) << " ";
540 std::cout << std::endl << "domainLIDs" << std::endl;
541 for (int i = 0; i < domainCardinality; i++)
542 std::cout << domainLIDs_d(i) << " ";
543 std::cout << std::endl;
544#endif
545 // loop over range LIDs
546 for(size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
547 const LocalOrdinal range_row = rangeLIDs_d(rangeOffsets_d(rangeIter));
548 const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
549 if (isOwned) {
550 // filter entries for zeros
551 for(size_t domainIter = 0; domainIter < domainCardinality; domainIter++) {
552 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
553 if (KAT::magnitude(val) > entryFilterTol) {
554
555#if defined(PANZER_INTERPOLATION_DEBUG_OUTPUT) || defined(PANZER_DEBUG)
556 {
557 // Check that there is no entry yet or that we are overwriting it with the same value
558 auto row = lcl_tp_interp_matrix.rowConst(range_row);
559 for(LocalOrdinal kk = 0; kk<row.length; ++kk)
560 if (row.colidx(kk) == domainLIDs_d(domainOffsets_d(domainIter)))
561 if (!(KAT::magnitude(row.value(kk)-val) < entryFilterTol || KAT::magnitude(row.value(kk)) < entryFilterTol)) {
562#ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
563 std::cout << "Replacing (" << range_row << "," << row.colidx(kk) << ") = " << row.value(kk) << " with " << val << std::endl;
564#endif
565#ifdef PANZER_DEBUG
566 Kokkos::abort("MiniEM interpolation worksetLoop failed!");
567#endif
568 }
569 }
570#endif
571#ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
572 std::cout << "Setting (" << range_row << "," << domainLIDs_d(domainOffsets_d(domainIter)) << ") = " << val << std::endl;
573#endif
574 lcl_tp_interp_matrix.replaceValues(range_row, &(domainLIDs_d(domainOffsets_d(domainIter))), 1, &val, /*is_sorted=*/false, /*force_atomic=*/true);
575 }
576 } //end if owned
577 } // isOwned
578 } // end range LID loop
579 }); //end workset loop
580 } // Tpetra fill
581 } //end element loop
582 } //end element block loop
583
584 if (useTpetra) {
585 tp_interp_matrix->fillComplete(tp_domainmap, tp_rangemap);
586
587 if (op != Intrepid2::OPERATOR_VALUE) {
588 // Discrete gradient, curl, etc actually live on edges, faces, etc, so we have a lot of zeros in the matrix.
589 tp_interp_matrix = removeSmallEntries(tp_interp_matrix, entryFilterTol);
590 }
591
592 thyra_interp = Thyra::tpetraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,typename tp_matrix::node_type>(Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(tp_rangemap),
593 Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(tp_domainmap),
594 tp_interp_matrix);
595 }
596#ifdef PANZER_HAVE_EPETRA_STACK
597 else
598 ep_interp_matrix->FillComplete(*ep_domainmap, *ep_rangemap);
599#endif
600
601 return thyra_interp;
602}
603
604 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
606 MatrixFreeInterpolationOp(const Teuchos::RCP<const panzer::ConnManager> &conn,
607 const Teuchos::RCP<panzer::DOFManager> &_domain_ugi,
608 const Teuchos::RCP<panzer::DOFManager> &_range_ugi,
609 const std::string& _domain_basis_name,
610 const std::string& _range_basis_name,
611 Intrepid2::EOperator _op,
612 size_t _worksetSize) :
613 name(""),
614 domain_basis_name(_domain_basis_name),
615 range_basis_name(_range_basis_name),
616 op(_op),
617 worksetSize(_worksetSize),
618 domain_ugi(_domain_ugi),
619 range_ugi(_range_ugi)
620 {
621
622 using Teuchos::RCP;
623 using Teuchos::rcp;
624 using Teuchos::rcp_dynamic_cast;
625
626 using OT = Teuchos::OrdinalTraits<GlobalOrdinal>;
627
628 // typedef Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal> tp_matrix;
629 typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal> tp_map;
630
631 RCP<const tp_map> tp_rangemap;
632 RCP<const tp_map> tp_domainmap;
633 RCP<const tp_map> tp_rowmap;
634 RCP<const tp_map> tp_colmap;
635
636 {
637 // build maps
638 std::vector<GlobalOrdinal> gids;
639 range_ugi->getOwnedIndices(gids);
640 tp_rowmap = rcp(new tp_map(OT::invalid(), gids.data(), gids.size(), OT::zero(), range_ugi->getComm()));
641 tp_rangemap = tp_rowmap;
642 domain_ugi->getOwnedIndices(gids);
643 tp_domainmap = rcp(new tp_map(OT::invalid(), gids.data(), gids.size(), OT::zero(), domain_ugi->getComm()));
644 domain_ugi->getOwnedAndGhostedIndices(gids);
645 tp_colmap = rcp(new tp_map(OT::invalid(), gids.data(), gids.size(), OT::zero(), domain_ugi->getComm()));
646 }
647
648 domainMap_ = tp_domainmap;
649 rangeMap_ = tp_rangemap;
650 columnMap_ = tp_colmap;
651 import_ = rcp(new Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node>(domainMap_, columnMap_));
652
654
656
657 }
658
659 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
660 void
662 allocateColumnMapVector(size_t numVectors) {
663 colmapMV_ = rcp(new Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(columnMap_, numVectors));
664 }
665
666 // Pre-compute elements that own a DoF
667 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
668 void
670 precomputeOwnersAndOrientations(const Teuchos::RCP<const panzer::ConnManager> &conn) {
671 size_t maxNumElementsPerBlock = 0;
672 int numCells;
673 if (maxNumElementsPerBlock > 0)
674 numCells = std::min(maxNumElementsPerBlock, worksetSize);
675 else
676 numCells = worksetSize;
677
678 LocalOrdinal lclTargetSize = getRangeMap()->getLocalNumElements();
679 owner_d_ = Kokkos::View<LocalOrdinal*,DeviceSpace>("owner", lclTargetSize);
680
681 auto rangeLIDs_d = range_ugi->getLIDs();
682
683 auto owner_d = owner_d_;
684
685 // loop over element blocks
686 std::vector<std::string> elementBlockIds;
687 range_ugi->getElementBlockIds(elementBlockIds);
688 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
689
690 // loop over element worksets
691 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
692 Kokkos::View<int*,DeviceSpace>::host_mirror_type elementIds_h(elementIds.data(), elementIds.size());
693 Kokkos::View<int*,DeviceSpace> elementIds_d("elementIds_d", elementIds_h.extent(0));
694 Kokkos::deep_copy(elementIds_d, elementIds_h);
695 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
696 using range_type = Kokkos::RangePolicy<LocalOrdinal, DeviceSpace>;
697 Kokkos::parallel_for("miniEM::MatrixFreeInterpolationOp::cellLoop",
698 range_type(elemIter, std::min(elemIter+numCells,
699 elementIds_d.extent(0))),
700 KOKKOS_LAMBDA(const LocalOrdinal cellNo2) {
701 auto elemId = elementIds_d(cellNo2);
702
703 // loop over range LIDs
704 for(size_t rangeIter = 0; rangeIter < rangeLIDs_d.extent(1); ++rangeIter) {
705 LocalOrdinal range_row = rangeLIDs_d(elemId, rangeIter);
706 if (range_row < lclTargetSize)
707 owner_d(range_row) = elemId;
708 }
709 });
710 }
711 }
712
713 // get orientations for all blocks
714 std::map<std::string, std::vector<Intrepid2::Orientation> > orientations;
715 buildIntrepidOrientations(elementBlockIds, *conn, orientations);
716
717 using HostSpace = Kokkos::HostSpace;
718
719 // copy orientations to device
720 for (auto const& orientation : orientations) {
721 auto blockOrientations = orientation.second;
722 orientations_[orientation.first] = typename Kokkos::DynRankView<Intrepid2::Orientation,DeviceSpace>("elemOrts_d", blockOrientations.size());
723 Kokkos::View<Intrepid2::Orientation*, HostSpace> elemOrts_h(blockOrientations.data(), blockOrientations.size());
724 Kokkos::deep_copy(orientations_[orientation.first], elemOrts_h);
725 }
726 }
727
728 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
729 void
731 apply (const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
732 Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
733 Teuchos::ETransp mode,
734 Scalar alpha,
735 Scalar beta) const {
736
737 if (mode == Teuchos::NO_TRANS) {
738 applyNonTransposed(X, Y, alpha, beta);
739 return;
740 } else if (mode == Teuchos::TRANS) {
741 applyTransposed(X, Y, alpha, beta);
742 return;
743 } else
744 TEUCHOS_ASSERT(false);
745 }
746
747 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
748 void
750 applyNonTransposed (const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
751 Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
752 Scalar alpha,
753 Scalar beta) const {
754
755 using Teuchos::RCP;
756 using Teuchos::rcp;
757 using Teuchos::rcp_dynamic_cast;
758 using range_type = Kokkos::RangePolicy<LocalOrdinal, DeviceSpace>;
759
760 using ots = Intrepid2::OrientationTools<DeviceSpace>;
761 using li = Intrepid2::LagrangianInterpolation<DeviceSpace>;
762 using DynRankDeviceView = Kokkos::DynRankView<Scalar,DeviceSpace>;
763 using view_t = typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev;
764 using const_view_t = typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev::const_type;
765
766 Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer(std::string("Mini-EM: matrix-free apply no_trans ") + name));
767
768 using TST = Teuchos::ScalarTraits<Scalar>;
769 const Scalar ZERO = TST::zero();
770 if (beta == ZERO)
771 Y.putScalar(ZERO);
772 colmapMV_->doImport(X, *import_,Tpetra::INSERT);
773
774 const_view_t lclX = colmapMV_->getLocalViewDevice(Tpetra::Access::ReadOnly);
775 view_t lclY = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
776 size_t numVectors = lclY.extent(1);
777 LocalOrdinal lclTargetSize = getRangeMap()->getLocalNumElements();
778
779 // get the domain and range bases
780 auto domain_fieldPattern = domain_ugi->getFieldPattern(domain_basis_name);
781 auto domain_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(domain_fieldPattern,true)->getIntrepidBasis();
782 auto range_fieldPattern = range_ugi->getFieldPattern(range_basis_name);
783 auto range_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(range_fieldPattern,true)->getIntrepidBasis();
784
785 // cardinalities
786 const size_t domainCardinality = domain_basis->getCardinality();
787 const size_t rangeCardinality = range_basis->getCardinality();
788
789 size_t maxNumElementsPerBlock = 0;
790
791 const int dim = range_basis->getBaseCellTopology().getDimension();
792
793 if (op == Intrepid2::OPERATOR_VALUE) {
794 TEUCHOS_ASSERT(rangeCardinality >= domainCardinality);
795 TEUCHOS_ASSERT_EQUALITY(domain_basis->getFunctionSpace(), range_basis->getFunctionSpace());
796 }
797
798 // allocate some views
799 int numCells;
800 if (maxNumElementsPerBlock > 0)
801 numCells = std::min(maxNumElementsPerBlock, worksetSize);
802 else
803 numCells = worksetSize;
804 DynRankDeviceView range_dofCoords_d("range_dofCoords_d", rangeCardinality, dim);
805 DynRankDeviceView basisCoeffsLIOriented_d("basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
806
807 // the ranks of these depend on dimension
808 DynRankDeviceView valuesAtDofCoordsNonOriented_d;
809 DynRankDeviceView valuesAtDofCoordsOriented_d;
810 DynRankDeviceView reducedValuesAtDofCoordsOriented_d;
811
812 {
813 // Let Intrepid2 give us the correctly dimensioned view, then build one with +1 ranks and extent(0) == numCells
814 auto temp = domain_basis->allocateOutputView(rangeCardinality, op);
815
816 // These view have dimensions
817 // numCells, numFields=domainCardinality, numPoints=rangeCardinality, (spatialDim)
818 //
819 if (temp.rank() == 3) {
820 valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
821 valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
822 reducedValuesAtDofCoordsOriented_d = DynRankDeviceView("reducedValuesAtDofCoordsOriented_d", numCells, temp.extent(1), temp.extent(2), numVectors);
823 } else {
824 valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
825 valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
826 reducedValuesAtDofCoordsOriented_d = DynRankDeviceView("reducedValuesAtDofCoordsOriented_d", numCells, temp.extent(1), numVectors);
827 }
828 }
829
830 int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
831 TEUCHOS_ASSERT((fieldRank == 0) || (fieldRank == 1));
832
833 auto rangeLIDs_d = range_ugi->getLIDs();
834 auto domainLIDs_d = domain_ugi->getLIDs();
835
836 // range dof coordinates
837 range_basis->getDofCoords(range_dofCoords_d);
838
839 // compute values of op * (domain basis) at range dof coords on reference element
840 domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
841
842 // loop over element blocks
843 std::vector<std::string> elementBlockIds;
844 range_ugi->getElementBlockIds(elementBlockIds);
845 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
846
847 auto rangeOffsets_d = range_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
848 auto domainOffsets_d = domain_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
849
850 // get element ids
851 Kokkos::View<int *, DeviceSpace> elementIds_d;
852 {
853 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
854 Kokkos::View<int *, Kokkos::HostSpace> elementIds_h(elementIds.data(), elementIds.size());
855 elementIds_d = Kokkos::View<int *, DeviceSpace>("elementIds_d", elementIds_h.extent(0));
856 Kokkos::deep_copy(elementIds_d, elementIds_h);
857 }
858
859 // get element orientations
860 auto elemOrts_d = orientations_.at(elementBlockIds[blockIter]);
861
862 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
863
864 int endCellRange =
865 std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
866 Teuchos::as<int>(elemIter));
867
868 // get subviews on workset
869 auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
870 auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
871 // Last workset might be shorter.
872 auto worksetRange = Kokkos::make_pair(0, endCellRange);
873 auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
874 auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
875
876 // apply orientations for LO basis
877 // shuffles things in the second dimension, i.e. wrt LO basis
878 ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
879 valuesAtDofCoordsNonOriented_d,
880 elemOrtsWorkset_d,
881 domain_basis.get());
882
883 Kokkos::deep_copy(reducedValuesAtDofCoordsOriented_d, 0.0);
884
885 if (reducedValuesAtDofCoordsOriented_d.rank() == 4) {
886 Kokkos::parallel_for("miniEM:MatrixFreeInterpolationOp:cellLoop1",
887 range_type(0, std::min(numCells, elementIds_d.extent_int(0)-Teuchos::as<int>(elemIter))),
888 KOKKOS_LAMBDA(const LocalOrdinal cellNo) {
889 LocalOrdinal cellNo2 = elemIter+cellNo;
890 LocalOrdinal elemId = elementIds_d(cellNo2);
891 for(size_t domainIter=0; domainIter<domainCardinality; domainIter++) {
892 LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
893 for(size_t rangeIter=0; rangeIter<rangeCardinality; rangeIter++) {
894 for(size_t d=0; d<valuesAtDofCoordsOriented_d.extent(3); d++) {
895 Scalar val = valuesAtDofCoordsOriented_d(cellNo, domainIter, rangeIter, d);
896 for (size_t j = 0; j<numVectors; ++j)
897 reducedValuesAtDofCoordsOriented_d(cellNo, rangeIter, d, j) += val * lclX(J, j);
898 }
899 }
900 }
901 });
902
903 for (size_t j = 0; j<numVectors; ++j)
904 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), j),
905 Kokkos::subview(reducedValuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), j),
906 range_basis.get(),
907 elemOrtsWorkset_d
908 );
909 } else {
910 Kokkos::parallel_for("miniEM:MatrixFreeInterpolationOp:cellLoop1",
911 range_type(0, std::min(numCells, elementIds_d.extent_int(0)-Teuchos::as<int>(elemIter))),
912 KOKKOS_LAMBDA(const LocalOrdinal cellNo) {
913 LocalOrdinal cellNo2 = elemIter+cellNo;
914 LocalOrdinal elemId = elementIds_d(cellNo2);
915 for(size_t domainIter=0; domainIter<domainCardinality; domainIter++) {
916 LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
917 for(size_t rangeIter=0; rangeIter<rangeCardinality; rangeIter++) {
918 Scalar val = valuesAtDofCoordsOriented_d(cellNo, domainIter, rangeIter);
919 for (size_t j = 0; j<numVectors; ++j)
920 reducedValuesAtDofCoordsOriented_d(cellNo, rangeIter, j) += val * lclX(J, j);
921 }
922 }
923 });
924
925 for (size_t j = 0; j<numVectors; ++j)
926 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), j),
927 Kokkos::subview(reducedValuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), j),
928 range_basis.get(),
929 elemOrtsWorkset_d
930 );
931 }
932
933 auto owner_d = owner_d_;
934
935 Kokkos::parallel_for("miniEM::MatrixFreeInterpolationOp::cellLoop2",
936 range_type(elemIter, std::min(elemIter+numCells,
937 elementIds_d.extent(0))),
938 KOKKOS_LAMBDA(const LocalOrdinal cellNo2) {
939 LocalOrdinal cellNo = cellNo2-elemIter;
940 LocalOrdinal elemId = elementIds_d(cellNo2);
941
942 // loop over range LIDs
943 for(size_t rangeIter = 0; rangeIter < rangeLIDs_d.extent(1); ++rangeIter) {
944 LocalOrdinal range_row = rangeLIDs_d(elemId, rangeOffsets_d(rangeIter));
945
946 // if owned
947 if ((range_row < lclTargetSize) && (owner_d(range_row) == elemId)) {
948
949 for (size_t j = 0; j<numVectors; ++j) {
950 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, j);
951 lclY(range_row,j) = beta*lclY(range_row,j) + alpha*val;
952 }
953 } // end if owned
954 } // end range LID loop
955 }); // end element loop
956 } //end workset loop
957 } //end element block loop
958
959 }
960
961 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
962 void
964 applyTransposed (const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
965 Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
966 Scalar alpha,
967 Scalar beta) const {
968
969 using Teuchos::RCP;
970 using Teuchos::rcp;
971 using Teuchos::rcp_dynamic_cast;
972 using range_type = Kokkos::RangePolicy<LocalOrdinal, DeviceSpace>;
973
974 typedef Intrepid2::OrientationTools<DeviceSpace> ots;
975 typedef Intrepid2::LagrangianInterpolation<DeviceSpace> li;
976 typedef Kokkos::DynRankView<Scalar,DeviceSpace> DynRankDeviceView;
977 using TST = Teuchos::ScalarTraits<Scalar>;
978 const Scalar ZERO = TST::zero();
979 using view_t = typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev;
980 using const_view_t = typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev::const_type;
981
982 Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer(std::string("Mini-EM: matrix-free apply trans ") + name));
983
984 const_view_t lclX = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
985 colmapMV_->putScalar(ZERO);
986 view_t lclYtemp = colmapMV_->getLocalViewDevice(Tpetra::Access::ReadWrite);
987
988 // get the domain and range bases
989 auto domain_fieldPattern = domain_ugi->getFieldPattern(domain_basis_name);
990 auto domain_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(domain_fieldPattern,true)->getIntrepidBasis();
991 auto range_fieldPattern = range_ugi->getFieldPattern(range_basis_name);
992 auto range_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(range_fieldPattern,true)->getIntrepidBasis();
993
994 // cardinalities
995 const size_t domainCardinality = domain_basis->getCardinality();
996 const size_t rangeCardinality = range_basis->getCardinality();
997
998 size_t maxNumElementsPerBlock = 0;
999
1000 const int dim = range_basis->getBaseCellTopology().getDimension();
1001
1002 if (op == Intrepid2::OPERATOR_VALUE) {
1003 TEUCHOS_ASSERT(rangeCardinality >= domainCardinality);
1004 TEUCHOS_ASSERT_EQUALITY(domain_basis->getFunctionSpace(), range_basis->getFunctionSpace());
1005 }
1006
1007 // allocate some views
1008 int numCells;
1009 if (maxNumElementsPerBlock > 0)
1010 numCells = std::min(maxNumElementsPerBlock, worksetSize);
1011 else
1012 numCells = worksetSize;
1013 DynRankDeviceView range_dofCoords_d("range_dofCoords_d", rangeCardinality, dim);
1014 DynRankDeviceView basisCoeffsLIOriented_d("basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
1015
1016 // the ranks of these depend on dimension
1017 DynRankDeviceView valuesAtDofCoordsNonOriented_d;
1018 DynRankDeviceView valuesAtDofCoordsOriented_d;
1019
1020 {
1021 // Let Intrepid2 give us the correctly dimensioned view, then build one with +1 ranks and extent(0) == numCells
1022 auto temp = domain_basis->allocateOutputView(rangeCardinality, op);
1023
1024 // These view have dimensions
1025 // numCells, numFields=domainCardinality, numPoints=rangeCardinality, (spatialDim)
1026 //
1027 if (temp.rank() == 3) {
1028 valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
1029 valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
1030 } else {
1031 valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
1032 valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
1033 }
1034 }
1035
1036 int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
1037 TEUCHOS_ASSERT((fieldRank == 0) || (fieldRank == 1));
1038
1039 auto rangeLIDs_d = range_ugi->getLIDs();
1040 auto domainLIDs_d = domain_ugi->getLIDs();
1041 Kokkos::fence();
1042
1043 // range dof coordinates
1044 range_basis->getDofCoords(range_dofCoords_d);
1045
1046 // compute values of op * (domain basis) at range dof coords on reference element
1047 domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
1048
1049 // loop over element blocks
1050 std::vector<std::string> elementBlockIds;
1051 range_ugi->getElementBlockIds(elementBlockIds);
1052 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
1053
1054 auto rangeOffsets_d = range_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
1055 auto domainOffsets_d = domain_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
1056
1057 // get element ids
1058 Kokkos::View<int *, DeviceSpace> elementIds_d;
1059 {
1060 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
1061 Kokkos::View<int *, Kokkos::HostSpace> elementIds_h(elementIds.data(), elementIds.size());
1062 elementIds_d = Kokkos::View<int *, DeviceSpace>("elementIds_d", elementIds_h.extent(0));
1063 Kokkos::deep_copy(elementIds_d, elementIds_h);
1064 }
1065
1066 // get element orientations
1067 auto elemOrts_d = orientations_.at(elementBlockIds[blockIter]);
1068
1069 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
1070
1071 int endCellRange =
1072 std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
1073 Teuchos::as<int>(elemIter));
1074
1075 // get subviews on workset
1076 auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
1077 auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
1078 // Last workset might be shorter.
1079 auto worksetRange = Kokkos::make_pair(0, endCellRange);
1080 auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
1081 auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
1082
1083 // apply orientations for domain basis
1084 // shuffles things in the second dimension, i.e. wrt domain basis
1085 ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
1086 valuesAtDofCoordsNonOriented_d,
1087 elemOrtsWorkset_d,
1088 domain_basis.get());
1089 Kokkos::fence();
1090
1091 // get basis coefficients of domain basis functions wrt range basis
1092 for(size_t domainIter=0; domainIter<domainCardinality; domainIter++)
1093 // Get basis coeffs wrt range basis on reference element.
1094 // basisCoeffsLI has dimensions (numCells, numFields=rangeCardinality, domainCardinality)
1095 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), domainIter),
1096 Kokkos::subview(valuesAtDofCoordsOrientedWorkset_d, Kokkos::ALL(), domainIter, Kokkos::ALL(), Kokkos::ALL()),
1097 range_basis.get(), elemOrtsWorkset_d);
1098 Kokkos::fence();
1099
1100 auto owner_d = owner_d_;
1101
1102
1103 Kokkos::parallel_for("miniEM::MatrixFreeInterpolationOp::cellLoop",
1104 range_type(elemIter, std::min(elemIter+numCells,
1105 elementIds_d.extent(0))),
1106 KOKKOS_LAMBDA(const LocalOrdinal cellNo2) {
1107 LocalOrdinal cellNo = cellNo2-elemIter;
1108 LocalOrdinal elemId = elementIds_d(cellNo2);
1109
1110 // loop over range LIDs
1111 for(size_t rangeIter = 0; rangeIter < rangeLIDs_d.extent(1); ++rangeIter) {
1112 LocalOrdinal range_row = rangeLIDs_d(elemId, rangeOffsets_d(rangeIter));
1113
1114 // if owned
1115 if ((range_row < (LocalOrdinal) lclX.extent(0)) && (owner_d(range_row) == elemId)) {
1116
1117 for(size_t domainIter = 0; domainIter < domainLIDs_d.extent(1); domainIter++) {
1118 LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
1119 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
1120 for (size_t j = 0; j<lclYtemp.extent(1); ++j)
1121 Kokkos::atomic_add(&lclYtemp(J,j), alpha*val*lclX(range_row,j));
1122 }
1123 } //end if owned
1124 } //end range LID loop
1125 }); // end element loop
1126 } //end workset loop
1127 } //end element block loop
1128 Kokkos::fence();
1129
1130 if (beta == ZERO)
1131 Y.putScalar(ZERO);
1132 else
1133 Y.scale(beta);
1134 Y.doExport(*colmapMV_, *import_, Tpetra::ADD_ASSIGN);
1135 Kokkos::fence();
1136 }
1137
1138
1139}
Kokkos::View< typename Sacado::ScalarType< ScalarT >::type **, Kokkos::LayoutRight, PHX::Device > vals
Teuchos::RCP< panzer::DOFManager > domain_ugi
void apply(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > domainMap_
Teuchos::RCP< const Tpetra::Import< LocalOrdinal, GlobalOrdinal, Node > > import_
void allocateColumnMapVector(size_t numVectors)
MatrixFreeInterpolationOp(const Teuchos::RCP< const panzer::ConnManager > &conn, const Teuchos::RCP< panzer::DOFManager > &_domain_ugi, const Teuchos::RCP< panzer::DOFManager > &_range_ugi, const std::string &_domain_basis_name, const std::string &_range_basis_name, Intrepid2::EOperator _op=Intrepid2::OPERATOR_VALUE, size_t _worksetSize=1000)
void applyTransposed(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > rangeMap_
Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > columnMap_
void applyNonTransposed(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
void precomputeOwnersAndOrientations(const Teuchos::RCP< const panzer::ConnManager > &conn)
Teuchos::RCP< panzer::DOFManager > range_ugi
void buildIntrepidOrientations(const std::vector< std::string > &eBlockNames, const panzer::ConnManager &connMgrInput, std::map< std::string, std::vector< Intrepid2::Orientation > > &orientations)
Builds the element orientations for the specified element blocks.
Teuchos::RCP< Thyra::LinearOpBase< double > > buildInterpolation(const Teuchos::RCP< const panzer::LinearObjFactory< panzer::Traits > > &linObjFactory, const std::string &domain_basis_name, const std::string &range_basis_name, Intrepid2::EOperator op, size_t worksetSize, const bool matrixFree)
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > removeSmallEntries(Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, typename Teuchos::ScalarTraits< Scalar >::magnitudeType tol)