Panzer Version of the Day
Loading...
Searching...
No Matches
Panzer_Interpolation.cpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Panzer: A partial differential equation assembly
4// engine for strongly coupled complex multiphysics systems
5//
6// Copyright 2011 NTESS and the Panzer contributors.
7// SPDX-License-Identifier: BSD-3-Clause
8// *****************************************************************************
9// @HEADER
10
14#include "Intrepid2_OrientationTools.hpp"
15#include "Intrepid2_LagrangianInterpolation.hpp"
16#ifdef PANZER_HAVE_EPETRA_STACK
17#include "Epetra_MpiComm.h"
18#endif
19
20// #define PANZER_INTERPOLATION_DEBUG_OUTPUT = 1
21
22namespace panzer {
23
24template <class Scalar,
25 class LocalOrdinal,
26 class GlobalOrdinal,
27 class Node>
28Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
29removeSmallEntries(Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& A,
30 typename Teuchos::ScalarTraits<Scalar>::magnitudeType tol) {
31 using crs_matrix = Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
32 using row_ptr_type = typename crs_matrix::local_graph_device_type::row_map_type::non_const_type;
33 using col_idx_type = typename crs_matrix::local_graph_device_type::entries_type::non_const_type;
34 using vals_type = typename crs_matrix::local_matrix_device_type::values_type;
35
36#if KOKKOS_VERSION >= 40799
37 using ATS = KokkosKernels::ArithTraits<Scalar>;
38#else
39 using ATS = Kokkos::ArithTraits<Scalar>;
40#endif
41 using impl_SC = typename ATS::val_type;
42#if KOKKOS_VERSION >= 40799
43 using impl_ATS = KokkosKernels::ArithTraits<impl_SC>;
44#else
45 using impl_ATS = Kokkos::ArithTraits<impl_SC>;
46#endif
47
48 auto lclA = A->getLocalMatrixDevice();
49
50 auto rowptr = row_ptr_type("rowptr", lclA.numRows() + 1);
51
52 Kokkos::parallel_for(
53 "removeSmallEntries::rowptr1",
54 Kokkos::RangePolicy<LocalOrdinal>(0, lclA.numRows()),
55 KOKKOS_LAMBDA(const LocalOrdinal rlid) {
56 auto row = lclA.row(rlid);
57 for (LocalOrdinal k = 0; k < row.length; ++k) {
58 if (impl_ATS::magnitude(row.value(k)) > tol) {
59 rowptr(rlid + 1) += 1;
60 }
61 }
62 });
63 LocalOrdinal nnz;
64 Kokkos::parallel_scan(
65 "removeSmallEntries::rowptr2",
66 Kokkos::RangePolicy<LocalOrdinal>(0, lclA.numRows()),
67 KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) {
68 partial_nnz += rowptr(rlid + 1);
69 if (is_final)
70 rowptr(rlid + 1) = partial_nnz;
71 },
72 nnz);
73
74 auto idx = col_idx_type("idx", nnz);
75 auto vals = vals_type("vals", nnz);
76
77 Kokkos::parallel_for(
78 "removeSmallEntries::indicesValues",
79 Kokkos::RangePolicy<LocalOrdinal>(0, lclA.numRows()),
80 KOKKOS_LAMBDA(const LocalOrdinal rlid) {
81 auto row = lclA.row(rlid);
82 auto I = rowptr(rlid);
83 for (LocalOrdinal k = 0; k < row.length; ++k) {
84 if (impl_ATS::magnitude(row.value(k)) > tol) {
85 idx(I) = row.colidx(k);
86 vals(I) = row.value(k);
87 I += 1;
88 }
89 }
90 });
91 Kokkos::fence();
92
93 auto newA = Teuchos::rcp(new crs_matrix(A->getRowMap(), A->getColMap(), rowptr, idx, vals));
94 newA->fillComplete(A->getDomainMap(),
95 A->getRangeMap());
96 return newA;
97}
98
99
100Teuchos::RCP<Thyra::LinearOpBase<double>> buildInterpolation(const Teuchos::RCP<const panzer::LinearObjFactory<panzer::Traits>> &linObjFactory,
101 const std::string &domain_basis_name, const std::string &range_basis_name,
102 Intrepid2::EOperator op, size_t worksetSize,
103 const bool matrixFree)
104{
105 using Teuchos::RCP;
106 using Teuchos::rcp;
107 using Teuchos::rcp_dynamic_cast;
108
109 using Scalar = double;
110
112#ifdef PANZER_HAVE_EPETRA_STACK
113 using epetraBlockedLinObjFactory = typename panzer::BlockedEpetraLinearObjFactory<panzer::Traits, LocalOrdinal>;
114#endif
115 using UGI = panzer::GlobalIndexer;
116
117 // must be able to cast to a block linear object factory
118 RCP<const tpetraBlockedLinObjFactory > tblof = rcp_dynamic_cast<const tpetraBlockedLinObjFactory >(linObjFactory);
119#ifdef PANZER_HAVE_EPETRA_STACK
120 RCP<const epetraBlockedLinObjFactory > eblof = rcp_dynamic_cast<const epetraBlockedLinObjFactory >(linObjFactory);
121#endif
122
123 RCP<const panzer::BlockedDOFManager> blockedDOFMngr;
124 if (tblof != Teuchos::null) {
125 blockedDOFMngr = tblof->getGlobalIndexer();
126#ifdef PANZER_HAVE_EPETRA_STACK
127 } else if (eblof != Teuchos::null) {
128 blockedDOFMngr = eblof->getGlobalIndexer();
129#endif
130 } else {
131 TEUCHOS_ASSERT(false);
132 }
133
134 // get global indexers for domain and range dofs
135 std::vector<RCP<UGI> > fieldDOFMngrs = blockedDOFMngr->getFieldDOFManagers();
136 int domainFieldNum = blockedDOFMngr->getFieldNum(domain_basis_name);
137 int rangeFieldNum = blockedDOFMngr->getFieldNum(range_basis_name);
138 int domainBlockIndex = blockedDOFMngr->getFieldBlock(domainFieldNum);
139 int rangeBlockIndex = blockedDOFMngr->getFieldBlock(rangeFieldNum);
140 RCP<panzer::DOFManager> domain_ugi = rcp_dynamic_cast<panzer::DOFManager>(blockedDOFMngr->getFieldDOFManagers()[domainBlockIndex], true);
141 RCP<panzer::DOFManager> range_ugi = rcp_dynamic_cast<panzer::DOFManager>(blockedDOFMngr->getFieldDOFManagers()[rangeBlockIndex], true);
142
143 RCP<const panzer::ConnManager> conn = blockedDOFMngr->getConnManager();
144
145 return buildInterpolation(conn, domain_ugi, range_ugi, domain_basis_name, range_basis_name, op, worksetSize,
146 /*forceVectorial=*/false,
147 /*useTpetra=*/tblof != Teuchos::null,
148 matrixFree);
149}
150
151
152Teuchos::RCP<Thyra::LinearOpBase<double> > buildInterpolation(const Teuchos::RCP<const panzer::ConnManager> &conn,
153 const Teuchos::RCP<panzer::DOFManager> &domain_ugi,
154 const Teuchos::RCP<panzer::DOFManager> &range_ugi,
155 const std::string& domain_basis_name,
156 const std::string& range_basis_name,
157 Intrepid2::EOperator op,
158 size_t worksetSize,
159 const bool force_vectorial,
160 const bool useTpetra,
161 const bool matrixFree)
162{
163 using Teuchos::RCP;
164 using Teuchos::rcp;
165 using Teuchos::rcp_dynamic_cast;
166
167 using Scalar = double;
168
169 using STS = Teuchos::ScalarTraits<Scalar>;
170#if KOKKOS_VERSION >= 40799
171 using KAT = KokkosKernels::ArithTraits<Scalar>;
172#else
173 using KAT = Kokkos::ArithTraits<Scalar>;
174#endif
175 using OT = Teuchos::OrdinalTraits<GlobalOrdinal>;
176
177 using DeviceSpace = PHX::Device;
178 using HostSpace = Kokkos::HostSpace;
179 using ots = Intrepid2::OrientationTools<DeviceSpace>;
180 using li = Intrepid2::LagrangianInterpolation<DeviceSpace>;
181 using DynRankDeviceView = Kokkos::DynRankView<double, DeviceSpace>;
182
183 using tp_graph = Tpetra::CrsGraph<LocalOrdinal, GlobalOrdinal>;
184 using tp_matrix = Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal>;
185 using tp_map = Tpetra::Map<LocalOrdinal, GlobalOrdinal>;
186#ifdef PANZER_HAVE_EPETRA_STACK
187 using ep_matrix = Epetra_CrsMatrix;
188 using ep_map = Epetra_Map;
189#endif
190
191 if (matrixFree) {
192 TEUCHOS_ASSERT(useTpetra);
193 TEUCHOS_ASSERT(!force_vectorial);
194 auto mfOp = rcp(new MatrixFreeInterpolationOp<Scalar,LocalOrdinal,GlobalOrdinal>(conn, domain_ugi, range_ugi, domain_basis_name, range_basis_name, op, worksetSize));
195 return Thyra::tpetraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,typename tp_matrix::node_type>(Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(mfOp->getRangeMap()),
196 Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(mfOp->getDomainMap()),
197 mfOp);
198 }
199
200 // get the domain and range bases
201 auto domain_fieldPattern = domain_ugi->getFieldPattern(domain_basis_name);
202 auto domain_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(domain_fieldPattern,true)->getIntrepidBasis();
203 auto range_fieldPattern = range_ugi->getFieldPattern(range_basis_name);
204 auto range_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(range_fieldPattern,true)->getIntrepidBasis();
205
206 // cardinalities
207 const size_t domainCardinality = domain_basis->getCardinality();
208 const size_t rangeCardinality = range_basis->getCardinality();
209
210 const int dim = range_basis->getBaseCellTopology().getDimension();
211
212 if (op == Intrepid2::OPERATOR_VALUE) {
213 TEUCHOS_ASSERT(rangeCardinality >= domainCardinality);
214 TEUCHOS_ASSERT_EQUALITY(domain_basis->getFunctionSpace(), range_basis->getFunctionSpace());
215 }
216
217 // Create the global interp matrix.
218 RCP<const tp_map> tp_rangemap;
219 RCP<const tp_map> tp_domainmap;
220 RCP<const tp_map> tp_rowmap;
221 RCP<const tp_map> tp_colmap;
222 RCP<tp_matrix> tp_interp_matrix;
223 typename tp_matrix::local_matrix_device_type lcl_tp_interp_matrix;
224#ifdef PANZER_HAVE_EPETRA_STACK
225 RCP<const ep_map> ep_rangemap;
226 RCP<const ep_map> ep_domainmap;
227 RCP<const ep_map> ep_rowmap;
228 RCP<const ep_map> ep_colmap;
229 RCP<ep_matrix> ep_interp_matrix;
230#endif
231
232 auto rangeElementLIDs_d = range_ugi->getLIDs();
233 auto domainElementLIDs_d = domain_ugi->getLIDs();
234
235 RCP<Thyra::LinearOpBase<Scalar> > thyra_interp;
236 size_t maxNumElementsPerBlock = 0;
237 LocalOrdinal minLocalIndex = 0;
238 LocalOrdinal maxLocalIndex = 0;
239 if (useTpetra) {
240 // build maps
241 std::vector<GlobalOrdinal> gids;
242 range_ugi->getOwnedIndices(gids);
243 tp_rowmap = rcp(new tp_map(OT::invalid(), gids.data(), static_cast<LocalOrdinal>(gids.size()), OT::zero(), range_ugi->getComm()));
244 tp_rangemap = tp_rowmap;
245 domain_ugi->getOwnedIndices(gids);
246 tp_domainmap = rcp(new tp_map(OT::invalid(), gids.data(), static_cast<LocalOrdinal>(gids.size()), OT::zero(), domain_ugi->getComm()));
247 domain_ugi->getOwnedAndGhostedIndices(gids);
248 tp_colmap = rcp(new tp_map(OT::invalid(), gids.data(), static_cast<LocalOrdinal>(gids.size()), OT::zero(), domain_ugi->getComm()));
249
250 minLocalIndex = tp_rowmap->getMinLocalIndex();
251 maxLocalIndex = tp_rowmap->getMaxLocalIndex();
252
253 // estimate number of entries per row
254 // This is an upper bound, as we are counting dofs that are on shared nodes, edges, faces more than once.
255 using dv = Kokkos::DualView<size_t*, typename tp_graph::device_type>;
256 dv numEntriesPerRow("numEntriesPerRow", tp_rowmap->getLocalNumElements());
257 {
258 auto numEntriesPerRow_d = numEntriesPerRow.view_device();
259
260 // loop over element blocks
261 std::vector<std::string> elementBlockIds;
262 range_ugi->getElementBlockIds(elementBlockIds);
263 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
264
265 // loop over elements
266 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
267 Kokkos::View<int *, HostSpace> elementIds_h(elementIds.data(), elementIds.size());
268 Kokkos::View<int *, DeviceSpace> elementIds_d("elementIds_d", elementIds_h.extent(0));
269 Kokkos::deep_copy(elementIds_d, elementIds_h);
270 maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
271
272 Kokkos::parallel_for("MiniEM_Interpolation::numEntriesPerRow",
273 Kokkos::RangePolicy<size_t, typename tp_matrix::node_type::execution_space>(0, elementIds.size()),
274 KOKKOS_LAMBDA(const size_t elemIter) {
275 auto elemId = elementIds_d(elemIter);
276
277 // get IDs for range dofs
278 auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
279
280 // loop over range LIDs
281 for(size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
282 const LocalOrdinal range_row = rangeLIDs_d(rangeIter);
283 const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
284 if (isOwned)
285 Kokkos::atomic_add(&numEntriesPerRow_d(range_row), domainCardinality);
286 } //end range LID loop
287 });
288 } // blocks loop
289 numEntriesPerRow.template modify<typename dv::t_dev>();
290 numEntriesPerRow.template sync<typename dv::t_host>();
291 }
292
293 // Set up graph
294 auto tp_interp_graph = rcp(new tp_graph(tp_rowmap, tp_colmap, numEntriesPerRow));
295
296 { // This runs on host
297 Kokkos::View<LocalOrdinal**, HostSpace> rangeElementLIDs_h("rangeElementLIDs_h", rangeElementLIDs_d.extent(0), rangeCardinality);
298 Kokkos::View<LocalOrdinal**, HostSpace> domainElementLIDs_h("domainElementLIDs_h", domainElementLIDs_d.extent(0), domainCardinality);
299 Kokkos::deep_copy(rangeElementLIDs_h, rangeElementLIDs_d);
300 Kokkos::deep_copy(domainElementLIDs_h, domainElementLIDs_d);
301
302 // loop over element blocks
303 std::vector<std::string> elementBlockIds;
304 range_ugi->getElementBlockIds(elementBlockIds);
305 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
306
307 // loop over elements
308 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
309 maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
310 for(std::size_t elemIter = 0; elemIter < elementIds.size(); ++elemIter) {
311 auto elemId = elementIds[elemIter];
312
313 // get IDs for range dofs
314 auto rangeLIDs_h = Kokkos::subview(rangeElementLIDs_h, elemId, Kokkos::ALL());
315 auto domainLIDs_h = Kokkos::subview(domainElementLIDs_h, elemId, Kokkos::ALL());
316
317 // loop over range LIDs
318 for(size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
319 const LocalOrdinal range_row = rangeLIDs_h(rangeIter);
320 const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
321 if (isOwned) {
322 Teuchos::ArrayView<LocalOrdinal> domainLIDs_av = Teuchos::ArrayView<LocalOrdinal>(domainLIDs_h.data(), domainLIDs_h.extent_int(0));
323 tp_interp_graph->insertLocalIndices(range_row, domainLIDs_av);
324 }
325 } //end range LID loop
326 } // elements loop
327 } // blocks loop
328 }
329
330 RCP<Teuchos::ParameterList> pl = rcp(new Teuchos::ParameterList());
331 pl->set("Optimize Storage", true);
332 tp_interp_graph->fillComplete(tp_domainmap, tp_rangemap, pl);
333
334 tp_interp_matrix = rcp(new tp_matrix(tp_interp_graph));
335 lcl_tp_interp_matrix = tp_interp_matrix->getLocalMatrixDevice();
336 }
337#ifdef PANZER_HAVE_EPETRA_STACK
338 else {
339
340 const RCP<const Teuchos::MpiComm<int> > mpiComm = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(range_ugi->getComm());
341 auto ep_comm = Teuchos::rcp(new Epetra_MpiComm(*mpiComm->getRawMpiComm()));
342 std::vector<GlobalOrdinal> gids;
343 range_ugi->getOwnedIndices(gids);
344 ep_rowmap = rcp(new ep_map(OT::invalid(), static_cast<LocalOrdinal>(gids.size()), gids.data(), OT::zero(), *ep_comm));
345 ep_rangemap = ep_rowmap;
346 domain_ugi->getOwnedIndices(gids);
347 ep_domainmap = rcp(new ep_map(OT::invalid(), static_cast<LocalOrdinal>(gids.size()), gids.data(), OT::zero(), *ep_comm));
348 domain_ugi->getOwnedAndGhostedIndices(gids);
349 ep_colmap = rcp(new ep_map(OT::invalid(), static_cast<LocalOrdinal>(gids.size()), gids.data(), OT::zero(), *ep_comm));
350
351 {
352 // loop over element blocks
353 std::vector<std::string> elementBlockIds;
354 range_ugi->getElementBlockIds(elementBlockIds);
355 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
356
357 // loop over elements
358 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
359 maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
360 }
361 }
362
363 // TODO: Fix this.
364 size_t nnzPerRowEstimate = 25*domainCardinality;
365
366 ep_interp_matrix = rcp(new ep_matrix(Copy, *ep_rowmap, *ep_colmap, static_cast<int>(nnzPerRowEstimate), /*StaticProfile=*/true));
367
368 RCP<const Thyra::LinearOpBase<double> > th_ep_interp = Thyra::epetraLinearOp(ep_interp_matrix,
369 Thyra::NOTRANS,
370 Thyra::EPETRA_OP_APPLY_APPLY,
371 Thyra::EPETRA_OP_ADJOINT_SUPPORTED,
372 Thyra::create_VectorSpace(ep_rangemap),
373 Thyra::create_VectorSpace(ep_domainmap));
374 thyra_interp = Teuchos::rcp_const_cast<Thyra::LinearOpBase<double> >(th_ep_interp);
375 }
376#endif
377
378 // allocate some views
379 int numCells;
380 if (maxNumElementsPerBlock > 0)
381 numCells = static_cast<int>(std::min(maxNumElementsPerBlock, worksetSize));
382 else
383 numCells = static_cast<int>(worksetSize);
384
385 DynRankDeviceView range_dofCoords_d("range_dofCoords_d", rangeCardinality, dim);
386 DynRankDeviceView basisCoeffsLIOriented_d("basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
387
388 // the ranks of these depend on dimension
389 DynRankDeviceView valuesAtDofCoordsNonOriented_d;
390 DynRankDeviceView valuesAtDofCoordsOriented_d;
391
392 if (!force_vectorial) {
393 // Let Intrepid2 give us the correctly dimensioned view, then build one with +1 ranks and extent(0) == numCells
394 auto temp = domain_basis->allocateOutputView(static_cast<int>(rangeCardinality), op);
395
396 // These view have dimensions
397 // numCells, numFields=domainCardinality, numPoints=rangeCardinality, (spatialDim)
398 //
399 if (temp.rank() == 3) {
400 valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
401 valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
402 } else {
403 valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
404 valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
405 }
406 } else {
407 valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", domainCardinality, rangeCardinality, dim);
408 valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, domainCardinality, rangeCardinality, dim);
409 }
410
411 int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
412 TEUCHOS_ASSERT((fieldRank == 0) || (fieldRank == 1));
413
414 auto entryFilterTol = 1e5*Teuchos::ScalarTraits<typename STS::magnitudeType>::eps();
415
416 // range dof coordinates
417 range_basis->getDofCoords(range_dofCoords_d);
418
419 // compute values of op * (domain basis) at range dof coords on reference element
420 domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
421
422 // get block ids
423 std::vector<std::string> elementBlockIds;
424 range_ugi->getElementBlockIds(elementBlockIds);
425
426 // get orientations for all blocks
427 std::map<std::string, std::vector<Intrepid2::Orientation> > orientations;
428 buildIntrepidOrientations(elementBlockIds, *conn, orientations);
429
430 // loop over element blocks
431 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
432
433 auto rangeOffsets_d = range_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
434 auto domainOffsets_d = domain_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
435#ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
436 std::cout << "rangeOffsets_d" << std::endl;
437 for (int i = 0; i < rangeCardinality; i++)
438 std::cout << rangeOffsets_d(i) << " ";
439 std::cout << std::endl;
440 std::cout << "domainOffsets_d" << std::endl;
441 for (int i = 0; i < domainCardinality; i++)
442 std::cout << domainOffsets_d(i) << " ";
443 std::cout << std::endl;
444#endif
445
446 // get element ids
447 Kokkos::View<int *, DeviceSpace> elementIds_d;
448 {
449 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
450 Kokkos::View<int *, HostSpace> elementIds_h(elementIds.data(), elementIds.size());
451 elementIds_d = Kokkos::View<int *, DeviceSpace>("elementIds_d", elementIds_h.extent(0));
452 Kokkos::deep_copy(elementIds_d, elementIds_h);
453 }
454
455 // get element orientations
456 typename Kokkos::DynRankView<Intrepid2::Orientation,DeviceSpace> elemOrts_d ("elemOrts_d", elementIds_d.extent(0));
457 {
458 // copy orientations to device
459 auto blockOrientations = orientations[elementBlockIds[blockIter]];
460 Kokkos::View<Intrepid2::Orientation*, HostSpace> elemOrts_h(blockOrientations.data(), blockOrientations.size());
461 Kokkos::deep_copy(elemOrts_d, elemOrts_h);
462 }
463
464 // loop over element worksets
465 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
466
467 int endCellRange =
468 std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
469 Teuchos::as<int>(elemIter));
470
471 // get subviews on workset
472 auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
473 auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
474 // Last workset might be shorter.
475 auto worksetRange = Kokkos::make_pair(0, endCellRange);
476 auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
477 auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
478
479 // apply orientations for domain basis
480 // shuffles things in the second dimension, i.e. wrt domain basis
481 ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
482 valuesAtDofCoordsNonOriented_d,
483 elemOrtsWorkset_d,
484 domain_basis.get());
485
486 // get basis coefficients of domain basis functions wrt range basis
487 for(size_t domainIter=0; domainIter<domainCardinality; domainIter++)
488 // Get basis coeffs wrt range basis on reference element.
489 // basisCoeffsLI has dimensions (numCells, numFields=rangeCardinality, domainCardinality)
490 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), domainIter),
491 Kokkos::subview(valuesAtDofCoordsOrientedWorkset_d, Kokkos::ALL(), domainIter, Kokkos::ALL(), Kokkos::ALL()),
492 range_basis.get(),
493 elemOrtsWorkset_d);
494
495#ifdef PANZER_HAVE_EPETRA_STACK
496 if (!useTpetra) { // Epetra fill
497
498 Kokkos::View<LocalOrdinal*,DeviceSpace> indices_d("indices", domainCardinality);
499 Kokkos::View<Scalar*, DeviceSpace> values_d ("values", domainCardinality);
500
501
502 for (int cellNo = 0; cellNo < endCellRange; cellNo++) {
503 auto elemId = elementIds_d(elemIter+cellNo);
504
505 // get IDs for range and domain dofs
506 auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
507 auto domainLIDs_d = Kokkos::subview(domainElementLIDs_d, elemId, Kokkos::ALL());
508
509 // loop over range LIDs
510 for(size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
511 const LocalOrdinal range_row = rangeLIDs_d(rangeOffsets_d(rangeIter));
512 const bool isOwned = ep_rowmap->MyLID(range_row);
513 if (isOwned) {
514 // filter entries for zeros
515 LocalOrdinal rowNNZ = 0;
516 for(size_t domainIter = 0; domainIter < domainCardinality; domainIter++) {
517 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
518 if (KAT::magnitude(val) > entryFilterTol) {
519 indices_d(rowNNZ) = domainLIDs_d(domainOffsets_d(domainIter));
520 values_d(rowNNZ) = val;
521 ++rowNNZ;
522 }
523 }
524
525 int ret = ep_interp_matrix->ReplaceMyValues(range_row, rowNNZ, values_d.data(), indices_d.data());
526 if (ret != 0) {
527 ret = ep_interp_matrix->InsertMyValues(range_row, rowNNZ, values_d.data(), indices_d.data());
528 TEUCHOS_ASSERT(ret == 0);
529 }
530 } //end if owned
531 } // end range LID loop
532 } //end workset loop
533 } // Epetra fill
534 else
535#endif
536 { // Tpetra fill
537 Kokkos::parallel_for(
538 "MiniEM_Interpolation::worksetLoop",
539 Kokkos::RangePolicy<int, typename tp_matrix::node_type::execution_space>(0, endCellRange),
540 KOKKOS_LAMBDA(const int cellNo) {
541 auto elemId = elementIds_d(elemIter+cellNo);
542
543 // get IDs for range and domain dofs
544 auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
545 auto domainLIDs_d = Kokkos::subview(domainElementLIDs_d, elemId, Kokkos::ALL());
546
547#ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
548 std::cout << "\n" << elemOrts_d(elemIter+cellNo).to_string() << std::endl;
549 std::cout << "rangeLIDs" << std::endl;
550 for (int i = 0; i < rangeCardinality; i++)
551 std::cout << rangeLIDs_d(i) << " ";
552 std::cout << std::endl << "domainLIDs" << std::endl;
553 for (int i = 0; i < domainCardinality; i++)
554 std::cout << domainLIDs_d(i) << " ";
555 std::cout << std::endl;
556#endif
557 // loop over range LIDs
558 for(size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
559 const LocalOrdinal range_row = rangeLIDs_d(rangeOffsets_d(rangeIter));
560 const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
561 if (isOwned) {
562 // filter entries for zeros
563 for(size_t domainIter = 0; domainIter < domainCardinality; domainIter++) {
564 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
565 if (KAT::magnitude(val) > entryFilterTol) {
566
567#if defined(PANZER_INTERPOLATION_DEBUG_OUTPUT) || defined(PANZER_DEBUG)
568 {
569 // Check that there is no entry yet or that we are overwriting it with the same value
570 auto row = lcl_tp_interp_matrix.rowConst(range_row);
571 for(LocalOrdinal kk = 0; kk<row.length; ++kk)
572 if (row.colidx(kk) == domainLIDs_d(domainOffsets_d(domainIter)))
573 if (!(KAT::magnitude(row.value(kk)-val) < entryFilterTol || KAT::magnitude(row.value(kk)) < entryFilterTol)) {
574#ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
575 std::cout << "Replacing (" << range_row << "," << row.colidx(kk) << ") = " << row.value(kk) << " with " << val << std::endl;
576#endif
577#ifdef PANZER_DEBUG
578 Kokkos::abort("MiniEM interpolation worksetLoop failed!");
579#endif
580 }
581 }
582#endif
583#ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
584 std::cout << "Setting (" << range_row << "," << domainLIDs_d(domainOffsets_d(domainIter)) << ") = " << val << std::endl;
585#endif
586 lcl_tp_interp_matrix.replaceValues(range_row, &(domainLIDs_d(domainOffsets_d(domainIter))), 1, &val, /*is_sorted=*/false, /*force_atomic=*/true);
587 }
588 } //end if owned
589 } // isOwned
590 } // end range LID loop
591 }); //end workset loop
592 } // Tpetra fill
593 } //end element loop
594 } //end element block loop
595
596 if (useTpetra) {
597 tp_interp_matrix->fillComplete(tp_domainmap, tp_rangemap);
598
599 if (op != Intrepid2::OPERATOR_VALUE) {
600 // Discrete gradient, curl, etc actually live on edges, faces, etc, so we have a lot of zeros in the matrix.
601 tp_interp_matrix = removeSmallEntries(tp_interp_matrix, entryFilterTol);
602 }
603
604 thyra_interp = Thyra::tpetraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,typename tp_matrix::node_type>(Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(tp_rangemap),
605 Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(tp_domainmap),
606 tp_interp_matrix);
607 }
608#ifdef PANZER_HAVE_EPETRA_STACK
609 else
610 ep_interp_matrix->FillComplete(*ep_domainmap, *ep_rangemap);
611#endif
612
613 return thyra_interp;
614}
615
616 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
618 MatrixFreeInterpolationOp(const Teuchos::RCP<const panzer::ConnManager> &conn,
619 const Teuchos::RCP<panzer::DOFManager> &_domain_ugi,
620 const Teuchos::RCP<panzer::DOFManager> &_range_ugi,
621 const std::string& _domain_basis_name,
622 const std::string& _range_basis_name,
623 Intrepid2::EOperator _op,
624 size_t _worksetSize) :
625 name(""),
626 domain_basis_name(_domain_basis_name),
627 range_basis_name(_range_basis_name),
628 op(_op),
629 worksetSize(_worksetSize),
630 domain_ugi(_domain_ugi),
631 range_ugi(_range_ugi)
632 {
633
634 using Teuchos::RCP;
635 using Teuchos::rcp;
636 using Teuchos::rcp_dynamic_cast;
637
638 using OT = Teuchos::OrdinalTraits<GlobalOrdinal>;
639
640 // typedef Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal> tp_matrix;
641 typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal> tp_map;
642
643 RCP<const tp_map> tp_rangemap;
644 RCP<const tp_map> tp_domainmap;
645 RCP<const tp_map> tp_rowmap;
646 RCP<const tp_map> tp_colmap;
647
648 {
649 // build maps
650 std::vector<GlobalOrdinal> gids;
651 range_ugi->getOwnedIndices(gids);
652 tp_rowmap = rcp(new tp_map(OT::invalid(), gids.data(), gids.size(), OT::zero(), range_ugi->getComm()));
653 tp_rangemap = tp_rowmap;
654 domain_ugi->getOwnedIndices(gids);
655 tp_domainmap = rcp(new tp_map(OT::invalid(), gids.data(), gids.size(), OT::zero(), domain_ugi->getComm()));
656 domain_ugi->getOwnedAndGhostedIndices(gids);
657 tp_colmap = rcp(new tp_map(OT::invalid(), gids.data(), gids.size(), OT::zero(), domain_ugi->getComm()));
658 }
659
660 domainMap_ = tp_domainmap;
661 rangeMap_ = tp_rangemap;
662 columnMap_ = tp_colmap;
663 import_ = rcp(new Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node>(domainMap_, columnMap_));
664
666
668
669 }
670
671 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
672 void
674 allocateColumnMapVector(size_t numVectors) {
675 colmapMV_ = rcp(new Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(columnMap_, numVectors));
676 }
677
678 // Pre-compute elements that own a DoF
679 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
680 void
682 precomputeOwnersAndOrientations(const Teuchos::RCP<const panzer::ConnManager> &conn) {
683 size_t maxNumElementsPerBlock = 0;
684 int numCells;
685 if (maxNumElementsPerBlock > 0)
686 numCells = std::min(maxNumElementsPerBlock, worksetSize);
687 else
688 numCells = worksetSize;
689
690 LocalOrdinal lclTargetSize = getRangeMap()->getLocalNumElements();
691 owner_d_ = Kokkos::View<LocalOrdinal*,DeviceSpace>("owner", lclTargetSize);
692
693 auto rangeLIDs_d = range_ugi->getLIDs();
694
695 auto owner_d = owner_d_;
696
697 // loop over element blocks
698 std::vector<std::string> elementBlockIds;
699 range_ugi->getElementBlockIds(elementBlockIds);
700 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
701
702 // loop over element worksets
703 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
704 Kokkos::View<int*,DeviceSpace>::host_mirror_type elementIds_h(elementIds.data(), elementIds.size());
705 Kokkos::View<int*,DeviceSpace> elementIds_d("elementIds_d", elementIds_h.extent(0));
706 Kokkos::deep_copy(elementIds_d, elementIds_h);
707 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
708 using range_type = Kokkos::RangePolicy<LocalOrdinal, DeviceSpace>;
709 Kokkos::parallel_for("miniEM::MatrixFreeInterpolationOp::cellLoop",
710 range_type(elemIter, std::min(elemIter+numCells,
711 elementIds_d.extent(0))),
712 KOKKOS_LAMBDA(const LocalOrdinal cellNo2) {
713 auto elemId = elementIds_d(cellNo2);
714
715 // loop over range LIDs
716 for(size_t rangeIter = 0; rangeIter < rangeLIDs_d.extent(1); ++rangeIter) {
717 LocalOrdinal range_row = rangeLIDs_d(elemId, rangeIter);
718 if (range_row < lclTargetSize)
719 owner_d(range_row) = elemId;
720 }
721 });
722 }
723 }
724
725 // get orientations for all blocks
726 std::map<std::string, std::vector<Intrepid2::Orientation> > orientations;
727 buildIntrepidOrientations(elementBlockIds, *conn, orientations);
728
729 using HostSpace = Kokkos::HostSpace;
730
731 // copy orientations to device
732 for (auto const& orientation : orientations) {
733 auto blockOrientations = orientation.second;
734 orientations_[orientation.first] = typename Kokkos::DynRankView<Intrepid2::Orientation,DeviceSpace>("elemOrts_d", blockOrientations.size());
735 Kokkos::View<Intrepid2::Orientation*, HostSpace> elemOrts_h(blockOrientations.data(), blockOrientations.size());
736 Kokkos::deep_copy(orientations_[orientation.first], elemOrts_h);
737 }
738 }
739
740 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
741 void
743 apply (const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
744 Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
745 Teuchos::ETransp mode,
746 Scalar alpha,
747 Scalar beta) const {
748
749 if (mode == Teuchos::NO_TRANS) {
750 applyNonTransposed(X, Y, alpha, beta);
751 return;
752 } else if (mode == Teuchos::TRANS) {
753 applyTransposed(X, Y, alpha, beta);
754 return;
755 } else
756 TEUCHOS_ASSERT(false);
757 }
758
759 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
760 void
762 applyNonTransposed (const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
763 Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
764 Scalar alpha,
765 Scalar beta) const {
766
767 using Teuchos::RCP;
768 using Teuchos::rcp;
769 using Teuchos::rcp_dynamic_cast;
770 using range_type = Kokkos::RangePolicy<LocalOrdinal, DeviceSpace>;
771
772 using ots = Intrepid2::OrientationTools<DeviceSpace>;
773 using li = Intrepid2::LagrangianInterpolation<DeviceSpace>;
774 using DynRankDeviceView = Kokkos::DynRankView<Scalar,DeviceSpace>;
775 using view_t = typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev;
776 using const_view_t = typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev::const_type;
777
778 Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer(std::string("Mini-EM: matrix-free apply no_trans ") + name));
779
780 using TST = Teuchos::ScalarTraits<Scalar>;
781 const Scalar ZERO = TST::zero();
782 if (beta == ZERO)
783 Y.putScalar(ZERO);
784 colmapMV_->doImport(X, *import_,Tpetra::INSERT);
785
786 const_view_t lclX = colmapMV_->getLocalViewDevice(Tpetra::Access::ReadOnly);
787 view_t lclY = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
788 size_t numVectors = lclY.extent(1);
789 LocalOrdinal lclTargetSize = getRangeMap()->getLocalNumElements();
790
791 // get the domain and range bases
792 auto domain_fieldPattern = domain_ugi->getFieldPattern(domain_basis_name);
793 auto domain_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(domain_fieldPattern,true)->getIntrepidBasis();
794 auto range_fieldPattern = range_ugi->getFieldPattern(range_basis_name);
795 auto range_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(range_fieldPattern,true)->getIntrepidBasis();
796
797 // cardinalities
798 const size_t domainCardinality = domain_basis->getCardinality();
799 const size_t rangeCardinality = range_basis->getCardinality();
800
801 size_t maxNumElementsPerBlock = 0;
802
803 const int dim = range_basis->getBaseCellTopology().getDimension();
804
805 if (op == Intrepid2::OPERATOR_VALUE) {
806 TEUCHOS_ASSERT(rangeCardinality >= domainCardinality);
807 TEUCHOS_ASSERT_EQUALITY(domain_basis->getFunctionSpace(), range_basis->getFunctionSpace());
808 }
809
810 // allocate some views
811 int numCells;
812 if (maxNumElementsPerBlock > 0)
813 numCells = std::min(maxNumElementsPerBlock, worksetSize);
814 else
815 numCells = worksetSize;
816 DynRankDeviceView range_dofCoords_d("range_dofCoords_d", rangeCardinality, dim);
817 DynRankDeviceView basisCoeffsLIOriented_d("basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
818
819 // the ranks of these depend on dimension
820 DynRankDeviceView valuesAtDofCoordsNonOriented_d;
821 DynRankDeviceView valuesAtDofCoordsOriented_d;
822 DynRankDeviceView reducedValuesAtDofCoordsOriented_d;
823
824 {
825 // Let Intrepid2 give us the correctly dimensioned view, then build one with +1 ranks and extent(0) == numCells
826 auto temp = domain_basis->allocateOutputView(rangeCardinality, op);
827
828 // These view have dimensions
829 // numCells, numFields=domainCardinality, numPoints=rangeCardinality, (spatialDim)
830 //
831 if (temp.rank() == 3) {
832 valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
833 valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
834 reducedValuesAtDofCoordsOriented_d = DynRankDeviceView("reducedValuesAtDofCoordsOriented_d", numCells, temp.extent(1), temp.extent(2), numVectors);
835 } else {
836 valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
837 valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
838 reducedValuesAtDofCoordsOriented_d = DynRankDeviceView("reducedValuesAtDofCoordsOriented_d", numCells, temp.extent(1), numVectors);
839 }
840 }
841
842 int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
843 TEUCHOS_ASSERT((fieldRank == 0) || (fieldRank == 1));
844
845 auto rangeLIDs_d = range_ugi->getLIDs();
846 auto domainLIDs_d = domain_ugi->getLIDs();
847
848 // range dof coordinates
849 range_basis->getDofCoords(range_dofCoords_d);
850
851 // compute values of op * (domain basis) at range dof coords on reference element
852 domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
853
854 // loop over element blocks
855 std::vector<std::string> elementBlockIds;
856 range_ugi->getElementBlockIds(elementBlockIds);
857 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
858
859 auto rangeOffsets_d = range_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
860 auto domainOffsets_d = domain_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
861
862 // get element ids
863 Kokkos::View<int *, DeviceSpace> elementIds_d;
864 {
865 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
866 Kokkos::View<int *, Kokkos::HostSpace> elementIds_h(elementIds.data(), elementIds.size());
867 elementIds_d = Kokkos::View<int *, DeviceSpace>("elementIds_d", elementIds_h.extent(0));
868 Kokkos::deep_copy(elementIds_d, elementIds_h);
869 }
870
871 // get element orientations
872 auto elemOrts_d = orientations_.at(elementBlockIds[blockIter]);
873
874 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
875
876 int endCellRange =
877 std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
878 Teuchos::as<int>(elemIter));
879
880 // get subviews on workset
881 auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
882 auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
883 // Last workset might be shorter.
884 auto worksetRange = Kokkos::make_pair(0, endCellRange);
885 auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
886 auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
887
888 // apply orientations for LO basis
889 // shuffles things in the second dimension, i.e. wrt LO basis
890 ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
891 valuesAtDofCoordsNonOriented_d,
892 elemOrtsWorkset_d,
893 domain_basis.get());
894
895 Kokkos::deep_copy(reducedValuesAtDofCoordsOriented_d, 0.0);
896
897 if (reducedValuesAtDofCoordsOriented_d.rank() == 4) {
898 Kokkos::parallel_for("miniEM:MatrixFreeInterpolationOp:cellLoop1",
899 range_type(0, std::min(numCells, elementIds_d.extent_int(0)-Teuchos::as<int>(elemIter))),
900 KOKKOS_LAMBDA(const LocalOrdinal cellNo) {
901 LocalOrdinal cellNo2 = elemIter+cellNo;
902 LocalOrdinal elemId = elementIds_d(cellNo2);
903 for(size_t domainIter=0; domainIter<domainCardinality; domainIter++) {
904 LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
905 for(size_t rangeIter=0; rangeIter<rangeCardinality; rangeIter++) {
906 for(size_t d=0; d<valuesAtDofCoordsOriented_d.extent(3); d++) {
907 Scalar val = valuesAtDofCoordsOriented_d(cellNo, domainIter, rangeIter, d);
908 for (size_t j = 0; j<numVectors; ++j)
909 reducedValuesAtDofCoordsOriented_d(cellNo, rangeIter, d, j) += val * lclX(J, j);
910 }
911 }
912 }
913 });
914
915 for (size_t j = 0; j<numVectors; ++j)
916 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), j),
917 Kokkos::subview(reducedValuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), j),
918 range_basis.get(),
919 elemOrtsWorkset_d
920 );
921 } else {
922 Kokkos::parallel_for("miniEM:MatrixFreeInterpolationOp:cellLoop1",
923 range_type(0, std::min(numCells, elementIds_d.extent_int(0)-Teuchos::as<int>(elemIter))),
924 KOKKOS_LAMBDA(const LocalOrdinal cellNo) {
925 LocalOrdinal cellNo2 = elemIter+cellNo;
926 LocalOrdinal elemId = elementIds_d(cellNo2);
927 for(size_t domainIter=0; domainIter<domainCardinality; domainIter++) {
928 LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
929 for(size_t rangeIter=0; rangeIter<rangeCardinality; rangeIter++) {
930 Scalar val = valuesAtDofCoordsOriented_d(cellNo, domainIter, rangeIter);
931 for (size_t j = 0; j<numVectors; ++j)
932 reducedValuesAtDofCoordsOriented_d(cellNo, rangeIter, j) += val * lclX(J, j);
933 }
934 }
935 });
936
937 for (size_t j = 0; j<numVectors; ++j)
938 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), j),
939 Kokkos::subview(reducedValuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), j),
940 range_basis.get(),
941 elemOrtsWorkset_d
942 );
943 }
944
945 auto owner_d = owner_d_;
946
947 Kokkos::parallel_for("miniEM::MatrixFreeInterpolationOp::cellLoop2",
948 range_type(elemIter, std::min(elemIter+numCells,
949 elementIds_d.extent(0))),
950 KOKKOS_LAMBDA(const LocalOrdinal cellNo2) {
951 LocalOrdinal cellNo = cellNo2-elemIter;
952 LocalOrdinal elemId = elementIds_d(cellNo2);
953
954 // loop over range LIDs
955 for(size_t rangeIter = 0; rangeIter < rangeLIDs_d.extent(1); ++rangeIter) {
956 LocalOrdinal range_row = rangeLIDs_d(elemId, rangeOffsets_d(rangeIter));
957
958 // if owned
959 if ((range_row < lclTargetSize) && (owner_d(range_row) == elemId)) {
960
961 for (size_t j = 0; j<numVectors; ++j) {
962 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, j);
963 lclY(range_row,j) = beta*lclY(range_row,j) + alpha*val;
964 }
965 } // end if owned
966 } // end range LID loop
967 }); // end element loop
968 } //end workset loop
969 } //end element block loop
970
971 }
972
973 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
974 void
976 applyTransposed (const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
977 Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
978 Scalar alpha,
979 Scalar beta) const {
980
981 using Teuchos::RCP;
982 using Teuchos::rcp;
983 using Teuchos::rcp_dynamic_cast;
984 using range_type = Kokkos::RangePolicy<LocalOrdinal, DeviceSpace>;
985
986 typedef Intrepid2::OrientationTools<DeviceSpace> ots;
987 typedef Intrepid2::LagrangianInterpolation<DeviceSpace> li;
988 typedef Kokkos::DynRankView<Scalar,DeviceSpace> DynRankDeviceView;
989 using TST = Teuchos::ScalarTraits<Scalar>;
990 const Scalar ZERO = TST::zero();
991 using view_t = typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev;
992 using const_view_t = typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev::const_type;
993
994 Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer(std::string("Mini-EM: matrix-free apply trans ") + name));
995
996 const_view_t lclX = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
997 colmapMV_->putScalar(ZERO);
998 view_t lclYtemp = colmapMV_->getLocalViewDevice(Tpetra::Access::ReadWrite);
999
1000 // get the domain and range bases
1001 auto domain_fieldPattern = domain_ugi->getFieldPattern(domain_basis_name);
1002 auto domain_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(domain_fieldPattern,true)->getIntrepidBasis();
1003 auto range_fieldPattern = range_ugi->getFieldPattern(range_basis_name);
1004 auto range_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(range_fieldPattern,true)->getIntrepidBasis();
1005
1006 // cardinalities
1007 const size_t domainCardinality = domain_basis->getCardinality();
1008 const size_t rangeCardinality = range_basis->getCardinality();
1009
1010 size_t maxNumElementsPerBlock = 0;
1011
1012 const int dim = range_basis->getBaseCellTopology().getDimension();
1013
1014 if (op == Intrepid2::OPERATOR_VALUE) {
1015 TEUCHOS_ASSERT(rangeCardinality >= domainCardinality);
1016 TEUCHOS_ASSERT_EQUALITY(domain_basis->getFunctionSpace(), range_basis->getFunctionSpace());
1017 }
1018
1019 // allocate some views
1020 int numCells;
1021 if (maxNumElementsPerBlock > 0)
1022 numCells = std::min(maxNumElementsPerBlock, worksetSize);
1023 else
1024 numCells = worksetSize;
1025 DynRankDeviceView range_dofCoords_d("range_dofCoords_d", rangeCardinality, dim);
1026 DynRankDeviceView basisCoeffsLIOriented_d("basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
1027
1028 // the ranks of these depend on dimension
1029 DynRankDeviceView valuesAtDofCoordsNonOriented_d;
1030 DynRankDeviceView valuesAtDofCoordsOriented_d;
1031
1032 {
1033 // Let Intrepid2 give us the correctly dimensioned view, then build one with +1 ranks and extent(0) == numCells
1034 auto temp = domain_basis->allocateOutputView(rangeCardinality, op);
1035
1036 // These view have dimensions
1037 // numCells, numFields=domainCardinality, numPoints=rangeCardinality, (spatialDim)
1038 //
1039 if (temp.rank() == 3) {
1040 valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
1041 valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
1042 } else {
1043 valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
1044 valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
1045 }
1046 }
1047
1048 int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
1049 TEUCHOS_ASSERT((fieldRank == 0) || (fieldRank == 1));
1050
1051 auto rangeLIDs_d = range_ugi->getLIDs();
1052 auto domainLIDs_d = domain_ugi->getLIDs();
1053 Kokkos::fence();
1054
1055 // range dof coordinates
1056 range_basis->getDofCoords(range_dofCoords_d);
1057
1058 // compute values of op * (domain basis) at range dof coords on reference element
1059 domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
1060
1061 // loop over element blocks
1062 std::vector<std::string> elementBlockIds;
1063 range_ugi->getElementBlockIds(elementBlockIds);
1064 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
1065
1066 auto rangeOffsets_d = range_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
1067 auto domainOffsets_d = domain_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
1068
1069 // get element ids
1070 Kokkos::View<int *, DeviceSpace> elementIds_d;
1071 {
1072 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
1073 Kokkos::View<int *, Kokkos::HostSpace> elementIds_h(elementIds.data(), elementIds.size());
1074 elementIds_d = Kokkos::View<int *, DeviceSpace>("elementIds_d", elementIds_h.extent(0));
1075 Kokkos::deep_copy(elementIds_d, elementIds_h);
1076 }
1077
1078 // get element orientations
1079 auto elemOrts_d = orientations_.at(elementBlockIds[blockIter]);
1080
1081 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
1082
1083 int endCellRange =
1084 std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
1085 Teuchos::as<int>(elemIter));
1086
1087 // get subviews on workset
1088 auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
1089 auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
1090 // Last workset might be shorter.
1091 auto worksetRange = Kokkos::make_pair(0, endCellRange);
1092 auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
1093 auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
1094
1095 // apply orientations for domain basis
1096 // shuffles things in the second dimension, i.e. wrt domain basis
1097 ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
1098 valuesAtDofCoordsNonOriented_d,
1099 elemOrtsWorkset_d,
1100 domain_basis.get());
1101 Kokkos::fence();
1102
1103 // get basis coefficients of domain basis functions wrt range basis
1104 for(size_t domainIter=0; domainIter<domainCardinality; domainIter++)
1105 // Get basis coeffs wrt range basis on reference element.
1106 // basisCoeffsLI has dimensions (numCells, numFields=rangeCardinality, domainCardinality)
1107 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), domainIter),
1108 Kokkos::subview(valuesAtDofCoordsOrientedWorkset_d, Kokkos::ALL(), domainIter, Kokkos::ALL(), Kokkos::ALL()),
1109 range_basis.get(), elemOrtsWorkset_d);
1110 Kokkos::fence();
1111
1112 auto owner_d = owner_d_;
1113
1114
1115 Kokkos::parallel_for("miniEM::MatrixFreeInterpolationOp::cellLoop",
1116 range_type(elemIter, std::min(elemIter+numCells,
1117 elementIds_d.extent(0))),
1118 KOKKOS_LAMBDA(const LocalOrdinal cellNo2) {
1119 LocalOrdinal cellNo = cellNo2-elemIter;
1120 LocalOrdinal elemId = elementIds_d(cellNo2);
1121
1122 // loop over range LIDs
1123 for(size_t rangeIter = 0; rangeIter < rangeLIDs_d.extent(1); ++rangeIter) {
1124 LocalOrdinal range_row = rangeLIDs_d(elemId, rangeOffsets_d(rangeIter));
1125
1126 // if owned
1127 if ((range_row < (LocalOrdinal) lclX.extent(0)) && (owner_d(range_row) == elemId)) {
1128
1129 for(size_t domainIter = 0; domainIter < domainLIDs_d.extent(1); domainIter++) {
1130 LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
1131 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
1132 for (size_t j = 0; j<lclYtemp.extent(1); ++j)
1133 Kokkos::atomic_add(&lclYtemp(J,j), alpha*val*lclX(range_row,j));
1134 }
1135 } //end if owned
1136 } //end range LID loop
1137 }); // end element loop
1138 } //end workset loop
1139 } //end element block loop
1140 Kokkos::fence();
1141
1142 if (beta == ZERO)
1143 Y.putScalar(ZERO);
1144 else
1145 Y.scale(beta);
1146 Y.doExport(*colmapMV_, *import_, Tpetra::ADD_ASSIGN);
1147 Kokkos::fence();
1148 }
1149
1150
1151}
Kokkos::View< typename Sacado::ScalarType< ScalarT >::type **, Kokkos::LayoutRight, PHX::Device > vals
Teuchos::RCP< panzer::DOFManager > domain_ugi
void apply(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > domainMap_
Teuchos::RCP< const Tpetra::Import< LocalOrdinal, GlobalOrdinal, Node > > import_
void allocateColumnMapVector(size_t numVectors)
MatrixFreeInterpolationOp(const Teuchos::RCP< const panzer::ConnManager > &conn, const Teuchos::RCP< panzer::DOFManager > &_domain_ugi, const Teuchos::RCP< panzer::DOFManager > &_range_ugi, const std::string &_domain_basis_name, const std::string &_range_basis_name, Intrepid2::EOperator _op=Intrepid2::OPERATOR_VALUE, size_t _worksetSize=1000)
void applyTransposed(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > rangeMap_
Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > columnMap_
void applyNonTransposed(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
void precomputeOwnersAndOrientations(const Teuchos::RCP< const panzer::ConnManager > &conn)
Teuchos::RCP< panzer::DOFManager > range_ugi
void buildIntrepidOrientations(const std::vector< std::string > &eBlockNames, const panzer::ConnManager &connMgrInput, std::map< std::string, std::vector< Intrepid2::Orientation > > &orientations)
Builds the element orientations for the specified element blocks.
Teuchos::RCP< Thyra::LinearOpBase< double > > buildInterpolation(const Teuchos::RCP< const panzer::LinearObjFactory< panzer::Traits > > &linObjFactory, const std::string &domain_basis_name, const std::string &range_basis_name, Intrepid2::EOperator op, size_t worksetSize, const bool matrixFree)
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > removeSmallEntries(Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, typename Teuchos::ScalarTraits< Scalar >::magnitudeType tol)