144Teuchos::RCP<Thyra::LinearOpBase<double> >
buildInterpolation(
const Teuchos::RCP<const panzer::ConnManager> &conn,
145 const Teuchos::RCP<panzer::DOFManager> &domain_ugi,
146 const Teuchos::RCP<panzer::DOFManager> &range_ugi,
147 const std::string& domain_basis_name,
148 const std::string& range_basis_name,
149 Intrepid2::EOperator op,
151 const bool force_vectorial,
152 const bool useTpetra,
153 const bool matrixFree)
157 using Teuchos::rcp_dynamic_cast;
159 using Scalar = double;
161 using STS = Teuchos::ScalarTraits<Scalar>;
162 using KAT = KokkosKernels::ArithTraits<Scalar>;
163 using OT = Teuchos::OrdinalTraits<GlobalOrdinal>;
165 using DeviceSpace = PHX::Device;
166 using HostSpace = Kokkos::HostSpace;
167 using ots = Intrepid2::OrientationTools<DeviceSpace>;
168 using li = Intrepid2::LagrangianInterpolation<DeviceSpace>;
169 using DynRankDeviceView = Kokkos::DynRankView<double, DeviceSpace>;
171 using tp_graph = Tpetra::CrsGraph<LocalOrdinal, GlobalOrdinal>;
172 using tp_matrix = Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal>;
173 using tp_map = Tpetra::Map<LocalOrdinal, GlobalOrdinal>;
174#ifdef PANZER_HAVE_EPETRA_STACK
175 using ep_matrix = Epetra_CrsMatrix;
176 using ep_map = Epetra_Map;
180 TEUCHOS_ASSERT(useTpetra);
181 TEUCHOS_ASSERT(!force_vectorial);
183 return Thyra::tpetraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,typename tp_matrix::node_type>(Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(mfOp->getRangeMap()),
184 Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(mfOp->getDomainMap()),
189 auto domain_fieldPattern = domain_ugi->getFieldPattern(domain_basis_name);
190 auto domain_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(domain_fieldPattern,
true)->getIntrepidBasis();
191 auto range_fieldPattern = range_ugi->getFieldPattern(range_basis_name);
192 auto range_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(range_fieldPattern,
true)->getIntrepidBasis();
195 const size_t domainCardinality = domain_basis->getCardinality();
196 const size_t rangeCardinality = range_basis->getCardinality();
198 const int dim = range_basis->getBaseCellTopology().getDimension();
200 if (op == Intrepid2::OPERATOR_VALUE) {
201 TEUCHOS_ASSERT(rangeCardinality >= domainCardinality);
202 TEUCHOS_ASSERT_EQUALITY(domain_basis->getFunctionSpace(), range_basis->getFunctionSpace());
206 RCP<const tp_map> tp_rangemap;
207 RCP<const tp_map> tp_domainmap;
208 RCP<const tp_map> tp_rowmap;
209 RCP<const tp_map> tp_colmap;
210 RCP<tp_matrix> tp_interp_matrix;
211 typename tp_matrix::local_matrix_device_type lcl_tp_interp_matrix;
212#ifdef PANZER_HAVE_EPETRA_STACK
213 RCP<const ep_map> ep_rangemap;
214 RCP<const ep_map> ep_domainmap;
215 RCP<const ep_map> ep_rowmap;
216 RCP<const ep_map> ep_colmap;
217 RCP<ep_matrix> ep_interp_matrix;
220 auto rangeElementLIDs_d = range_ugi->getLIDs();
221 auto domainElementLIDs_d = domain_ugi->getLIDs();
223 RCP<Thyra::LinearOpBase<Scalar> > thyra_interp;
224 size_t maxNumElementsPerBlock = 0;
225 LocalOrdinal minLocalIndex = 0;
226 LocalOrdinal maxLocalIndex = 0;
229 std::vector<GlobalOrdinal> gids;
230 range_ugi->getOwnedIndices(gids);
231 tp_rowmap = rcp(
new tp_map(OT::invalid(), gids.data(),
static_cast<LocalOrdinal
>(gids.size()), OT::zero(), range_ugi->getComm()));
232 tp_rangemap = tp_rowmap;
233 domain_ugi->getOwnedIndices(gids);
234 tp_domainmap = rcp(
new tp_map(OT::invalid(), gids.data(),
static_cast<LocalOrdinal
>(gids.size()), OT::zero(), domain_ugi->getComm()));
235 domain_ugi->getOwnedAndGhostedIndices(gids);
236 tp_colmap = rcp(
new tp_map(OT::invalid(), gids.data(),
static_cast<LocalOrdinal
>(gids.size()), OT::zero(), domain_ugi->getComm()));
238 minLocalIndex = tp_rowmap->getMinLocalIndex();
239 maxLocalIndex = tp_rowmap->getMaxLocalIndex();
243 using dv = Kokkos::DualView<size_t*, typename tp_graph::device_type>;
244 dv numEntriesPerRow(
"numEntriesPerRow", tp_rowmap->getLocalNumElements());
246 auto numEntriesPerRow_d = numEntriesPerRow.view_device();
249 std::vector<std::string> elementBlockIds;
250 range_ugi->getElementBlockIds(elementBlockIds);
251 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
254 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
255 Kokkos::View<int *, HostSpace> elementIds_h(elementIds.data(), elementIds.size());
256 Kokkos::View<int *, DeviceSpace> elementIds_d(
"elementIds_d", elementIds_h.extent(0));
257 Kokkos::deep_copy(elementIds_d, elementIds_h);
258 maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
260 Kokkos::parallel_for(
"MiniEM_Interpolation::numEntriesPerRow",
261 Kokkos::RangePolicy<size_t, typename tp_matrix::node_type::execution_space>(0, elementIds.size()),
262 KOKKOS_LAMBDA(
const size_t elemIter) {
263 auto elemId = elementIds_d(elemIter);
266 auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
269 for(
size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
270 const LocalOrdinal range_row = rangeLIDs_d(rangeIter);
271 const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
273 Kokkos::atomic_add(&numEntriesPerRow_d(range_row), domainCardinality);
277 numEntriesPerRow.template modify<typename dv::t_dev>();
278 numEntriesPerRow.template sync<typename dv::t_host>();
282 auto tp_interp_graph = rcp(
new tp_graph(tp_rowmap, tp_colmap, numEntriesPerRow));
285 Kokkos::View<LocalOrdinal**, HostSpace> rangeElementLIDs_h(
"rangeElementLIDs_h", rangeElementLIDs_d.extent(0), rangeCardinality);
286 Kokkos::View<LocalOrdinal**, HostSpace> domainElementLIDs_h(
"domainElementLIDs_h", domainElementLIDs_d.extent(0), domainCardinality);
287 Kokkos::deep_copy(rangeElementLIDs_h, rangeElementLIDs_d);
288 Kokkos::deep_copy(domainElementLIDs_h, domainElementLIDs_d);
291 std::vector<std::string> elementBlockIds;
292 range_ugi->getElementBlockIds(elementBlockIds);
293 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
296 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
297 maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
298 for(std::size_t elemIter = 0; elemIter < elementIds.size(); ++elemIter) {
299 auto elemId = elementIds[elemIter];
302 auto rangeLIDs_h = Kokkos::subview(rangeElementLIDs_h, elemId, Kokkos::ALL());
303 auto domainLIDs_h = Kokkos::subview(domainElementLIDs_h, elemId, Kokkos::ALL());
306 for(
size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
307 const LocalOrdinal range_row = rangeLIDs_h(rangeIter);
308 const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
310 Teuchos::ArrayView<LocalOrdinal> domainLIDs_av = Teuchos::ArrayView<LocalOrdinal>(domainLIDs_h.data(), domainLIDs_h.extent_int(0));
311 tp_interp_graph->insertLocalIndices(range_row, domainLIDs_av);
318 RCP<Teuchos::ParameterList> pl = rcp(
new Teuchos::ParameterList());
319 pl->set(
"Optimize Storage",
true);
320 tp_interp_graph->fillComplete(tp_domainmap, tp_rangemap, pl);
322 tp_interp_matrix = rcp(
new tp_matrix(tp_interp_graph));
323 lcl_tp_interp_matrix = tp_interp_matrix->getLocalMatrixDevice();
325#ifdef PANZER_HAVE_EPETRA_STACK
328 const RCP<const Teuchos::MpiComm<int> > mpiComm = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(range_ugi->getComm());
329 auto ep_comm = Teuchos::rcp(
new Epetra_MpiComm(*mpiComm->getRawMpiComm()));
330 std::vector<GlobalOrdinal> gids;
331 range_ugi->getOwnedIndices(gids);
332 ep_rowmap = rcp(
new ep_map(OT::invalid(),
static_cast<LocalOrdinal
>(gids.size()), gids.data(), OT::zero(), *ep_comm));
333 ep_rangemap = ep_rowmap;
334 domain_ugi->getOwnedIndices(gids);
335 ep_domainmap = rcp(
new ep_map(OT::invalid(),
static_cast<LocalOrdinal
>(gids.size()), gids.data(), OT::zero(), *ep_comm));
336 domain_ugi->getOwnedAndGhostedIndices(gids);
337 ep_colmap = rcp(
new ep_map(OT::invalid(),
static_cast<LocalOrdinal
>(gids.size()), gids.data(), OT::zero(), *ep_comm));
341 std::vector<std::string> elementBlockIds;
342 range_ugi->getElementBlockIds(elementBlockIds);
343 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
346 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
347 maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
352 size_t nnzPerRowEstimate = 25*domainCardinality;
354 ep_interp_matrix = rcp(
new ep_matrix(
Copy, *ep_rowmap, *ep_colmap,
static_cast<int>(nnzPerRowEstimate),
true));
356 RCP<const Thyra::LinearOpBase<double> > th_ep_interp = Thyra::epetraLinearOp(ep_interp_matrix,
358 Thyra::EPETRA_OP_APPLY_APPLY,
359 Thyra::EPETRA_OP_ADJOINT_SUPPORTED,
360 Thyra::create_VectorSpace(ep_rangemap),
361 Thyra::create_VectorSpace(ep_domainmap));
362 thyra_interp = Teuchos::rcp_const_cast<Thyra::LinearOpBase<double> >(th_ep_interp);
368 if (maxNumElementsPerBlock > 0)
369 numCells =
static_cast<int>(std::min(maxNumElementsPerBlock, worksetSize));
371 numCells =
static_cast<int>(worksetSize);
373 DynRankDeviceView range_dofCoords_d(
"range_dofCoords_d", rangeCardinality, dim);
374 DynRankDeviceView basisCoeffsLIOriented_d(
"basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
377 DynRankDeviceView valuesAtDofCoordsNonOriented_d;
378 DynRankDeviceView valuesAtDofCoordsOriented_d;
380 if (!force_vectorial) {
382 auto temp = domain_basis->allocateOutputView(
static_cast<int>(rangeCardinality), op);
387 if (temp.rank() == 3) {
388 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
389 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
391 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
392 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
395 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", domainCardinality, rangeCardinality, dim);
396 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, domainCardinality, rangeCardinality, dim);
399 int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
400 TEUCHOS_ASSERT((fieldRank == 0) || (fieldRank == 1));
402 auto entryFilterTol = 1e5*Teuchos::ScalarTraits<typename STS::magnitudeType>::eps();
405 range_basis->getDofCoords(range_dofCoords_d);
408 domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
411 std::vector<std::string> elementBlockIds;
412 range_ugi->getElementBlockIds(elementBlockIds);
415 std::map<std::string, std::vector<Intrepid2::Orientation> > orientations;
419 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
421 auto rangeOffsets_d = range_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
422 auto domainOffsets_d = domain_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
423#ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
424 std::cout <<
"rangeOffsets_d" << std::endl;
425 for (
int i = 0; i < rangeCardinality; i++)
426 std::cout << rangeOffsets_d(i) <<
" ";
427 std::cout << std::endl;
428 std::cout <<
"domainOffsets_d" << std::endl;
429 for (
int i = 0; i < domainCardinality; i++)
430 std::cout << domainOffsets_d(i) <<
" ";
431 std::cout << std::endl;
435 Kokkos::View<int *, DeviceSpace> elementIds_d;
437 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
438 Kokkos::View<int *, HostSpace> elementIds_h(elementIds.data(), elementIds.size());
439 elementIds_d = Kokkos::View<int *, DeviceSpace>(
"elementIds_d", elementIds_h.extent(0));
440 Kokkos::deep_copy(elementIds_d, elementIds_h);
444 typename Kokkos::DynRankView<Intrepid2::Orientation,DeviceSpace> elemOrts_d (
"elemOrts_d", elementIds_d.extent(0));
447 auto blockOrientations = orientations[elementBlockIds[blockIter]];
448 Kokkos::View<Intrepid2::Orientation*, HostSpace> elemOrts_h(blockOrientations.data(), blockOrientations.size());
449 Kokkos::deep_copy(elemOrts_d, elemOrts_h);
453 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
456 std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
457 Teuchos::as<int>(elemIter));
460 auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
461 auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
463 auto worksetRange = Kokkos::make_pair(0, endCellRange);
464 auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
465 auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
469 ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
470 valuesAtDofCoordsNonOriented_d,
475 for(
size_t domainIter=0; domainIter<domainCardinality; domainIter++)
478 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), domainIter),
479 Kokkos::subview(valuesAtDofCoordsOrientedWorkset_d, Kokkos::ALL(), domainIter, Kokkos::ALL(), Kokkos::ALL()),
483#ifdef PANZER_HAVE_EPETRA_STACK
486 Kokkos::View<LocalOrdinal*,DeviceSpace> indices_d(
"indices", domainCardinality);
487 Kokkos::View<Scalar*, DeviceSpace> values_d (
"values", domainCardinality);
490 for (
int cellNo = 0; cellNo < endCellRange; cellNo++) {
491 auto elemId = elementIds_d(elemIter+cellNo);
494 auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
495 auto domainLIDs_d = Kokkos::subview(domainElementLIDs_d, elemId, Kokkos::ALL());
498 for(
size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
499 const LocalOrdinal range_row = rangeLIDs_d(rangeOffsets_d(rangeIter));
500 const bool isOwned = ep_rowmap->MyLID(range_row);
503 LocalOrdinal rowNNZ = 0;
504 for(
size_t domainIter = 0; domainIter < domainCardinality; domainIter++) {
505 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
506 if (KAT::magnitude(val) > entryFilterTol) {
507 indices_d(rowNNZ) = domainLIDs_d(domainOffsets_d(domainIter));
508 values_d(rowNNZ) = val;
513 int ret = ep_interp_matrix->ReplaceMyValues(range_row, rowNNZ, values_d.data(), indices_d.data());
515 ret = ep_interp_matrix->InsertMyValues(range_row, rowNNZ, values_d.data(), indices_d.data());
516 TEUCHOS_ASSERT(ret == 0);
525 Kokkos::parallel_for(
526 "MiniEM_Interpolation::worksetLoop",
527 Kokkos::RangePolicy<int, typename tp_matrix::node_type::execution_space>(0, endCellRange),
528 KOKKOS_LAMBDA(
const int cellNo) {
529 auto elemId = elementIds_d(elemIter+cellNo);
532 auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
533 auto domainLIDs_d = Kokkos::subview(domainElementLIDs_d, elemId, Kokkos::ALL());
535#ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
536 std::cout <<
"\n" << elemOrts_d(elemIter+cellNo).to_string() << std::endl;
537 std::cout <<
"rangeLIDs" << std::endl;
538 for (
int i = 0; i < rangeCardinality; i++)
539 std::cout << rangeLIDs_d(i) <<
" ";
540 std::cout << std::endl <<
"domainLIDs" << std::endl;
541 for (
int i = 0; i < domainCardinality; i++)
542 std::cout << domainLIDs_d(i) <<
" ";
543 std::cout << std::endl;
546 for(
size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
547 const LocalOrdinal range_row = rangeLIDs_d(rangeOffsets_d(rangeIter));
548 const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
551 for(
size_t domainIter = 0; domainIter < domainCardinality; domainIter++) {
552 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
553 if (KAT::magnitude(val) > entryFilterTol) {
555#if defined(PANZER_INTERPOLATION_DEBUG_OUTPUT) || defined(PANZER_DEBUG)
558 auto row = lcl_tp_interp_matrix.rowConst(range_row);
559 for(LocalOrdinal kk = 0; kk<row.length; ++kk)
560 if (row.colidx(kk) == domainLIDs_d(domainOffsets_d(domainIter)))
561 if (!(KAT::magnitude(row.value(kk)-val) < entryFilterTol || KAT::magnitude(row.value(kk)) < entryFilterTol)) {
562#ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
563 std::cout <<
"Replacing (" << range_row <<
"," << row.colidx(kk) <<
") = " << row.value(kk) <<
" with " << val << std::endl;
566 Kokkos::abort(
"MiniEM interpolation worksetLoop failed!");
571#ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
572 std::cout <<
"Setting (" << range_row <<
"," << domainLIDs_d(domainOffsets_d(domainIter)) <<
") = " << val << std::endl;
574 lcl_tp_interp_matrix.replaceValues(range_row, &(domainLIDs_d(domainOffsets_d(domainIter))), 1, &val,
false,
true);
585 tp_interp_matrix->fillComplete(tp_domainmap, tp_rangemap);
587 if (op != Intrepid2::OPERATOR_VALUE) {
592 thyra_interp = Thyra::tpetraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,typename tp_matrix::node_type>(Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(tp_rangemap),
593 Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(tp_domainmap),
596#ifdef PANZER_HAVE_EPETRA_STACK
598 ep_interp_matrix->FillComplete(*ep_domainmap, *ep_rangemap);
750 applyNonTransposed (
const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
751 Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
757 using Teuchos::rcp_dynamic_cast;
758 using range_type = Kokkos::RangePolicy<LocalOrdinal, DeviceSpace>;
760 using ots = Intrepid2::OrientationTools<DeviceSpace>;
761 using li = Intrepid2::LagrangianInterpolation<DeviceSpace>;
762 using DynRankDeviceView = Kokkos::DynRankView<Scalar,DeviceSpace>;
763 using view_t =
typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev;
764 using const_view_t =
typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev::const_type;
766 Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer(std::string(
"Mini-EM: matrix-free apply no_trans ") + name));
768 using TST = Teuchos::ScalarTraits<Scalar>;
769 const Scalar ZERO = TST::zero();
772 colmapMV_->doImport(X, *import_,Tpetra::INSERT);
774 const_view_t lclX = colmapMV_->getLocalViewDevice(Tpetra::Access::ReadOnly);
775 view_t lclY = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
776 size_t numVectors = lclY.extent(1);
777 LocalOrdinal lclTargetSize = getRangeMap()->getLocalNumElements();
780 auto domain_fieldPattern = domain_ugi->getFieldPattern(domain_basis_name);
781 auto domain_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(domain_fieldPattern,
true)->getIntrepidBasis();
782 auto range_fieldPattern = range_ugi->getFieldPattern(range_basis_name);
783 auto range_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(range_fieldPattern,
true)->getIntrepidBasis();
786 const size_t domainCardinality = domain_basis->getCardinality();
787 const size_t rangeCardinality = range_basis->getCardinality();
789 size_t maxNumElementsPerBlock = 0;
791 const int dim = range_basis->getBaseCellTopology().getDimension();
793 if (op == Intrepid2::OPERATOR_VALUE) {
794 TEUCHOS_ASSERT(rangeCardinality >= domainCardinality);
795 TEUCHOS_ASSERT_EQUALITY(domain_basis->getFunctionSpace(), range_basis->getFunctionSpace());
800 if (maxNumElementsPerBlock > 0)
801 numCells = std::min(maxNumElementsPerBlock, worksetSize);
803 numCells = worksetSize;
804 DynRankDeviceView range_dofCoords_d(
"range_dofCoords_d", rangeCardinality, dim);
805 DynRankDeviceView basisCoeffsLIOriented_d(
"basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
808 DynRankDeviceView valuesAtDofCoordsNonOriented_d;
809 DynRankDeviceView valuesAtDofCoordsOriented_d;
810 DynRankDeviceView reducedValuesAtDofCoordsOriented_d;
814 auto temp = domain_basis->allocateOutputView(rangeCardinality, op);
819 if (temp.rank() == 3) {
820 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
821 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
822 reducedValuesAtDofCoordsOriented_d = DynRankDeviceView(
"reducedValuesAtDofCoordsOriented_d", numCells, temp.extent(1), temp.extent(2), numVectors);
824 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
825 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
826 reducedValuesAtDofCoordsOriented_d = DynRankDeviceView(
"reducedValuesAtDofCoordsOriented_d", numCells, temp.extent(1), numVectors);
830 int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
831 TEUCHOS_ASSERT((fieldRank == 0) || (fieldRank == 1));
833 auto rangeLIDs_d = range_ugi->getLIDs();
834 auto domainLIDs_d = domain_ugi->getLIDs();
837 range_basis->getDofCoords(range_dofCoords_d);
840 domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
843 std::vector<std::string> elementBlockIds;
844 range_ugi->getElementBlockIds(elementBlockIds);
845 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
847 auto rangeOffsets_d = range_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
848 auto domainOffsets_d = domain_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
851 Kokkos::View<int *, DeviceSpace> elementIds_d;
853 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
854 Kokkos::View<int *, Kokkos::HostSpace> elementIds_h(elementIds.data(), elementIds.size());
855 elementIds_d = Kokkos::View<int *, DeviceSpace>(
"elementIds_d", elementIds_h.extent(0));
856 Kokkos::deep_copy(elementIds_d, elementIds_h);
860 auto elemOrts_d = orientations_.at(elementBlockIds[blockIter]);
862 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
865 std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
866 Teuchos::as<int>(elemIter));
869 auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
870 auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
872 auto worksetRange = Kokkos::make_pair(0, endCellRange);
873 auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
874 auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
878 ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
879 valuesAtDofCoordsNonOriented_d,
883 Kokkos::deep_copy(reducedValuesAtDofCoordsOriented_d, 0.0);
885 if (reducedValuesAtDofCoordsOriented_d.rank() == 4) {
886 Kokkos::parallel_for(
"miniEM:MatrixFreeInterpolationOp:cellLoop1",
887 range_type(0, std::min(numCells, elementIds_d.extent_int(0)-Teuchos::as<int>(elemIter))),
888 KOKKOS_LAMBDA(
const LocalOrdinal cellNo) {
889 LocalOrdinal cellNo2 = elemIter+cellNo;
890 LocalOrdinal elemId = elementIds_d(cellNo2);
891 for(
size_t domainIter=0; domainIter<domainCardinality; domainIter++) {
892 LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
893 for(
size_t rangeIter=0; rangeIter<rangeCardinality; rangeIter++) {
894 for(
size_t d=0; d<valuesAtDofCoordsOriented_d.extent(3); d++) {
895 Scalar val = valuesAtDofCoordsOriented_d(cellNo, domainIter, rangeIter, d);
896 for (
size_t j = 0; j<numVectors; ++j)
897 reducedValuesAtDofCoordsOriented_d(cellNo, rangeIter, d, j) += val * lclX(J, j);
903 for (
size_t j = 0; j<numVectors; ++j)
904 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), j),
905 Kokkos::subview(reducedValuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), j),
910 Kokkos::parallel_for(
"miniEM:MatrixFreeInterpolationOp:cellLoop1",
911 range_type(0, std::min(numCells, elementIds_d.extent_int(0)-Teuchos::as<int>(elemIter))),
912 KOKKOS_LAMBDA(
const LocalOrdinal cellNo) {
913 LocalOrdinal cellNo2 = elemIter+cellNo;
914 LocalOrdinal elemId = elementIds_d(cellNo2);
915 for(
size_t domainIter=0; domainIter<domainCardinality; domainIter++) {
916 LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
917 for(
size_t rangeIter=0; rangeIter<rangeCardinality; rangeIter++) {
918 Scalar val = valuesAtDofCoordsOriented_d(cellNo, domainIter, rangeIter);
919 for (
size_t j = 0; j<numVectors; ++j)
920 reducedValuesAtDofCoordsOriented_d(cellNo, rangeIter, j) += val * lclX(J, j);
925 for (
size_t j = 0; j<numVectors; ++j)
926 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), j),
927 Kokkos::subview(reducedValuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), j),
933 auto owner_d = owner_d_;
935 Kokkos::parallel_for(
"miniEM::MatrixFreeInterpolationOp::cellLoop2",
936 range_type(elemIter, std::min(elemIter+numCells,
937 elementIds_d.extent(0))),
938 KOKKOS_LAMBDA(
const LocalOrdinal cellNo2) {
939 LocalOrdinal cellNo = cellNo2-elemIter;
940 LocalOrdinal elemId = elementIds_d(cellNo2);
943 for(
size_t rangeIter = 0; rangeIter < rangeLIDs_d.extent(1); ++rangeIter) {
944 LocalOrdinal range_row = rangeLIDs_d(elemId, rangeOffsets_d(rangeIter));
947 if ((range_row < lclTargetSize) && (owner_d(range_row) == elemId)) {
949 for (
size_t j = 0; j<numVectors; ++j) {
950 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, j);
951 lclY(range_row,j) = beta*lclY(range_row,j) + alpha*val;
964 applyTransposed (
const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
965 Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
971 using Teuchos::rcp_dynamic_cast;
972 using range_type = Kokkos::RangePolicy<LocalOrdinal, DeviceSpace>;
974 typedef Intrepid2::OrientationTools<DeviceSpace> ots;
975 typedef Intrepid2::LagrangianInterpolation<DeviceSpace> li;
976 typedef Kokkos::DynRankView<Scalar,DeviceSpace> DynRankDeviceView;
977 using TST = Teuchos::ScalarTraits<Scalar>;
978 const Scalar ZERO = TST::zero();
979 using view_t =
typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev;
980 using const_view_t =
typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev::const_type;
982 Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer(std::string(
"Mini-EM: matrix-free apply trans ") + name));
984 const_view_t lclX = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
985 colmapMV_->putScalar(ZERO);
986 view_t lclYtemp = colmapMV_->getLocalViewDevice(Tpetra::Access::ReadWrite);
989 auto domain_fieldPattern = domain_ugi->getFieldPattern(domain_basis_name);
990 auto domain_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(domain_fieldPattern,
true)->getIntrepidBasis();
991 auto range_fieldPattern = range_ugi->getFieldPattern(range_basis_name);
992 auto range_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(range_fieldPattern,
true)->getIntrepidBasis();
995 const size_t domainCardinality = domain_basis->getCardinality();
996 const size_t rangeCardinality = range_basis->getCardinality();
998 size_t maxNumElementsPerBlock = 0;
1000 const int dim = range_basis->getBaseCellTopology().getDimension();
1002 if (op == Intrepid2::OPERATOR_VALUE) {
1003 TEUCHOS_ASSERT(rangeCardinality >= domainCardinality);
1004 TEUCHOS_ASSERT_EQUALITY(domain_basis->getFunctionSpace(), range_basis->getFunctionSpace());
1009 if (maxNumElementsPerBlock > 0)
1010 numCells = std::min(maxNumElementsPerBlock, worksetSize);
1012 numCells = worksetSize;
1013 DynRankDeviceView range_dofCoords_d(
"range_dofCoords_d", rangeCardinality, dim);
1014 DynRankDeviceView basisCoeffsLIOriented_d(
"basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
1017 DynRankDeviceView valuesAtDofCoordsNonOriented_d;
1018 DynRankDeviceView valuesAtDofCoordsOriented_d;
1022 auto temp = domain_basis->allocateOutputView(rangeCardinality, op);
1027 if (temp.rank() == 3) {
1028 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
1029 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
1031 valuesAtDofCoordsNonOriented_d = DynRankDeviceView(
"valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
1032 valuesAtDofCoordsOriented_d = DynRankDeviceView(
"valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
1036 int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
1037 TEUCHOS_ASSERT((fieldRank == 0) || (fieldRank == 1));
1039 auto rangeLIDs_d = range_ugi->getLIDs();
1040 auto domainLIDs_d = domain_ugi->getLIDs();
1044 range_basis->getDofCoords(range_dofCoords_d);
1047 domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
1050 std::vector<std::string> elementBlockIds;
1051 range_ugi->getElementBlockIds(elementBlockIds);
1052 for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
1054 auto rangeOffsets_d = range_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
1055 auto domainOffsets_d = domain_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
1058 Kokkos::View<int *, DeviceSpace> elementIds_d;
1060 std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
1061 Kokkos::View<int *, Kokkos::HostSpace> elementIds_h(elementIds.data(), elementIds.size());
1062 elementIds_d = Kokkos::View<int *, DeviceSpace>(
"elementIds_d", elementIds_h.extent(0));
1063 Kokkos::deep_copy(elementIds_d, elementIds_h);
1067 auto elemOrts_d = orientations_.at(elementBlockIds[blockIter]);
1069 for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
1072 std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
1073 Teuchos::as<int>(elemIter));
1076 auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
1077 auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
1079 auto worksetRange = Kokkos::make_pair(0, endCellRange);
1080 auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
1081 auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
1085 ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
1086 valuesAtDofCoordsNonOriented_d,
1088 domain_basis.get());
1092 for(
size_t domainIter=0; domainIter<domainCardinality; domainIter++)
1095 li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), domainIter),
1096 Kokkos::subview(valuesAtDofCoordsOrientedWorkset_d, Kokkos::ALL(), domainIter, Kokkos::ALL(), Kokkos::ALL()),
1097 range_basis.get(), elemOrtsWorkset_d);
1100 auto owner_d = owner_d_;
1103 Kokkos::parallel_for(
"miniEM::MatrixFreeInterpolationOp::cellLoop",
1104 range_type(elemIter, std::min(elemIter+numCells,
1105 elementIds_d.extent(0))),
1106 KOKKOS_LAMBDA(
const LocalOrdinal cellNo2) {
1107 LocalOrdinal cellNo = cellNo2-elemIter;
1108 LocalOrdinal elemId = elementIds_d(cellNo2);
1111 for(
size_t rangeIter = 0; rangeIter < rangeLIDs_d.extent(1); ++rangeIter) {
1112 LocalOrdinal range_row = rangeLIDs_d(elemId, rangeOffsets_d(rangeIter));
1115 if ((range_row < (LocalOrdinal) lclX.extent(0)) && (owner_d(range_row) == elemId)) {
1117 for(
size_t domainIter = 0; domainIter < domainLIDs_d.extent(1); domainIter++) {
1118 LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
1119 Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
1120 for (
size_t j = 0; j<lclYtemp.extent(1); ++j)
1121 Kokkos::atomic_add(&lclYtemp(J,j), alpha*val*lclX(range_row,j));
1134 Y.doExport(*colmapMV_, *import_, Tpetra::ADD_ASSIGN);