76 const Teuchos::ParameterList& p)
77 : globalIndexer_(indexer)
78 , globalDataKey_(
"Residual Scatter Container")
80 std::string scatterName = p.get<std::string>(
"Scatter Name");
82 Teuchos::rcp(
new PHX::Tag<ScalarT>(scatterName,Teuchos::rcp(
new PHX::MDALayout<Dummy>(0))));
85 const std::vector<std::string>& names =
86 *(p.get< Teuchos::RCP< std::vector<std::string> > >(
"Dependent Names"));
89 fieldMap_ = p.get< Teuchos::RCP< std::map<std::string,std::string> > >(
"Dependent Map");
91 Teuchos::RCP<PHX::DataLayout> dl =
92 p.get< Teuchos::RCP<const panzer::PureBasis> >(
"Basis")->functional;
95 scatterFields_.resize(names.size());
96 for (std::size_t eq = 0; eq < names.size(); ++eq) {
97 scatterFields_[eq] = PHX::MDField<const ScalarT,Cell,NODE>(names[eq],dl);
100 this->addDependentField(scatterFields_[eq]);
104 this->addEvaluatedField(*scatterHolder_);
106 if (p.isType<std::string>(
"Global Data Key"))
107 globalDataKey_ = p.get<std::string>(
"Global Data Key");
109 this->setName(scatterName+
" Scatter Residual");
118 const Workset & workset_0 = (*d.worksets_)[0];
119 const std::string blockId = this->wda(workset_0).
block_id;
121 fieldIds_.resize(scatterFields_.size());
122 fieldOffsets_.resize(scatterFields_.size());
123 fieldGlobalIndexers_.resize(scatterFields_.size());
124 productVectorBlockIndex_.resize(scatterFields_.size());
125 int maxElementBlockGIDCount = -1;
126 for(std::size_t fd=0; fd < scatterFields_.size(); ++fd) {
127 const std::string fieldName = fieldMap_->find(scatterFields_[fd].fieldTag().name())->second;
128 const int globalFieldNum = globalIndexer_->getFieldNum(fieldName);
129 productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum);
130 fieldGlobalIndexers_[fd] = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]];
131 fieldIds_[fd] = fieldGlobalIndexers_[fd]->getFieldNum(fieldName);
133 const std::vector<int>&
offsets = fieldGlobalIndexers_[fd]->getGIDFieldOffsets(blockId,fieldIds_[fd]);
134 fieldOffsets_[fd] = PHX::View<int*>(
"ScatterResidual_BlockedTpetra(Residual):fieldOffsets",
offsets.size());
135 auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
136 for (std::size_t i=0; i <
offsets.size(); ++i)
138 Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
140 maxElementBlockGIDCount = std::max(fieldGlobalIndexers_[fd]->getElementBlockGIDCount(blockId),maxElementBlockGIDCount);
146 worksetLIDs_ = PHX::View<LO**>(
"ScatterResidual_BlockedTpetra(Residual):worksetLIDs",
147 scatterFields_[0].extent(0),
148 maxElementBlockGIDCount);
174 using Teuchos::rcp_dynamic_cast;
178 const auto& localCellIds = this->wda(workset).cell_local_ids_k;
179 const RCP<ProductVectorBase<double>> thyraBlockResidual = rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer_->get_f(),
true);
182 int currentWorksetLIDSubBlock = -1;
183 for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
185 if (productVectorBlockIndex_[fieldIndex] != currentWorksetLIDSubBlock) {
186 fieldGlobalIndexers_[fieldIndex]->getElementLIDs(localCellIds,worksetLIDs_);
187 currentWorksetLIDSubBlock = productVectorBlockIndex_[fieldIndex];
190 auto& tpetraResidual = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraBlockResidual->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),
true))->getTpetraVector());
191 const auto& kokkosResidual = tpetraResidual.getLocalViewDevice(Tpetra::Access::ReadWrite);
194 const auto& fieldOffsets = fieldOffsets_[fieldIndex];
195 const auto& worksetLIDs = worksetLIDs_;
196 const auto& fieldValues = scatterFields_[fieldIndex].get_static_view();
198 Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (
const int& cell) {
199 for(
int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
200 const int lid = worksetLIDs(cell,fieldOffsets(basis));
201 Kokkos::atomic_add(&kokkosResidual(lid,0), fieldValues(cell,basis));
214 const Teuchos::ParameterList& p)
215 : globalIndexer_(indexer)
216 , globalDataKey_(
"Residual Scatter Container")
218 std::string scatterName = p.get<std::string>(
"Scatter Name");
220 Teuchos::rcp(
new PHX::Tag<ScalarT>(scatterName,Teuchos::rcp(
new PHX::MDALayout<Dummy>(0))));
223 const std::vector<std::string>& names =
224 *(p.get< Teuchos::RCP< std::vector<std::string> > >(
"Dependent Names"));
227 fieldMap_ = p.get< Teuchos::RCP< std::map<std::string,std::string> > >(
"Dependent Map");
229 Teuchos::RCP<PHX::DataLayout> dl =
230 p.get< Teuchos::RCP<const panzer::PureBasis> >(
"Basis")->functional;
233 scatterFields_.resize(names.size());
234 for (std::size_t eq = 0; eq < names.size(); ++eq) {
235 scatterFields_[eq] = PHX::MDField<const ScalarT,Cell,NODE>(names[eq],dl);
238 this->addDependentField(scatterFields_[eq]);
242 this->addEvaluatedField(*scatterHolder_);
244 if (p.isType<std::string>(
"Global Data Key"))
245 globalDataKey_ = p.get<std::string>(
"Global Data Key");
247 this->setName(scatterName+
" Scatter Residual (Jacobian)");
256 const Workset & workset_0 = (*d.worksets_)[0];
257 const std::string blockId = this->wda(workset_0).
block_id;
259 fieldIds_.resize(scatterFields_.size());
260 fieldOffsets_.resize(scatterFields_.size());
261 productVectorBlockIndex_.resize(scatterFields_.size());
262 for (std::size_t fd=0; fd < scatterFields_.size(); ++fd) {
263 const std::string fieldName = fieldMap_->find(scatterFields_[fd].fieldTag().name())->second;
264 const int globalFieldNum = globalIndexer_->getFieldNum(fieldName);
265 productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum);
266 const auto& fieldGlobalIndexer = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]];
267 fieldIds_[fd] = fieldGlobalIndexer->getFieldNum(fieldName);
269 const std::vector<int>&
offsets = globalIndexer_->getGIDFieldOffsets(blockId,globalFieldNum);
270 fieldOffsets_[fd] = PHX::View<int*>(
"ScatterResidual_BlockedTpetra(Jacobian):fieldOffsets",
offsets.size());
271 auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
272 for (std::size_t i=0; i <
offsets.size(); ++i)
274 Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
280 int elementBlockGIDCount = 0;
281 for (
const auto& blockDOFMgr : globalIndexer_->getFieldDOFManagers())
282 elementBlockGIDCount += blockDOFMgr->getElementBlockGIDCount(blockId);
284 worksetLIDs_ = Kokkos::View<LO**, Kokkos::LayoutRight, PHX::Device>(
285 "ScatterResidual_BlockedTpetra(Jacobian):worksetLIDs_",
286 scatterFields_[0].extent(0), elementBlockGIDCount );
289 const auto& blockGlobalIndexers = globalIndexer_->getFieldDOFManagers();
290 const int numBlocks =
static_cast<int>(globalIndexer_->getFieldDOFManagers().size());
291 blockOffsets_ = PHX::View<LO*>(
"ScatterResidual_BlockedTpetra(Jacobian):blockOffsets_",
293 const auto hostBlockOffsets = Kokkos::create_mirror_view(blockOffsets_);
294 for (
int blk=0;blk<numBlocks;blk++) {
295 int blockOffset = globalIndexer_->getBlockGIDOffset(blockId,blk);
296 hostBlockOffsets(blk) = blockOffset;
298 hostBlockOffsets(numBlocks) = hostBlockOffsets(numBlocks-1) + blockGlobalIndexers[blockGlobalIndexers.size()-1]->getElementBlockGIDCount(blockId);
299 Kokkos::deep_copy(blockOffsets_,hostBlockOffsets);
303 int max_blockDerivativeSize = 0;
304 for (
int blk=0;blk<numBlocks;blk++) {
305 const int blockDerivativeSize = hostBlockOffsets(blk+1) - hostBlockOffsets(blk);
306 if ( blockDerivativeSize > max_blockDerivativeSize )
307 max_blockDerivativeSize = blockDerivativeSize;
309 workset_vals_ = Kokkos::View<typename Sacado::ScalarType<ScalarT>::type**, Kokkos::LayoutRight, PHX::Device>(
310 "ScatterResidual_BlockedTpetra(Jacobian):workset_vals_",
311 scatterFields_[0].extent(0), max_blockDerivativeSize );
337 using Teuchos::rcp_dynamic_cast;
342 const auto& localCellIds = this->wda(workset).cell_local_ids_k;
344 const int numFieldBlocks = globalIndexer_->getNumFieldBlocks();
345 const RCP<const ContainerType> blockedContainer = blockedContainer_;
346 const RCP<ProductVectorBase<double>> thyraBlockResidual = rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer_->get_f());
347 const bool haveResidual = Teuchos::nonnull(thyraBlockResidual);
348 const RCP<BlockedLinearOpBase<double>> Jac = rcp_dynamic_cast<BlockedLinearOpBase<double> >(blockedContainer_->get_A(),
true);
354 using LocalMatrixType = KokkosSparse::CrsMatrix<double,LO,PHX::Device,Kokkos::MemoryTraits<Kokkos::Unmanaged>,
size_t>;
355 typename PHX::View<LocalMatrixType**>::host_mirror_type
356 hostJacTpetraBlocks(
"panzer::ScatterResidual_BlockTpetra<Jacobian>::hostJacTpetraBlocks", numFieldBlocks,numFieldBlocks);
358 PHX::View<int**> blockExistsInJac = PHX::View<int**>(
"blockExistsInJac_",numFieldBlocks,numFieldBlocks);
359 auto hostBlockExistsInJac = Kokkos::create_mirror_view(blockExistsInJac);
361 for (
int row=0; row < numFieldBlocks; ++row) {
362 for (
int col=0; col < numFieldBlocks; ++col) {
363 const auto thyraTpetraOperator = rcp_dynamic_cast<Thyra::TpetraLinearOp<double,LO,GO,NodeT>>(Jac->getNonconstBlock(row,col),
false);
364 if (nonnull(thyraTpetraOperator)) {
380 const auto tpetraCrsMatrix = rcp_dynamic_cast<Tpetra::CrsMatrix<double,LO,GO,NodeT>>(thyraTpetraOperator->getTpetraOperator(),
true);
381 const auto managedMatrix = tpetraCrsMatrix->getLocalMatrixDevice();
382 const auto managedGraph = managedMatrix.graph;
385 using StaticCrsGraphType =
typename LocalMatrixType::StaticCrsGraphType;
386 StaticCrsGraphType unmanagedGraph;
387 unmanagedGraph.entries =
typename StaticCrsGraphType::entries_type(managedGraph.entries.data(),managedGraph.entries.extent(0));
388 unmanagedGraph.row_map =
typename StaticCrsGraphType::row_map_type(managedGraph.row_map.data(),managedGraph.row_map.extent(0));
389 unmanagedGraph.row_block_offsets =
typename StaticCrsGraphType::row_block_type(managedGraph.row_block_offsets.data(),managedGraph.row_block_offsets.extent(0));
391 typename LocalMatrixType::values_type unmanagedValues(managedMatrix.values.data(),managedMatrix.values.extent(0));
392 LocalMatrixType unmanagedMatrix(managedMatrix.values.label(), managedMatrix.numCols(), unmanagedValues, unmanagedGraph);
393 new (&hostJacTpetraBlocks(row,col)) LocalMatrixType(unmanagedMatrix);
396 hostBlockExistsInJac(row,col) = 1;
399 hostBlockExistsInJac(row,col) = 0;
403 typename PHX::View<LocalMatrixType**>
404 jacTpetraBlocks(
"panzer::ScatterResidual_BlockedTpetra<Jacobian>::jacTpetraBlocks",numFieldBlocks,numFieldBlocks);
405 Kokkos::deep_copy(jacTpetraBlocks,hostJacTpetraBlocks);
406 Kokkos::deep_copy(blockExistsInJac,hostBlockExistsInJac);
413 const auto& globalIndexers = globalIndexer_->getFieldDOFManagers();
414 auto blockOffsets_h = Kokkos::create_mirror_view(blockOffsets_);
415 Kokkos::deep_copy(blockOffsets_h, blockOffsets_);
416 for (
size_t block=0; block < globalIndexers.size(); ++block) {
417 const auto subviewOfBlockLIDs = Kokkos::subview(worksetLIDs_,Kokkos::ALL(), std::make_pair(blockOffsets_h(block),blockOffsets_h(block+1)));
418 globalIndexers[block]->getElementLIDs(localCellIds,subviewOfBlockLIDs);
422 for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
424 const int blockRowIndex = productVectorBlockIndex_[fieldIndex];
425 typename Tpetra::Vector<double,LO,GO,PHX::Device>::dual_view_type::t_dev kokkosResidual;
427 auto& tpetraResidual = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraBlockResidual->getNonconstVectorBlock(blockRowIndex),
true))->getTpetraVector());
428 kokkosResidual = tpetraResidual.getLocalViewDevice(Tpetra::Access::ReadWrite);
432 const PHX::View<const int*> fieldOffsets = fieldOffsets_[fieldIndex];
433 const PHX::View<const ScalarT**> fieldValues = scatterFields_[fieldIndex].get_static_view();
434 const PHX::View<const LO*> blockOffsets = blockOffsets_;
436 const Kokkos::View<const LO**, Kokkos::LayoutRight, PHX::Device> worksetLIDs = worksetLIDs_;
437 Kokkos::View<typename Sacado::ScalarType<ScalarT>::type**, Kokkos::LayoutRight, PHX::Device> workset_vals = workset_vals_;
439 Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (
const int& cell) {
440 for(
int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
441 typedef PHX::MDField<const ScalarT,Cell,NODE>
FieldType;
442 typename FieldType::array_type::reference_type tmpFieldVal = fieldValues(cell,basis);
443 const int rowLID = worksetLIDs(cell,fieldOffsets(basis));
446 Kokkos::atomic_add(&kokkosResidual(rowLID,0), tmpFieldVal.val());
448 for (
int blockColIndex=0; blockColIndex < numFieldBlocks; ++blockColIndex) {
449 if (blockExistsInJac(blockRowIndex,blockColIndex)) {
450 const int start = blockOffsets(blockColIndex);
451 const int stop = blockOffsets(blockColIndex+1);
452 const int sensSize = stop-start;
454 for (
int i=0; i < sensSize; ++i)
455 workset_vals(cell,i) = tmpFieldVal.fastAccessDx(start+i);
457 jacTpetraBlocks(blockRowIndex,blockColIndex).sumIntoValues(rowLID, &worksetLIDs(cell,start), sensSize, &workset_vals(cell,0),
true,
true);
466 for (
int row=0; row < numFieldBlocks; ++row) {
467 for (
int col=0; col < numFieldBlocks; ++col) {
468 if (hostBlockExistsInJac(row,col)) {
469 hostJacTpetraBlocks(row,col).~CrsMatrix();
483 const Teuchos::ParameterList& p)
484 : globalIndexer_(indexer)
485 , globalDataKey_(
"Residual Scatter Container")
487 std::string scatterName = p.get<std::string>(
"Scatter Name");
489 Teuchos::rcp(
new PHX::Tag<ScalarT>(scatterName,Teuchos::rcp(
new PHX::MDALayout<Dummy>(0))));
492 const std::vector<std::string>& names =
493 *(p.get< Teuchos::RCP< std::vector<std::string> > >(
"Dependent Names"));
496 fieldMap_ = p.get< Teuchos::RCP< std::map<std::string,std::string> > >(
"Dependent Map");
498 Teuchos::RCP<PHX::DataLayout> dl =
499 p.get< Teuchos::RCP<const panzer::PureBasis> >(
"Basis")->functional;
502 scatterFields_.resize(names.size());
503 for (std::size_t eq = 0; eq < names.size(); ++eq) {
504 scatterFields_[eq] = PHX::MDField<const ScalarT,Cell,NODE>(names[eq],dl);
507 this->addDependentField(scatterFields_[eq]);
511 this->addEvaluatedField(*scatterHolder_);
513 if (p.isType<std::string>(
"Global Data Key"))
514 globalDataKey_ = p.get<std::string>(
"Global Data Key");
516 this->setName(scatterName+
" Scatter Residual (Tangent)");
525 const Workset & workset_0 = (*d.worksets_)[0];
526 const std::string blockId = this->wda(workset_0).
block_id;
528 fieldIds_.resize(scatterFields_.size());
529 fieldOffsets_.resize(scatterFields_.size());
530 fieldGlobalIndexers_.resize(scatterFields_.size());
531 productVectorBlockIndex_.resize(scatterFields_.size());
532 int maxElementBlockGIDCount = -1;
533 for(std::size_t fd=0; fd < scatterFields_.size(); ++fd) {
534 const std::string fieldName = fieldMap_->find(scatterFields_[fd].fieldTag().name())->second;
535 const int globalFieldNum = globalIndexer_->getFieldNum(fieldName);
536 productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum);
537 fieldGlobalIndexers_[fd] = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]];
538 fieldIds_[fd] = fieldGlobalIndexers_[fd]->getFieldNum(fieldName);
540 const std::vector<int>&
offsets = fieldGlobalIndexers_[fd]->getGIDFieldOffsets(blockId,fieldIds_[fd]);
541 fieldOffsets_[fd] = PHX::View<int*>(
"ScatterResidual_BlockedTpetra(Tangent):fieldOffsets",
offsets.size());
542 auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]);
543 for (std::size_t i=0; i <
offsets.size(); ++i)
545 Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets);
547 maxElementBlockGIDCount = std::max(fieldGlobalIndexers_[fd]->getElementBlockGIDCount(blockId),maxElementBlockGIDCount);
553 worksetLIDs_ = PHX::View<LO**>(
"ScatterResidual_BlockedTpetra(Tangent):worksetLIDs",
554 scatterFields_[0].extent(0),
555 maxElementBlockGIDCount);
564 using Teuchos::rcp_dynamic_cast;
568 std::vector<std::string> activeParameters =
569 rcp_dynamic_cast<ParameterList_GlobalEvaluationData>(d.gedc->getDataObject(
"PARAMETER_NAMES"))->getActiveParameters();
571 const int numBlocks =
static_cast<int>(globalIndexer_->getFieldDOFManagers().size());
573 dfdpFieldsVoV_.initialize(
"ScatterResidual_Tpetra<Tangent>::dfdpFieldsVoV_",activeParameters.size(),numBlocks);
575 for(std::size_t i=0;i<activeParameters.size();i++) {
576 RCP<ContainerType> paramBlockedContainer = rcp_dynamic_cast<ContainerType>(d.gedc->getDataObject(activeParameters[i]),
true);
577 RCP<ProductVectorBase<double>> productVector =
578 rcp_dynamic_cast<ProductVectorBase<double>>(paramBlockedContainer->get_f(),
true);
579 for(
int j=0;j<numBlocks;j++) {
580 auto& tpetraBlock = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(productVector->getNonconstVectorBlock(j),
true))->getTpetraVector());
581 const auto& dfdp_view = tpetraBlock.getLocalViewDevice(Tpetra::Access::ReadWrite);
582 dfdpFieldsVoV_.addView(dfdp_view,i,j);
586 dfdpFieldsVoV_.syncHostToDevice();
589 blockedContainer_ = rcp_dynamic_cast<const ContainerType>(d.gedc->getDataObject(globalDataKey_));
591 if(blockedContainer_==Teuchos::null) {
592 RCP<const LOCPair_GlobalEvaluationData> gdata = rcp_dynamic_cast<const LOCPair_GlobalEvaluationData>(d.gedc->getDataObject(globalDataKey_),
true);
593 blockedContainer_ = rcp_dynamic_cast<const ContainerType>(gdata->getGhostedLOC());
603 using Teuchos::rcp_dynamic_cast;
607 const auto& localCellIds = this->wda(workset).cell_local_ids_k;
608 const RCP<ProductVectorBase<double>> thyraBlockResidual = rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer_->get_f(),
true);
611 int currentWorksetLIDSubBlock = -1;
612 for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
614 if (productVectorBlockIndex_[fieldIndex] != currentWorksetLIDSubBlock) {
615 fieldGlobalIndexers_[fieldIndex]->getElementLIDs(localCellIds,worksetLIDs_);
616 currentWorksetLIDSubBlock = productVectorBlockIndex_[fieldIndex];
619 auto& tpetraResidual = *((rcp_dynamic_cast<Thyra::TpetraVector<RealType,LO,GO,NodeT>>(thyraBlockResidual->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),
true))->getTpetraVector());
620 const auto& kokkosResidual = tpetraResidual.getLocalViewDevice(Tpetra::Access::ReadWrite);
623 const auto& fieldOffsets = fieldOffsets_[fieldIndex];
624 const auto& worksetLIDs = worksetLIDs_;
625 const auto& fieldValues = scatterFields_[fieldIndex].get_static_view();
626 const auto& tangentFieldsDevice = dfdpFieldsVoV_.getViewDevice();
627 const auto& kokkosTangents = Kokkos::subview(tangentFieldsDevice,Kokkos::ALL(),productVectorBlockIndex_[fieldIndex]);
628 const double num_params = Kokkos::dimension_scalar(fieldValues)-1;
630 Kokkos::parallel_for(Kokkos::RangePolicy<PHX::Device>(0,workset.num_cells), KOKKOS_LAMBDA (
const int& cell) {
631 for(
int basis=0; basis < static_cast<int>(fieldOffsets.size()); ++basis) {
632 const int lid = worksetLIDs(cell,fieldOffsets(basis));
633 Kokkos::atomic_add(&kokkosResidual(lid,0), fieldValues(cell,basis).val());
634 for(
int i_param=0; i_param<
num_params; i_param++)
635 kokkosTangents(i_param)(lid,0) += fieldValues(cell,basis).fastAccessDx(i_param);