390 typedef typename Teuchos::ScalarTraits<Scalar>::coordinateType coordinate_type;
391 typedef Xpetra::MultiVectorFactory<coordinate_type, LO, GO, NO> RealValuedMultiVectorFactory;
392 const ParameterList& pL = GetParameterList();
393 std::string nspName =
"Nullspace";
394 if (pL.isParameter(
"Nullspace name")) nspName = pL.get<std::string>(
"Nullspace name");
396 auto A = Get<RCP<Matrix>>(fineLevel,
"A");
397 auto aggregates = Get<RCP<Aggregates>>(fineLevel,
"Aggregates");
398 auto amalgInfo = Get<RCP<AmalgamationInfo>>(fineLevel,
"UnAmalgamationInfo");
399 auto fineNullspace = Get<RCP<MultiVector>>(fineLevel, nspName);
400 auto coarseMap = Get<RCP<const Map>>(fineLevel,
"CoarseMap");
401 RCP<RealValuedMultiVector> fineCoords;
402 if (bTransferCoordinates_) {
403 fineCoords = Get<RCP<RealValuedMultiVector>>(fineLevel,
"Coordinates");
406 RCP<Matrix> Ptentative;
409 if (aggregates->GetNumGlobalAggregatesComputeIfNeeded() == 0) {
410 Ptentative = Teuchos::null;
411 Set(coarseLevel,
"P", Ptentative);
414 RCP<MultiVector> coarseNullspace;
415 RCP<RealValuedMultiVector> coarseCoords;
417 if (bTransferCoordinates_) {
418 RCP<const Map> coarseCoordMap;
421 if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null)
422 blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize();
427 coarseCoordMap = coarseMap;
433 coarseCoords = RealValuedMultiVectorFactory::Build(coarseCoordMap, fineCoords->getNumVectors(),
false);
436 auto uniqueMap = fineCoords->getMap();
437 RCP<RealValuedMultiVector> ghostedCoords = fineCoords;
438 if (aggregates->AggregatesCrossProcessors()) {
439 auto nonUniqueMap = aggregates->GetMap();
440 auto importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
442 ghostedCoords = RealValuedMultiVectorFactory::Build(nonUniqueMap, fineCoords->getNumVectors(),
false);
443 ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);
448 auto aggGraph = aggregates->GetGraph();
449 auto numAggs = aggGraph.numRows();
451 auto fineCoordsView = fineCoords->getLocalViewDevice(Tpetra::Access::ReadOnly);
452 auto coarseCoordsView = coarseCoords->getLocalViewDevice(Tpetra::Access::OverwriteAll);
458 const auto dim = fineCoords->getNumVectors();
460 typename AppendTrait<
decltype(fineCoordsView), Kokkos::RandomAccess>::type fineCoordsRandomView = fineCoordsView;
461 for (
size_t j = 0; j < dim; j++) {
462 Kokkos::parallel_for(
463 "MueLu::TentativeP::BuildCoords", Kokkos::RangePolicy<local_ordinal_type, execution_space>(0, numAggs),
464 KOKKOS_LAMBDA(
const LO i) {
468 auto aggregate = aggGraph.rowConst(i);
470 coordinate_type sum = 0.0;
471 for (
size_t colID = 0; colID < static_cast<size_t>(aggregate.length); colID++)
472 sum += fineCoordsRandomView(aggregate(colID), j);
474 coarseCoordsView(i, j) = sum / aggregate.length;
480 if (!aggregates->AggregatesCrossProcessors()) {
481 if (Xpetra::Helpers<SC, LO, GO, NO>::isTpetraBlockCrs(A)) {
482 BuildPuncoupledBlockCrs(coarseLevel, A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace,
485 BuildPuncoupled(coarseLevel, A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.
GetLevelID());
488 BuildPcoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
498 if (A->IsView(
"stridedMaps") ==
true)
499 Ptentative->CreateView(
"stridedMaps", A->getRowMap(
"stridedMaps"), coarseMap);
501 Ptentative->CreateView(
"stridedMaps", Ptentative->getRangeMap(), coarseMap);
503 if (bTransferCoordinates_) {
504 Set(coarseLevel,
"Coordinates", coarseCoords);
509 RCP<const Teuchos::Comm<int>> nodeComm = Get<RCP<const Teuchos::Comm<int>>>(fineLevel,
"Node Comm");
510 Set<RCP<const Teuchos::Comm<int>>>(coarseLevel,
"Node Comm", nodeComm);
513 Set(coarseLevel,
"Nullspace", coarseNullspace);
514 Set(coarseLevel,
"P", Ptentative);
517 RCP<ParameterList> params = rcp(
new ParameterList());
518 params->set(
"printLoadBalancingInfo",
true);
526 RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
527 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative,
528 RCP<MultiVector>& coarseNullspace,
const int levelID)
const {
529 auto rowMap = A->getRowMap();
530 auto colMap = A->getColMap();
532 const size_t numRows = rowMap->getLocalNumElements();
533 const size_t NSDim = fineNullspace->getNumVectors();
535 typedef KokkosKernels::ArithTraits<SC> ATS;
536 using impl_SC =
typename ATS::val_type;
537 using impl_ATS = KokkosKernels::ArithTraits<impl_SC>;
538 const impl_SC zero = impl_ATS::zero(), one = impl_ATS::one();
540 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
545 aggGraph = aggregates->GetGraph();
547 auto aggRows = aggGraph.row_map;
548 auto aggCols = aggGraph.entries;
556 goodMap = isGoodMap(*rowMap, *colMap);
560 "MueLu: TentativePFactory_kokkos: for now works only with good maps "
561 "(i.e. \"matching\" row and column maps)");
570 LO fullBlockSize, blockID, stridingOffset, stridedBlockSize;
572 amalgInfo->GetStridingInformation(fullBlockSize, blockID, stridingOffset, stridedBlockSize, indexBase);
573 GO globalOffset = amalgInfo->GlobalOffset();
576 auto procWinner = aggregates->GetProcWinner()->getLocalViewDevice(Tpetra::Access::ReadOnly);
577 auto vertex2AggId = aggregates->GetVertex2AggId()->getLocalViewDevice(Tpetra::Access::ReadOnly);
578 const size_t numAggregates = aggregates->GetNumAggregates();
580 int myPID = aggregates->GetMap()->getComm()->getRank();
585 typedef typename Aggregates::aggregates_sizes_type::non_const_type AggSizeType;
586 AggSizeType aggDofSizes;
588 if (stridedBlockSize == 1) {
592 aggDofSizes = AggSizeType(
"agg_dof_sizes", numAggregates + 1);
594 auto sizesConst = aggregates->ComputeAggregateSizes();
595 Kokkos::deep_copy(Kokkos::subview(aggDofSizes, Kokkos::make_pair(
static_cast<size_t>(1), numAggregates + 1)), sizesConst);
601 aggDofSizes = AggSizeType(
"agg_dof_sizes", numAggregates + 1);
603 auto nodeMap = aggregates->GetMap()->getLocalMap();
604 auto dofMap = colMap->getLocalMap();
606 Kokkos::parallel_for(
607 "MueLu:TentativePF:Build:compute_agg_sizes",
range_type(0, numAggregates),
608 KOKKOS_LAMBDA(
const LO agg) {
609 auto aggRowView = aggGraph.rowConst(agg);
612 for (LO colID = 0; colID < aggRowView.length; colID++) {
613 GO nodeGID = nodeMap.getGlobalElement(aggRowView(colID));
615 for (LO k = 0; k < stridedBlockSize; k++) {
616 GO dofGID = (nodeGID - indexBase) * fullBlockSize + k + indexBase + globalOffset + stridingOffset;
618 if (dofMap.getLocalElement(dofGID) != INVALID)
622 aggDofSizes(agg + 1) = size;
629 ReduceMaxFunctor<LO,
decltype(aggDofSizes)> reduceMax(aggDofSizes);
630 Kokkos::parallel_reduce(
"MueLu:TentativePF:Build:max_agg_size",
range_type(0, aggDofSizes.extent(0)), reduceMax, maxAggSize);
634 Kokkos::parallel_scan(
635 "MueLu:TentativePF:Build:aggregate_sizes:stage1_scan",
range_type(0, numAggregates + 1),
636 KOKKOS_LAMBDA(
const LO i, LO& update,
const bool& final_pass) {
637 update += aggDofSizes(i);
639 aggDofSizes(i) = update;
644 Kokkos::View<LO*, DeviceType>
agg2RowMapLO(Kokkos::ViewAllocateWithoutInitializing(
"agg2row_map_LO"), numRows);
648 AggSizeType aggOffsets(Kokkos::ViewAllocateWithoutInitializing(
"aggOffsets"), numAggregates);
649 Kokkos::deep_copy(aggOffsets, Kokkos::subview(aggDofSizes, Kokkos::make_pair(
static_cast<size_t>(0), numAggregates)));
651 Kokkos::parallel_for(
652 "MueLu:TentativePF:Build:createAgg2RowMap",
range_type(0, vertex2AggId.extent(0)),
653 KOKKOS_LAMBDA(
const LO lnode) {
654 if (procWinner(lnode, 0) == myPID) {
656 auto aggID = vertex2AggId(lnode, 0);
658 auto offset = Kokkos::atomic_fetch_add(&aggOffsets(aggID), stridedBlockSize);
662 for (LO k = 0; k < stridedBlockSize; k++)
663 agg2RowMapLO(offset + k) = lnode * stridedBlockSize + k;
670 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim,
true);
673 auto fineNS = fineNullspace->getLocalViewDevice(Tpetra::Access::ReadWrite);
674 auto coarseNS = coarseNullspace->getLocalViewDevice(Tpetra::Access::ReadWrite);
678 typedef typename Xpetra::Matrix<SC, LO, GO, NO>::local_matrix_device_type local_matrix_type;
679 typedef typename local_matrix_type::row_map_type::non_const_type rows_type;
680 typedef typename local_matrix_type::index_type::non_const_type cols_type;
681 typedef typename local_matrix_type::values_type::non_const_type vals_type;
684 typedef Kokkos::View<int[10], DeviceType> status_type;
685 status_type status(
"status");
690 const ParameterList& pL = GetParameterList();
691 const bool&
doQRStep = pL.get<
bool>(
"tentative: calculate qr");
693 GetOStream(
Runtime1) <<
"TentativePFactory : bypassing local QR phase" << std::endl;
695 GetOStream(
Warnings0) <<
"TentativePFactor : for nontrivial nullspace, this may degrade performance" << std::endl;
698 size_t nnzEstimate = numRows * NSDim;
699 rows_type
rowsAux(Kokkos::ViewAllocateWithoutInitializing(
"Ptent_aux_rows"), numRows + 1);
700 cols_type
colsAux(Kokkos::ViewAllocateWithoutInitializing(
"Ptent_aux_cols"), nnzEstimate);
701 vals_type
valsAux(
"Ptent_aux_vals", nnzEstimate);
702 rows_type
rows(
"Ptent_rows", numRows + 1);
709 Kokkos::parallel_for(
710 "MueLu:TentativePF:BuildPuncoupled:for1",
range_type(0, numRows + 1),
711 KOKKOS_LAMBDA(
const LO row) {
714 Kokkos::parallel_for(
715 "MueLu:TentativePF:BuildUncoupled:for2",
range_type(0, nnzEstimate),
716 KOKKOS_LAMBDA(
const LO j) {
733 const Kokkos::TeamPolicy<execution_space> policy(numAggregates, 1);
736 Kokkos::parallel_for(
737 "MueLu:TentativePF:BuildUncoupled:main_loop", policy,
738 KOKKOS_LAMBDA(
const typename Kokkos::TeamPolicy<execution_space>::member_type& thread) {
739 auto agg = thread.league_rank();
747 auto norm = impl_ATS::magnitude(zero);
752 for (
decltype(aggSize) k = 0; k < aggSize; k++) {
754 norm += dnorm * dnorm;
768 for (
decltype(aggSize) k = 0; k < aggSize; k++) {
772 rows(localRow + 1) = 1;
778 typename status_type::host_mirror_type statusHost = Kokkos::create_mirror_view(status);
779 Kokkos::deep_copy(statusHost, status);
780 for (
decltype(statusHost.size()) i = 0; i < statusHost.size(); i++)
782 std::ostringstream oss;
783 oss <<
"MueLu::TentativePFactory::MakeTentative: ";
785 case 0: oss <<
"!goodMap is not implemented";
break;
786 case 1: oss <<
"fine level NS part has a zero column";
break;
792 Kokkos::parallel_for(
793 "MueLu:TentativePF:BuildUncoupled:main_loop_noqr", policy,
794 KOKKOS_LAMBDA(
const typename Kokkos::TeamPolicy<execution_space>::member_type& thread) {
795 auto agg = thread.league_rank();
804 for (
decltype(aggSize) k = 0; k < aggSize; k++) {
808 rows(localRow + 1) = 1;
815 Kokkos::parallel_reduce(
816 "MueLu:TentativeP:CountNNZ",
range_type(0, numRows + 1),
817 KOKKOS_LAMBDA(
const LO i,
size_t& nnz_count) {
818 nnz_count +=
rows(i);
836 Kokkos::TeamPolicy<execution_space> policy(numAggregates, 1);
838 decltype(aggDofSizes ),
decltype(maxAggSize),
decltype(
agg2RowMapLO),
844 using shared_matrix =
typename decltype(localQRFunctor)::shared_matrix;
846 int n = fineNSRandom.extent(1);
847 int size = shared_matrix::shmem_size(m, n) +
848 shared_matrix::shmem_size(m, m);
850 if (size < policy.scratch_size_max((
int)0))
851 policy.set_scratch_size((
int)0, Kokkos::PerTeam(size));
852 else if (size < policy.scratch_size_max((
int)1))
853 policy.set_scratch_size((
int)1, Kokkos::PerTeam(size));
855 throw Exceptions::RuntimeError(
"Neither L0 scratch memory (max size " + std::to_string(policy.scratch_size_max((
int)0)) +
856 "), nor L1 scratch memory (max size " + std::to_string(policy.scratch_size_max((
int)1)) +
857 ") is large enough for requested allocation of size " + std::to_string(size));
859 Kokkos::parallel_reduce(
"MueLu:TentativePF:BuildUncoupled:main_qr_loop", policy, localQRFunctor, nnz);
862 typename status_type::host_mirror_type statusHost = Kokkos::create_mirror_view(status);
863 Kokkos::deep_copy(statusHost, status);
864 for (
decltype(statusHost.size()) i = 0; i < statusHost.size(); i++)
866 std::ostringstream oss;
867 oss <<
"MueLu::TentativePFactory::MakeTentative: ";
869 case 0: oss <<
"!goodMap is not implemented";
break;
870 case 1: oss <<
"fine level NS part has a zero column";
break;
883 if (nnz != nnzEstimate) {
888 Kokkos::parallel_scan(
889 "MueLu:TentativePF:Build:compress_rows",
range_type(0, numRows + 1),
890 KOKKOS_LAMBDA(
const LO i, LO& upd,
const bool&
final) {
900 cols = cols_type(
"Ptent_cols", nnz);
901 vals = vals_type(
"Ptent_vals", nnz);
906 Kokkos::parallel_for(
907 "MueLu:TentativePF:Build:compress_cols_vals",
range_type(0, numRows),
908 KOKKOS_LAMBDA(
const LO i) {
909 LO rowStart =
rows(i);
914 cols(rowStart + lnnz) =
colsAux(j);
915 vals(rowStart + lnnz) =
valsAux(j);
927 GetOStream(
Runtime1) <<
"TentativePFactory : aggregates do not cross process boundaries" << std::endl;
933 local_matrix_type lclMatrix = local_matrix_type(
"A", numRows, coarseMap->getLocalNumElements(), nnz, vals,
rows, cols);
936 RCP<ParameterList> FCparams;
937 if (pL.isSublist(
"matrixmatrix: kernel params"))
938 FCparams = rcp(
new ParameterList(pL.sublist(
"matrixmatrix: kernel params")));
940 FCparams = rcp(
new ParameterList);
943 FCparams->set(
"compute global constants", FCparams->get(
"compute global constants",
false));
944 FCparams->set(
"Timer Label", std::string(
"MueLu::TentativeP-") +
toString(levelID));
946 auto PtentCrs = CrsMatrixFactory::Build(lclMatrix, rowMap, coarseMap, coarseMap, A->getDomainMap());
947 Ptentative = rcp(
new CrsMatrixWrap(PtentCrs));
954 RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
955 RCP<const Map> coarsePointMap, RCP<Matrix>& Ptentative,
956 RCP<MultiVector>& coarseNullspace,
const int levelID)
const {
966 RCP<const Map> rowMap = A->getRowMap();
967 RCP<const Map> rangeMap = A->getRangeMap();
968 RCP<const Map> colMap = A->getColMap();
970 const size_t numFineBlockRows = rowMap->getLocalNumElements();
974 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
976 typedef KokkosKernels::ArithTraits<SC> ATS;
977 using impl_SC =
typename ATS::val_type;
978 using impl_ATS = KokkosKernels::ArithTraits<impl_SC>;
979 const impl_SC one = impl_ATS::one();
982 const size_t NSDim = fineNullspace->getNumVectors();
983 auto aggSizes = aggregates->ComputeAggregateSizes();
988 aggGraph = aggregates->GetGraph();
990 auto aggRows = aggGraph.row_map;
991 auto aggCols = aggGraph.entries;
997 const size_t numCoarseBlockRows = coarsePointMap->getLocalNumElements() / NSDim;
998 RCP<const Map> coarseBlockMap = MapFactory::Build(coarsePointMap->lib(),
999 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
1001 coarsePointMap->getIndexBase(),
1002 coarsePointMap->getComm());
1004 const ParameterList& pL = GetParameterList();
1014 "MueLu: TentativePFactory_kokkos: for now works only with good maps "
1015 "(i.e. \"matching\" row and column maps)");
1024 LO fullBlockSize, blockID, stridingOffset, stridedBlockSize;
1026 amalgInfo->GetStridingInformation(fullBlockSize, blockID, stridingOffset, stridedBlockSize, indexBase);
1030 auto procWinner = aggregates->GetProcWinner()->getLocalViewDevice(Tpetra::Access::ReadOnly);
1031 auto vertex2AggId = aggregates->GetVertex2AggId()->getLocalViewDevice(Tpetra::Access::ReadOnly);
1032 const size_t numAggregates = aggregates->GetNumAggregates();
1034 int myPID = aggregates->GetMap()->getComm()->getRank();
1039 typedef typename Aggregates::aggregates_sizes_type::non_const_type AggSizeType;
1040 AggSizeType aggDofSizes;
1046 aggDofSizes = AggSizeType(
"agg_dof_sizes", numAggregates + 1);
1048 Kokkos::deep_copy(Kokkos::subview(aggDofSizes, Kokkos::make_pair(
static_cast<size_t>(1), numAggregates + 1)), aggSizes);
1054 ReduceMaxFunctor<LO,
decltype(aggDofSizes)> reduceMax(aggDofSizes);
1055 Kokkos::parallel_reduce(
"MueLu:TentativePF:Build:max_agg_size",
range_type(0, aggDofSizes.extent(0)), reduceMax, maxAggSize);
1059 Kokkos::parallel_scan(
1060 "MueLu:TentativePF:Build:aggregate_sizes:stage1_scan",
range_type(0, numAggregates + 1),
1061 KOKKOS_LAMBDA(
const LO i, LO& update,
const bool& final_pass) {
1062 update += aggDofSizes(i);
1064 aggDofSizes(i) = update;
1069 Kokkos::View<LO*, DeviceType> aggToRowMapLO(Kokkos::ViewAllocateWithoutInitializing(
"aggtorow_map_LO"), numFineBlockRows);
1073 AggSizeType aggOffsets(Kokkos::ViewAllocateWithoutInitializing(
"aggOffsets"), numAggregates);
1074 Kokkos::deep_copy(aggOffsets, Kokkos::subview(aggDofSizes, Kokkos::make_pair(
static_cast<size_t>(0), numAggregates)));
1076 Kokkos::parallel_for(
1077 "MueLu:TentativePF:Build:createAgg2RowMap",
range_type(0, vertex2AggId.extent(0)),
1078 KOKKOS_LAMBDA(
const LO lnode) {
1079 if (procWinner(lnode, 0) == myPID) {
1081 auto aggID = vertex2AggId(lnode, 0);
1083 auto offset = Kokkos::atomic_fetch_add(&aggOffsets(aggID), stridedBlockSize);
1087 for (LO k = 0; k < stridedBlockSize; k++)
1088 aggToRowMapLO(offset + k) = lnode * stridedBlockSize + k;
1095 coarseNullspace = MultiVectorFactory::Build(coarsePointMap, NSDim,
true);
1098 auto fineNS = fineNullspace->getLocalViewDevice(Tpetra::Access::ReadWrite);
1099 auto coarseNS = coarseNullspace->getLocalViewDevice(Tpetra::Access::ReadWrite);
1101 typedef typename Xpetra::Matrix<SC, LO, GO, NO>::local_matrix_device_type local_matrix_type;
1102 typedef typename local_matrix_type::row_map_type::non_const_type rows_type;
1103 typedef typename local_matrix_type::index_type::non_const_type cols_type;
1107 typedef Kokkos::View<int[10], DeviceType> status_type;
1108 status_type status(
"status");
1114 GetOStream(
Runtime1) <<
"TentativePFactory : bypassing local QR phase" << std::endl;
1120 rows_type ia(Kokkos::ViewAllocateWithoutInitializing(
"BlockGraph_rowptr"), numFineBlockRows + 1);
1121 cols_type ja(Kokkos::ViewAllocateWithoutInitializing(
"BlockGraph_colind"), numFineBlockRows);
1123 Kokkos::parallel_for(
1124 "MueLu:TentativePF:BlockCrs:graph_init",
range_type(0, numFineBlockRows),
1125 KOKKOS_LAMBDA(
const LO j) {
1129 if (j == (LO)numFineBlockRows - 1)
1130 ia[numFineBlockRows] = numFineBlockRows;
1134 const Kokkos::TeamPolicy<execution_space> policy(numAggregates, 1);
1135 Kokkos::parallel_for(
1136 "MueLu:TentativePF:BlockCrs:fillGraph", policy,
1137 KOKKOS_LAMBDA(
const typename Kokkos::TeamPolicy<execution_space>::member_type& thread) {
1138 auto agg = thread.league_rank();
1139 Xpetra::global_size_t offset = agg;
1144 for (LO j = 0; j < aggSize; j++) {
1146 const LO localRow = aggToRowMapLO[aggDofSizes[agg] + j];
1147 const size_t rowStart = ia[localRow];
1148 ja[rowStart] = offset;
1158 rows_type i_temp(Kokkos::ViewAllocateWithoutInitializing(
"BlockGraph_rowptr"), numFineBlockRows + 1);
1160 Kokkos::parallel_scan(
1161 "MueLu:TentativePF:BlockCrs:compress_rows",
range_type(0, numFineBlockRows),
1162 KOKKOS_LAMBDA(
const LO i, LO& upd,
const bool&
final) {
1165 for (
auto j = ia[i]; j < ia[i + 1]; j++)
1166 if (ja[j] != INVALID)
1168 if (
final && i == (LO)numFineBlockRows - 1)
1169 i_temp[numFineBlockRows] = upd;
1173 cols_type j_temp(Kokkos::ViewAllocateWithoutInitializing(
"BlockGraph_colind"), nnz);
1175 Kokkos::parallel_for(
1176 "MueLu:TentativePF:BlockCrs:compress_cols",
range_type(0, numFineBlockRows),
1177 KOKKOS_LAMBDA(
const LO i) {
1178 size_t rowStart = i_temp[i];
1180 for (
auto j = ia[i]; j < ia[i + 1]; j++)
1181 if (ja[j] != INVALID) {
1182 j_temp[rowStart + lnnz] = ja[j];
1191 RCP<CrsGraph> BlockGraph = CrsGraphFactory::Build(rowMap, coarseBlockMap, ia, ja);
1195 RCP<ParameterList> FCparams;
1196 if (pL.isSublist(
"matrixmatrix: kernel params"))
1197 FCparams = rcp(
new ParameterList(pL.sublist(
"matrixmatrix: kernel params")));
1199 FCparams = rcp(
new ParameterList);
1201 FCparams->set(
"compute global constants", FCparams->get(
"compute global constants",
false));
1202 std::string levelIDs =
toString(levelID);
1203 FCparams->set(
"Timer Label", std::string(
"MueLu::TentativeP-") + levelIDs);
1204 RCP<const Export> dummy_e;
1205 RCP<const Import> dummy_i;
1206 BlockGraph->expertStaticFillComplete(coarseBlockMap, rowMap, dummy_i, dummy_e, FCparams);
1216 RCP<Xpetra::CrsMatrix<SC, LO, GO, NO>> P_xpetra = Xpetra::CrsMatrixFactory<SC, LO, GO, NO>::BuildBlock(BlockGraph, coarsePointMap, rangeMap, NSDim);
1217 RCP<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO>> P_tpetra = rcp_dynamic_cast<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO>>(P_xpetra);
1218 if (P_tpetra.is_null())
throw std::runtime_error(
"BuildPUncoupled: Matrix factory did not return a Tpetra::BlockCrsMatrix");
1219 RCP<CrsMatrixWrap> P_wrap = rcp(
new CrsMatrixWrap(P_xpetra));
1221 auto values = P_tpetra->getTpetra_BlockCrsMatrix()->getValuesDeviceNonConst();
1222 const LO stride = NSDim * NSDim;
1224 Kokkos::parallel_for(
1225 "MueLu:TentativePF:BlockCrs:main_loop_noqr", policy,
1226 KOKKOS_LAMBDA(
const typename Kokkos::TeamPolicy<execution_space>::member_type& thread) {
1227 auto agg = thread.league_rank();
1231 Xpetra::global_size_t offset = agg * NSDim;
1234 for (LO j = 0; j < aggSize; j++) {
1235 LO localBlockRow = aggToRowMapLO(
aggRows(agg) + j);
1236 LO rowStart = localBlockRow * stride;
1237 for (LO r = 0; r < (LO)NSDim; r++) {
1238 LO localPointRow = localBlockRow * NSDim + r;
1239 for (LO c = 0; c < (LO)NSDim; c++) {
1240 values[rowStart + r * NSDim + c] = fineNSRandom(localPointRow, c);
1246 for (LO j = 0; j < (LO)NSDim; j++)
1250 Ptentative = P_wrap;