77 using XMM = Xpetra::MatrixMatrix<SC, LO, GO, NO>;
78 using local_matrix_type =
typename Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_type;
79 using rowptr_type =
typename local_matrix_type::row_map_type::non_const_type;
80 using colidx_type =
typename local_matrix_type::index_type::non_const_type;
81 using values_type =
typename local_matrix_type::values_type::non_const_type;
83 using impl_scalar_type =
typename Matrix::impl_scalar_type;
84 using ATS = KokkosKernels::ArithTraits<impl_scalar_type>;
85 using mag_type =
typename KokkosKernels::ArithTraits<impl_scalar_type>::magnitudeType;
86 using magATS = KokkosKernels::ArithTraits<mag_type>;
88 using execution_space =
typename Node::execution_space;
89 using memory_space =
typename Node::memory_space;
91 const auto one_Scalar = Teuchos::ScalarTraits<Scalar>::one();
92 const auto one_impl_scalar = ATS::one();
93 const auto zero_LO = KokkosKernels::ArithTraits<LocalOrdinal>::zero();
94 const auto one_LO = KokkosKernels::ArithTraits<LocalOrdinal>::one();
95 const auto one_mag = magATS::one();
96 const auto eps_mag = magATS::epsilon();
97 const auto INVALID_GO = Teuchos::OrdinalTraits<GlobalOrdinal>::invalid();
128 Teuchos::FancyOStream& out0 = GetBlackHole();
129 const ParameterList& pL = GetParameterList();
131 bool update_communicators = pL.get<
bool>(
"repartition: enable") && pL.get<
bool>(
"repartition: use subcommunicators");
133 RCP<Matrix> D0 = Get<RCP<Matrix> >(fineLevel,
"D0");
134 RCP<Matrix> Pn = Get<RCP<Matrix> >(coarseLevel,
"Pnodal");
137 RCP<Operator> CoarseNodeMatrix = Get<RCP<Operator> >(coarseLevel,
"NodeAggMatrix");
140 RCP<ParameterList> mm_params = rcp(
new ParameterList);
141 if (pL.isSublist(
"matrixmatrix: kernel params"))
142 mm_params->sublist(
"matrixmatrix: kernel params") = pL.sublist(
"matrixmatrix: kernel params");
147 auto vec_ones = VectorFactory::Build(Pn->getDomainMap(),
false);
148 vec_ones->putScalar(one_Scalar);
149 auto vec_rowsums = VectorFactory::Build(Pn->getRangeMap(),
false);
150 Pn->apply(*vec_ones, *vec_rowsums, Teuchos::NO_TRANS);
152 auto lclPn = Pn->getLocalMatrixDevice();
153 auto lclRowSums = vec_rowsums->getLocalViewDevice(Tpetra::Access::ReadOnly);
155 bool all_entries_ok =
true;
156 Kokkos::parallel_reduce(
157 Kokkos::RangePolicy<execution_space>(0, lclPn.numRows()), KOKKOS_LAMBDA(
const LocalOrdinal rlid,
bool& entries_ok) {
159 entries_ok = entries_ok && (ATS::magnitude(lclRowSums(rlid, 0) - one_impl_scalar) < eps_mag);
162 auto row = lclPn.rowConst(rlid);
164 entries_ok = entries_ok && (ATS::magnitude(row.value(k)-one_impl_scalar) < eps_mag);
166 } }, Kokkos::LAnd<bool>(all_entries_ok));
168 TEUCHOS_TEST_FOR_EXCEPTION(!all_entries_ok, std::runtime_error,
"The prolongator needs to be piecewise constant and all entries need to be 1.");
176 auto isDirichletFineEdge = Xpetra::VectorFactory<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(D0->getRowMap(),
false);
177 auto numFineEdges = isDirichletFineEdge->getMap()->getLocalNumElements();
181 D0_Pn = XMM::Multiply(*D0,
false, *Pn,
false, dummy, GetOStream(
Runtime0),
true,
true);
182 RCP<Matrix> Z = XMM::Multiply(*D0_Pn,
true, *D0_Pn,
false, dummy, GetOStream(
Runtime0),
true,
true);
184 auto rowMap = Z->getRowMap();
185 auto colMap = Z->getColMap();
186 auto lclRowMap = rowMap->getLocalMap();
187 auto lclColMap = colMap->getLocalMap();
188 auto lclZ = Z->getLocalMatrixDevice();
189 auto numLocalRows = lclZ.numRows();
195 auto importer = Z->getCrsGraph()->getImporter();
197 Teuchos::Array<int> Z_col_pids;
198 Kokkos::View<int*, memory_space> Z_col_pids_d;
199 if (!importer.is_null()) {
201 utils.
getPids(*importer, Z_col_pids,
false);
202 Kokkos::View<int*, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > Z_col_pids_h(Z_col_pids.data(), Z_col_pids.size());
203 Z_col_pids_d = Kokkos::View<int*, memory_space>(
"Z_col_pids_d", Z_col_pids.size());
204 Kokkos::deep_copy(Z_col_pids_d, Z_col_pids_h);
207 int myProcId = rowMap->getComm()->getRank();
210 auto tie_break = KOKKOS_LAMBDA(
int proc0,
int proc1) {
211 if ((proc0 + proc1) % 2 == 1) {
212 return Kokkos::min(proc0, proc1);
214 return Kokkos::max(proc0, proc1);
221 if (clid < numLocalRows) {
224 return (rlid < clid);
227 int otherProcId = Z_col_pids_d(clid);
228 int owner = tie_break(myProcId, otherProcId);
229 return (owner == myProcId);
234 Kokkos::parallel_reduce(
236 auto row = lclZ.rowConst(rlid);
239 auto clid = row.colidx(k);
240 if (add_edge(rlid, clid))
244 numCoarseRegularEdges);
249 using LOMatrix = Tpetra::CrsMatrix<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>;
250 RCP<LOMatrix> D0_LocalOrdinal;
258 using lo_local_matrix_type =
typename LOMatrix::local_matrix_device_type;
260 auto lclGraph = D0->getCrsGraph()->getLocalGraphDevice();
261 Kokkos::View<LocalOrdinal*, memory_space> values(Kokkos::ViewAllocateWithoutInitializing(
"values_LocalOrdinal"), D0->getLocalNumEntries());
263 auto values_scalar = D0->getLocalMatrixDevice().values;
264 Kokkos::parallel_for(
265 "MueLu::ReitzingerPFactory::convert", Kokkos::RangePolicy<execution_space>(0, values.extent(0)), KOKKOS_LAMBDA(
const size_t i) {
266 if (values_scalar(i) == one_impl_scalar)
268 else if (values_scalar(i) == -one_impl_scalar)
271 Kokkos::abort(
"D0 contains bad values");
274 lo_local_matrix_type lclMatrix(
"D0_LocalOrdinal", D0->getLocalMatrixDevice().numCols(), values, lclGraph);
276 D0_LocalOrdinal = rcp(
new LOMatrix(lclMatrix, toTpetra(D0->getRowMap()), toTpetra(D0->getColMap()), toTpetra(D0->getDomainMap()), toTpetra(D0->getRangeMap())));
279 auto oneVec = Xpetra::VectorFactory<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(D0->getDomainMap(),
false);
280 oneVec->putScalar(KokkosKernels::ArithTraits<LocalOrdinal>::one());
281 D0_LocalOrdinal->apply(*toTpetra(oneVec), *toTpetra(isDirichletFineEdge), Teuchos::NO_TRANS);
283 auto lcl_isDirichletFineEdge = isDirichletFineEdge->getLocalViewDevice(Tpetra::Access::ReadWrite);
284 auto lcl_D0_Pn = D0_Pn->getLocalMatrixDevice();
285 Kokkos::parallel_for(
286 Kokkos::RangePolicy<execution_space>(0, lcl_isDirichletFineEdge.extent(0)), KOKKOS_LAMBDA(
const LocalOrdinal i) {
287 if (ATS::magnitude(ATS::magnitude(lcl_isDirichletFineEdge(i, 0)) - one_mag) > eps_mag) {
289 lcl_isDirichletFineEdge(i, 0) = zero_LO;
292 lcl_isDirichletFineEdge(i, 0) = one_LO;
299 auto numberConnectedFineDirichletEdgesToCoarseNode = Xpetra::VectorFactory<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(D0_Pn->getDomainMap(),
false);
301 auto abs_D0_Pn = LOMatrix(toTpetra(D0_Pn->getCrsGraph()));
302 abs_D0_Pn.fillComplete(toTpetra(D0_Pn->getDomainMap()), toTpetra(D0_Pn->getRangeMap()));
303 abs_D0_Pn.setAllToScalar(KokkosKernels::ArithTraits<LocalOrdinal>::one());
304 abs_D0_Pn.apply(*toTpetra(isDirichletFineEdge), *toTpetra(numberConnectedFineDirichletEdgesToCoarseNode), Teuchos::TRANS);
309 auto lcl_numberConnectedFineDirichletEdgesToCoarseNode = numberConnectedFineDirichletEdgesToCoarseNode->getLocalViewDevice(Tpetra::Access::ReadOnly);
311 Kokkos::parallel_reduce(
312 Kokkos::RangePolicy<execution_space>(0, lcl_numberConnectedFineDirichletEdgesToCoarseNode.extent(0)),
314 if (ATS::magnitude(lcl_numberConnectedFineDirichletEdgesToCoarseNode(i, 0)) > eps_mag) {
318 numCoarseDirichletEdges);
324 MueLu_sumAll(rowMap->getComm(), numCoarseRegularEdges, numGlobalRegularEdges);
325 MueLu_sumAll(rowMap->getComm(), numCoarseDirichletEdges, numGlobalDirichletEdges);
326 GetOStream(
Statistics0) <<
"regular edges: " << numGlobalRegularEdges <<
", Dirichlet edges: " << numGlobalDirichletEdges << std::endl;
329 numCoarseEdges = numCoarseRegularEdges + numCoarseDirichletEdges;
330 rowptr_type rowptr(Kokkos::ViewAllocateWithoutInitializing(
"rowptr D0H"), numCoarseEdges + 1);
332 LocalOrdinal nnz = 2 * numCoarseRegularEdges + numCoarseDirichletEdges;
333 colidx_type colidx(Kokkos::ViewAllocateWithoutInitializing(
"colidx D0H"), nnz);
334 values_type values(Kokkos::ViewAllocateWithoutInitializing(
"values D0H"), nnz);
337 Kokkos::parallel_scan(
338 Kokkos::RangePolicy<execution_space>(0, numLocalRows),
340 auto row = lclZ.rowConst(rlid);
344 auto clid = row.colidx(k);
345 if (add_edge(rlid, clid))
355 auto rgid = lclRowMap.getGlobalElement(rlid);
356 auto rclid = lclColMap.getLocalElement(rgid);
360 auto clid = row.colidx(k);
361 if (add_edge(rlid, clid)) {
362 auto cgid = lclColMap.getGlobalElement(clid);
364 colidx(2 * ne) = rclid;
365 colidx(2 * ne + 1) = clid;
367 values(2 * ne) = -one_impl_scalar;
368 values(2 * ne + 1) = one_impl_scalar;
370 values(2 * ne) = one_impl_scalar;
371 values(2 * ne + 1) = -one_impl_scalar;
383 auto lcl_numberConnectedFineDirichletEdgesToCoarseNode = numberConnectedFineDirichletEdgesToCoarseNode->getLocalViewDevice(Tpetra::Access::ReadOnly);
384 Kokkos::parallel_scan(
385 Kokkos::RangePolicy<execution_space>(0, lcl_numberConnectedFineDirichletEdgesToCoarseNode.extent(0)),
387 if (ATS::magnitude(lcl_numberConnectedFineDirichletEdgesToCoarseNode(agg_lid, 0)) > eps_mag) {
393 colidx(2 * numCoarseRegularEdges + ne) = agg_lid;
394 values(2 * numCoarseRegularEdges + ne) = one_impl_scalar;
396 rowptr(numCoarseRegularEdges + ne) = 2 * numCoarseRegularEdges + ne;
402 auto D0H_rowmap = MapFactory::Build(rowMap->lib(), INVALID_GO, numCoarseEdges, 0, rowMap->getComm());
403 auto lclD0H = local_matrix_type(
"D0H", numCoarseEdges, colMap->getLocalNumElements(), nnz, values, rowptr, colidx);
406 D0H = MatrixFactory::Build(lclD0H, D0H_rowmap, colMap, Z->getDomainMap(), D0H_rowmap);
416 RCP<Matrix> D0_Pn_D0HT;
422 int rank = D0->getRowMap()->getComm()->getRank();
424 printf(
"[%d] Level %d Checkpoint #2 Pn = %d/%d/%d/%d D0c = %d/%d/%d/%d D0 = %d/%d/%d/%d\n",rank,fine_level,
425 Pn->getRangeMap()->getComm()->getSize(),
426 Pn->getRowMap()->getComm()->getSize(),
427 Pn->getColMap()->getComm()->getSize(),
428 Pn->getDomainMap()->getComm()->getSize(),
429 D0H->getRangeMap()->getComm()->getSize(),
430 D0H->getRowMap()->getComm()->getSize(),
431 D0H->getColMap()->getComm()->getSize(),
432 D0H->getDomainMap()->getComm()->getSize(),
433 D0->getRangeMap()->getComm()->getSize(),
434 D0->getRowMap()->getComm()->getSize(),
435 D0->getColMap()->getComm()->getSize(),
436 D0->getDomainMap()->getComm()->getSize());
438 D0->getRowMap()->getComm()->barrier();
442 RCP<Matrix> Pn_D0cT = XMM::Multiply(*Pn,
false, *D0H,
true, dummy, out0,
true,
true,
"Pn*D0c'", mm_params);
445 if (!mm_params.is_null()) mm_params->remove(
"importer",
false);
447 D0_Pn_D0HT = XMM::Multiply(*D0,
false, *Pn_D0cT,
false, dummy, out0,
true,
true,
"D0*(Pn*D0c')", mm_params);
455 auto lcl_D0_Pn = D0_Pn->getLocalMatrixDevice();
456 auto lcl_D0_Pn_D0HT = D0_Pn_D0HT->getLocalMatrixDevice();
457 auto lcl_isDirichletFineEdge = isDirichletFineEdge->getLocalViewDevice(Tpetra::Access::ReadOnly);
459 auto lcl_colmap_D0_Pn_D0HT = D0_Pn_D0HT->getColMap()->getLocalMap();
461 const auto half = one_impl_scalar / (one_impl_scalar + one_impl_scalar);
464 rowptr_type Pe_rowptr(
"Pe_rowptr", numFineEdges + 2);
467 Kokkos::parallel_for(
468 "Pe_count_entries", Kokkos::RangePolicy<execution_space>(0, numFineEdges), KOKKOS_LAMBDA(
const LocalOrdinal fineEdge) {
469 if (lcl_isDirichletFineEdge(fineEdge, 0) != one_LO) {
471 auto row = lcl_D0_Pn_D0HT.rowConst(fineEdge);
472 for (
int k = 0; k < row.length; ++k) {
473 auto val = row.value(k);
475 if (!((ATS::magnitude(val - one_impl_scalar) < eps_mag) || (ATS::magnitude(val + one_impl_scalar) < eps_mag) || (ATS::magnitude(val) < eps_mag))) {
477 ++Pe_rowptr(fineEdge + 2);
482 ++Pe_rowptr(fineEdge + 2);
488 Kokkos::parallel_scan(
489 "Pe_prefix_sum", Kokkos::RangePolicy<execution_space>(0, numFineEdges + 2), KOKKOS_LAMBDA(
const LocalOrdinal rlid,
LocalOrdinal& nnz,
const bool update) {
490 nnz += Pe_rowptr(rlid);
492 Pe_rowptr(rlid) = nnz;
498 colidx_type Pe_colidx(
"Pe_colidx", Pe_nnz);
499 values_type Pe_values(
"Pe_values", Pe_nnz);
502 RCP<GOVector> map_coarseNodes_colMap_D0_Pn_to_coarseEdges;
504 auto map_coarseEdges_rowMap_D0H_to_coarseEdges = Xpetra::VectorFactory<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(D0H->getRowMap());
506 auto lcl_map_coarseEdges_rowMap_D0H_to_coarseEdges = map_coarseEdges_rowMap_D0H_to_coarseEdges->getLocalViewDevice(Tpetra::Access::OverwriteAll);
507 auto lclMap = D0H->getRowMap()->getLocalMap();
508 Kokkos::parallel_for(
509 Kokkos::RangePolicy<execution_space>(numCoarseRegularEdges, numCoarseEdges), KOKKOS_LAMBDA(
const LocalOrdinal coarseEdge) {
510 lcl_map_coarseEdges_rowMap_D0H_to_coarseEdges(coarseEdge, 0) = lclMap.getGlobalElement(coarseEdge);
513 auto map_coarseNodes_domainMap_D0_Pn_to_coarseEdges = Xpetra::VectorFactory<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(D0H->getDomainMap());
521 using GOMatrix = Tpetra::CrsMatrix<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, Node>;
522 using go_local_matrix_type =
typename GOMatrix::local_matrix_device_type;
524 auto lclGraph = D0H->getCrsGraph()->getLocalGraphDevice();
525 typename go_local_matrix_type::values_type::non_const_type ones(
"ones_GlobalOrdinal", D0H->getLocalNumEntries());
526 const auto one_GO = KokkosKernels::ArithTraits<typename go_local_matrix_type::values_type::value_type>::one();
527 Kokkos::deep_copy(ones, one_GO);
529 go_local_matrix_type lclMatrix(
"D0H_GlobalOrdinal", D0H->getLocalMatrixDevice().numCols(), ones, lclGraph);
531 auto D0H_GlobalOrdinal = GOMatrix(lclMatrix, toTpetra(D0H->getRowMap()), toTpetra(D0H->getColMap()), toTpetra(D0H->getDomainMap()), toTpetra(D0H->getRangeMap()));
532 D0H_GlobalOrdinal.apply(*toTpetra(map_coarseEdges_rowMap_D0H_to_coarseEdges), *toTpetra(map_coarseNodes_domainMap_D0_Pn_to_coarseEdges), Teuchos::TRANS);
535 auto importer = D0_Pn->getCrsGraph()->getImporter();
536 if (!importer.is_null()) {
537 map_coarseNodes_colMap_D0_Pn_to_coarseEdges = Xpetra::VectorFactory<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(D0_Pn->getColMap());
538 map_coarseNodes_colMap_D0_Pn_to_coarseEdges->doImport(*map_coarseNodes_domainMap_D0_Pn_to_coarseEdges, *importer, Xpetra::INSERT);
540 map_coarseNodes_colMap_D0_Pn_to_coarseEdges = map_coarseNodes_domainMap_D0_Pn_to_coarseEdges;
544 auto lcl_map_coarseNodes_colMap_D0_Pn_to_coarseEdges = map_coarseNodes_colMap_D0_Pn_to_coarseEdges->getLocalViewDevice(Tpetra::Access::ReadOnly);
547 Kokkos::parallel_for(
548 "Pe_fill", Kokkos::RangePolicy<execution_space>(0, numFineEdges), KOKKOS_LAMBDA(
const LocalOrdinal fineEdge_lid) {
549 if (lcl_isDirichletFineEdge(fineEdge_lid, 0) != one_LO) {
551 auto row = lcl_D0_Pn_D0HT.rowConst(fineEdge_lid);
552 for (
int k = 0; k < row.length; ++k) {
553 auto val = row.value(k);
554 if (!((ATS::magnitude(val - one_impl_scalar) < eps_mag) || (ATS::magnitude(val + one_impl_scalar) < eps_mag) || (ATS::magnitude(val) < eps_mag))) {
555 auto clid = row.colidx(k);
557 auto offset = Pe_rowptr(fineEdge_lid + 1);
558 Pe_colidx(offset) = clid;
559 Pe_values(offset) = val * half;
560 ++Pe_rowptr(fineEdge_lid + 1);
566 for (
auto offset_D0_Pn = lcl_D0_Pn.graph.row_map(fineEdge_lid); offset_D0_Pn < lcl_D0_Pn.graph.row_map(fineEdge_lid + 1); ++offset_D0_Pn) {
567 LocalOrdinal coarseNode_lid_D0_Pn = lcl_D0_Pn.graph.entries(offset_D0_Pn);
568 impl_scalar_type val_D0_Pn = lcl_D0_Pn.values(offset_D0_Pn);
569 if (ATS::magnitude(val_D0_Pn) > eps_mag) {
570 GlobalOrdinal coarseEdge_gid = lcl_map_coarseNodes_colMap_D0_Pn_to_coarseEdges(coarseNode_lid_D0_Pn, 0);
572 auto coarseEdge_lid_D0_Pn_D0HT = lcl_colmap_D0_Pn_D0HT.getLocalElement(coarseEdge_gid);
575 const auto val_D0H = one_impl_scalar;
578 auto offset_Pe = Pe_rowptr(fineEdge_lid + 1);
579 Pe_colidx(offset_Pe) = coarseEdge_lid_D0_Pn_D0HT;
580 Pe_values(offset_Pe) = val_D0_Pn / val_D0H;
581 ++Pe_rowptr(fineEdge_lid + 1);
588 auto lclPe = local_matrix_type(
"Pe", numFineEdges, D0_Pn_D0HT->getColMap()->getLocalNumElements(), Pe_nnz, Pe_values, Kokkos::subview(Pe_rowptr, Kokkos::make_pair((
decltype(numFineEdges))0, numFineEdges + 1)), Pe_colidx);
591 Pe = MatrixFactory::Build(lclPe, D0->getRowMap(), D0_Pn_D0HT->getColMap(), D0H->getRangeMap(), D0->getRangeMap());
596 CheckCommutingProperty(*Pe, *D0H, *D0, *Pn);
602 if (update_communicators) {
604 RCP<const Teuchos::Comm<int> > newComm;
605 if (!CoarseNodeMatrix.is_null()) newComm = CoarseNodeMatrix->getDomainMap()->getComm();
606 RCP<const Map> newMap = MapFactory::copyMapWithNewComm(D0H->getRowMap(), newComm);
607 D0H->removeEmptyProcessesInPlace(newMap);
610 if (newMap.is_null()) D0H = Teuchos::null;
612 Set(coarseLevel,
"InPlaceMap", newMap);
615 Set(coarseLevel,
"P", Pe);
616 Set(coarseLevel,
"Ptent", Pe);
618 Set(coarseLevel,
"D0", D0H);
627 int numProcs = Pe->getRowMap()->getComm()->getSize();
630 sprintf(fname,
"Pe_%d_%d.mat", numProcs, fineLevel.
GetLevelID());
631 Xpetra::IO<SC, LO, GO, NO>::Write(fname, *Pe);
632 sprintf(fname,
"Pn_%d_%d.mat", numProcs, fineLevel.
GetLevelID());
633 Xpetra::IO<SC, LO, GO, NO>::Write(fname, *Pn);
634 if (!D0H.is_null()) {
635 sprintf(fname,
"D0c_%d_%d.mat", numProcs, fineLevel.
GetLevelID());
636 Xpetra::IO<SC, LO, GO, NO>::Write(fname, *D0H);
638 sprintf(fname,
"D0f_%d_%d.mat", numProcs, fineLevel.
GetLevelID());
639 Xpetra::IO<SC, LO, GO, NO>::Write(fname, *D0);