142 ParameterList pL = GetParameterList();
143 bDefinitionPhase_ =
false;
145 if (pL.get<
int>(
"aggregation: max agg size") == -1)
146 pL.set(
"aggregation: max agg size", INT_MAX);
149 RCP<const FactoryBase> graphFact = GetFactory(
"Graph");
155 if (pL.get<
bool>(
"aggregation: allow user-specified singletons") ==
true) algos_.push_back(rcp(
new OnePtAggregationAlgorithm(graphFact)));
161 std::string mapOnePtName = pL.get<std::string>(
"OnePt aggregate map name");
162 RCP<Map> OnePtMap = Teuchos::null;
163 if (mapOnePtName.length()) {
164 std::string mapOnePtFactName = pL.get<std::string>(
"OnePt aggregate map factory");
165 if (mapOnePtFactName ==
"" || mapOnePtFactName ==
"NoFactory") {
168 RCP<const FactoryBase> mapOnePtFact = GetFactory(mapOnePtFactName);
169 OnePtMap = currentLevel.
Get<RCP<Map>>(mapOnePtName, mapOnePtFact.get());
174 std::string mapInterfaceName = pL.get<std::string>(
"Interface aggregate map name");
175 RCP<Map> InterfaceMap = Teuchos::null;
177 RCP<const LWGraph> graph;
178 RCP<const LWGraph_kokkos> graph_kokkos;
179 RCP<Aggregates> aggregates;
180 RCP<const Teuchos::Comm<int>> comm;
183 const std::string aggregationBackend = pL.get<std::string>(
"aggregation: backend");
194 if (IsType<RCP<LWGraph>>(currentLevel,
"Graph")) {
195 if ((aggregationBackend ==
"default") || (aggregationBackend ==
"host")) {
196 graph = Get<RCP<LWGraph>>(currentLevel,
"Graph");
198 comm = graph->GetComm();
199 numRows = graph->GetNodeNumVertices();
202 RCP<LWGraph> tmp_graph = Get<RCP<LWGraph>>(currentLevel,
"Graph");
203 graph_kokkos = tmp_graph->copyToDevice();
204 aggregates = rcp(
new Aggregates(*graph_kokkos));
205 comm = graph_kokkos->GetComm();
206 numRows = graph_kokkos->GetNodeNumVertices();
209 }
else if (IsType<RCP<LWGraph_kokkos>>(currentLevel,
"Graph")) {
210 if ((aggregationBackend ==
"default") || (aggregationBackend ==
"kokkos")) {
211 graph_kokkos = Get<RCP<LWGraph_kokkos>>(currentLevel,
"Graph");
212 aggregates = rcp(
new Aggregates(*graph_kokkos));
213 comm = graph_kokkos->GetComm();
214 numRows = graph_kokkos->GetNodeNumVertices();
217 RCP<LWGraph_kokkos> tmp_graph_kokkos = Get<RCP<LWGraph_kokkos>>(currentLevel,
"Graph");
218 graph = tmp_graph_kokkos->copyToHost();
220 comm = graph->GetComm();
221 numRows = graph->GetNodeNumVertices();
225 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Graph has bad type.");
229 TEUCHOS_TEST_FOR_EXCEPTION(pL.get<
bool>(
"aggregation: use interface aggregation"), std::invalid_argument,
"Option: 'aggregation: use interface aggregation' is not supported in the Kokkos version of uncoupled aggregation");
231 TEUCHOS_TEST_FOR_EXCEPTION(pL.get<
bool>(
"aggregation: match ML phase1"), std::invalid_argument,
"Option: 'aggregation: match ML phase1' is not supported in the Kokkos version of uncoupled aggregation");
235 aggregates->setObjectLabel(
"UC");
240 AggStatHostType aggStatHost;
244 aggStatHost = AggStatHostType(Kokkos::ViewAllocateWithoutInitializing(
"aggregation status"), numRows);
245 Kokkos::deep_copy(aggStatHost,
READY);
247 aggStat = AggStatType(Kokkos::ViewAllocateWithoutInitializing(
"aggregation status"), numRows);
248 Kokkos::deep_copy(aggStat,
READY);
252 if (pL.get<
bool>(
"aggregation: use interface aggregation") ==
true) {
253 Teuchos::Array<LO> nodeOnInterface = Get<Array<LO>>(currentLevel,
"nodeOnInterface");
254 for (LO i = 0; i < numRows; i++) {
255 if (nodeOnInterface[i])
263 auto dirichletBoundaryMap = graph->GetBoundaryNodeMap();
264 Kokkos::parallel_for(
265 "MueLu - UncoupledAggregation: tagging boundary nodes in aggStat",
266 Kokkos::RangePolicy<LocalOrdinal, typename LWGraph::execution_space>(0, numRows),
268 if (dirichletBoundaryMap(nodeIdx) ==
true) {
273 auto dirichletBoundaryMap = graph_kokkos->GetBoundaryNodeMap();
274 Kokkos::parallel_for(
275 "MueLu - UncoupledAggregation: tagging boundary nodes in aggStat",
276 Kokkos::RangePolicy<LocalOrdinal, typename LWGraph_kokkos::execution_space>(0, numRows),
278 if (dirichletBoundaryMap(nodeIdx) ==
true) {
285 if (OnePtMap != Teuchos::null) {
286 LO nDofsPerNode = Get<LO>(currentLevel,
"DofsPerNode");
289 GO indexBase = graph->GetDomainMap()->getIndexBase();
290 for (LO i = 0; i < numRows; i++) {
292 GO grid = (graph->GetDomainMap()->getGlobalElement(i) - indexBase) * nDofsPerNode + indexBase;
294 for (LO kr = 0; kr < nDofsPerNode; kr++)
295 if (OnePtMap->isNodeGlobalElement(grid + kr))
296 aggStatHost(i) =
ONEPT;
299 GO indexBase = graph_kokkos->GetDomainMap()->getIndexBase();
300 auto lclDomainMap = graph_kokkos->GetDomainMap()->getLocalMap();
301 auto lclOnePtMap = OnePtMap->getLocalMap();
302 const LocalOrdinal INVALID = Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid();
303 Kokkos::parallel_for(
304 "MueLu - UncoupledAggregation: tagging OnePt map",
305 Kokkos::RangePolicy<LocalOrdinal, typename LWGraph_kokkos::execution_space>(0, numRows),
308 GO grid = (lclDomainMap.getGlobalElement(i) - indexBase) * nDofsPerNode + indexBase;
310 for (LO kr = 0; kr < nDofsPerNode; kr++)
311 if (lclOnePtMap.getLocalElement(grid + kr) != INVALID)
317 LO numNonAggregatedNodes = numRows;
318 std::string aggAlgo = pL.get<std::string>(
"aggregation: coloring algorithm");
319 if (aggAlgo ==
"mis2 coarsening" || aggAlgo ==
"mis2 aggregation") {
320 TEUCHOS_ASSERT(!runOnHost);
324 using device_t =
typename graph_t::device_type;
325 using exec_space =
typename device_t::execution_space;
326 using rowmap_t =
typename graph_t::row_map_type;
327 using colinds_t =
typename graph_t::entries_type;
328 using lno_t =
typename colinds_t::non_const_value_type;
329 rowmap_t aRowptrs = graph_kokkos->getRowPtrs();
330 colinds_t aColinds = graph_kokkos->getEntries();
332 typename colinds_t::non_const_type labels;
334 if (aggAlgo ==
"mis2 coarsening") {
336 labels = KokkosGraph::graph_mis2_coarsen<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
337 }
else if (aggAlgo ==
"mis2 aggregation") {
339 labels = KokkosGraph::graph_mis2_aggregate<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
344 Kokkos::UnorderedMap<LocalOrdinal, void, exec_space> used_labels(numAggs);
345 Kokkos::parallel_for(
346 "MueLu::UncoupledAggregationFactory::MIS2::nonempty_aggs",
347 Kokkos::RangePolicy<exec_space>(0, numRows),
348 KOKKOS_LAMBDA(lno_t i) {
349 if (aggStat(i) ==
READY)
350 used_labels.insert(labels(i));
353 if (used_labels.failed_insert()) {
357 s <<
"numAggs: " << numAggs << std::endl;
358 auto labels_h = Kokkos::create_mirror_view(labels);
359 Kokkos::deep_copy(labels_h, labels);
360 for (
int kk = 0; kk < labels_h.extent_int(0); ++kk) {
361 s << labels_h(kk) <<
" ";
364 std::cout << s.str();
366 TEUCHOS_ASSERT(!used_labels.failed_insert());
369 Kokkos::View<LO*, typename device_t::memory_space> new_labels(
"new_labels", numAggs);
370 Kokkos::parallel_scan(
371 "MueLu::UncoupledAggregationFactory::MIS2::set_new_labels",
372 Kokkos::RangePolicy<exec_space>(0, used_labels.capacity()),
373 KOKKOS_LAMBDA(lno_t i, lno_t & update,
const bool is_final) {
374 if (used_labels.valid_at(i)) {
375 auto label = used_labels.key_at(i);
377 new_labels(label) = update;
386 used_labels.rehash(0);
389 Kokkos::parallel_for(
390 "MueLu::UncoupledAggregationFactory::MIS2::reassign_labels",
391 Kokkos::RangePolicy<exec_space>(0, numRows),
392 KOKKOS_LAMBDA(lno_t i) {
393 labels(i) = new_labels(labels(i));
397 auto vertex2AggId = aggregates->GetVertex2AggId()->getLocalViewDevice(Tpetra::Access::ReadWrite);
398 auto procWinner = aggregates->GetProcWinner()->getLocalViewDevice(Tpetra::Access::OverwriteAll);
399 int rank = comm->getRank();
400 Kokkos::parallel_for(
401 Kokkos::RangePolicy<exec_space>(0, numRows),
402 KOKKOS_LAMBDA(lno_t i) {
403 if (aggStat(i) ==
READY) {
404#ifdef HAVE_MUELU_DEBUG
405 KOKKOS_ASSERT(labels(i) >= 0);
407 procWinner(i, 0) = rank;
409 vertex2AggId(i, 0) = labels(i);
417 numNonAggregatedNodes = 0;
418 aggregates->SetNumAggregates(numAggs);
421 DoGraphColoring(currentLevel, aggAlgo, pL.get<
bool>(
"aggregation: deterministic"), graph_kokkos, aggregates);
423 GetOStream(
Statistics1) <<
" num colors: " << aggregates->GetGraphNumColors() << std::endl;
427 std::vector<GO> localStats;
429 localStats = std::vector<GO>(1 + 2 * algos_.size());
430 localStats[0] = numRows;
432 for (
size_t a = 0; a < algos_.size(); a++) {
433 std::string phase = algos_[a]->description();
435 SubFactoryMonitor sfm2(*
this,
"Algo \"" + phase +
"\"" + (numNonAggregatedNodes == 0 ?
" [skipped since no nodes are left to aggregate]" :
""), currentLevel);
436 int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose());
438 algos_[a]->SetupPhase(pL, comm, numRows, numNonAggregatedNodes);
440 if (numNonAggregatedNodes > 0) {
442 algos_[a]->BuildAggregatesNonKokkos(pL, *graph, *aggregates, aggStatHost, numNonAggregatedNodes);
444 algos_[a]->BuildAggregates(pL, *graph_kokkos, *aggregates, aggStat, numNonAggregatedNodes);
446 algos_[a]->SetProcRankVerbose(oldRank);
449 localStats[2 * a + 1] = numRows - numNonAggregatedNodes;
450 localStats[2 * a + 2] = aggregates->GetNumAggregates();
454 std::vector<GO> globalStats(1 + 2 * algos_.size());
455 Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, (
int)localStats.size(), localStats.data(), globalStats.data());
456 GO numGlobalRows = globalStats[0];
457 GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0;
458 std::stringstream ss;
459 for (
size_t a = 0; a < algos_.size(); a++) {
460 std::string phase = algos_[a]->description();
461 GO numGlobalAggregated = globalStats[2 * a + 1];
462 GO numGlobalAggs = globalStats[2 * a + 2];
463 GO numGlobalNonAggregatedNodes = numGlobalRows - numGlobalAggregatedPrev;
464 double aggPercent = 100 * as<double>(numGlobalAggregated) / as<double>(numGlobalRows);
465 if (aggPercent > 99.99 && aggPercent < 100.00) {
473 ss <<
"Algo \"" + phase +
"\"" + (numGlobalNonAggregatedNodes == 0 ?
" [skipped since no nodes are left to aggregate]" :
"") << std::endl
474 <<
" aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) <<
" (phase), " << std::fixed
475 << std::setprecision(2) << numGlobalAggregated <<
"/" << numGlobalRows <<
" [" << aggPercent <<
"%] (total)\n"
476 <<
" remaining : " << numGlobalRows - numGlobalAggregated <<
"\n"
477 <<
" aggregates : " << numGlobalAggs - numGlobalAggsPrev <<
" (phase), " << numGlobalAggs <<
" (total)" << std::endl;
478 numGlobalAggregatedPrev = numGlobalAggregated;
479 numGlobalAggsPrev = numGlobalAggs;
485 TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError,
"MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!");
487 aggregates->AggregatesCrossProcessors(
false);
488 aggregates->ComputeAggregateSizes(
true );
490 Set(currentLevel,
"Aggregates", aggregates);