142 ParameterList pL = GetParameterList();
143 bDefinitionPhase_ =
false;
145 if (pL.get<
int>(
"aggregation: max agg size") == -1)
146 pL.set(
"aggregation: max agg size", INT_MAX);
149 RCP<const FactoryBase> graphFact = GetFactory(
"Graph");
155 if (pL.get<
bool>(
"aggregation: allow user-specified singletons") ==
true) algos_.push_back(rcp(
new OnePtAggregationAlgorithm(graphFact)));
161 std::string mapOnePtName = pL.get<std::string>(
"OnePt aggregate map name");
162 RCP<Map> OnePtMap = Teuchos::null;
163 if (mapOnePtName.length()) {
164 std::string mapOnePtFactName = pL.get<std::string>(
"OnePt aggregate map factory");
165 if (mapOnePtFactName ==
"" || mapOnePtFactName ==
"NoFactory") {
168 RCP<const FactoryBase> mapOnePtFact = GetFactory(mapOnePtFactName);
169 OnePtMap = currentLevel.
Get<RCP<Map>>(mapOnePtName, mapOnePtFact.get());
174 std::string mapInterfaceName = pL.get<std::string>(
"Interface aggregate map name");
175 RCP<Map> InterfaceMap = Teuchos::null;
177 RCP<const LWGraph> graph;
178 RCP<const LWGraph_kokkos> graph_kokkos;
179 RCP<Aggregates> aggregates;
180 RCP<const Teuchos::Comm<int>> comm;
183 const std::string aggregationBackend = pL.get<std::string>(
"aggregation: backend");
194 if (IsType<RCP<LWGraph>>(currentLevel,
"Graph")) {
195 if ((aggregationBackend ==
"default") || (aggregationBackend ==
"host")) {
196 graph = Get<RCP<LWGraph>>(currentLevel,
"Graph");
198 comm = graph->GetComm();
199 numRows = graph->GetNodeNumVertices();
202 RCP<LWGraph> tmp_graph = Get<RCP<LWGraph>>(currentLevel,
"Graph");
203 graph_kokkos = tmp_graph->copyToDevice();
204 aggregates = rcp(
new Aggregates(*graph_kokkos));
205 comm = graph_kokkos->GetComm();
206 numRows = graph_kokkos->GetNodeNumVertices();
209 }
else if (IsType<RCP<LWGraph_kokkos>>(currentLevel,
"Graph")) {
210 if ((aggregationBackend ==
"default") || (aggregationBackend ==
"kokkos")) {
211 graph_kokkos = Get<RCP<LWGraph_kokkos>>(currentLevel,
"Graph");
212 aggregates = rcp(
new Aggregates(*graph_kokkos));
213 comm = graph_kokkos->GetComm();
214 numRows = graph_kokkos->GetNodeNumVertices();
217 RCP<LWGraph_kokkos> tmp_graph_kokkos = Get<RCP<LWGraph_kokkos>>(currentLevel,
"Graph");
218 graph = tmp_graph_kokkos->copyToHost();
220 comm = graph->GetComm();
221 numRows = graph->GetNodeNumVertices();
225 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Graph has bad type.");
229 TEUCHOS_TEST_FOR_EXCEPTION(pL.get<
bool>(
"aggregation: use interface aggregation"), std::invalid_argument,
"Option: 'aggregation: use interface aggregation' is not supported in the Kokkos version of uncoupled aggregation");
231 TEUCHOS_TEST_FOR_EXCEPTION(pL.get<
bool>(
"aggregation: match ML phase1"), std::invalid_argument,
"Option: 'aggregation: match ML phase1' is not supported in the Kokkos version of uncoupled aggregation");
235 aggregates->setObjectLabel(
"UC");
240 AggStatHostType aggStatHost;
244 aggStatHost = AggStatHostType(Kokkos::ViewAllocateWithoutInitializing(
"aggregation status"), numRows);
245 Kokkos::deep_copy(aggStatHost,
READY);
247 aggStat = AggStatType(Kokkos::ViewAllocateWithoutInitializing(
"aggregation status"), numRows);
248 Kokkos::deep_copy(aggStat,
READY);
252 if (pL.get<
bool>(
"aggregation: use interface aggregation") ==
true) {
253 Teuchos::Array<LO> nodeOnInterface = Get<Array<LO>>(currentLevel,
"nodeOnInterface");
254 for (LO i = 0; i < numRows; i++) {
255 if (nodeOnInterface[i])
263 auto dirichletBoundaryMap = graph->GetBoundaryNodeMap();
264 Kokkos::parallel_for(
265 "MueLu - UncoupledAggregation: tagging boundary nodes in aggStat",
266 Kokkos::RangePolicy<LocalOrdinal, typename LWGraph::execution_space>(0, numRows),
268 if (dirichletBoundaryMap(nodeIdx) ==
true) {
273 auto dirichletBoundaryMap = graph_kokkos->GetBoundaryNodeMap();
274 Kokkos::parallel_for(
275 "MueLu - UncoupledAggregation: tagging boundary nodes in aggStat",
276 Kokkos::RangePolicy<LocalOrdinal, typename LWGraph_kokkos::execution_space>(0, numRows),
278 if (dirichletBoundaryMap(nodeIdx) ==
true) {
285 if (OnePtMap != Teuchos::null) {
286 LO nDofsPerNode = Get<LO>(currentLevel,
"DofsPerNode");
289 GO indexBase = graph->GetDomainMap()->getIndexBase();
290 for (LO i = 0; i < numRows; i++) {
292 GO grid = (graph->GetDomainMap()->getGlobalElement(i) - indexBase) * nDofsPerNode + indexBase;
294 for (LO kr = 0; kr < nDofsPerNode; kr++)
295 if (OnePtMap->isNodeGlobalElement(grid + kr))
296 aggStatHost(i) =
ONEPT;
299 GO indexBase = graph_kokkos->GetDomainMap()->getIndexBase();
300 auto lclDomainMap = graph_kokkos->GetDomainMap()->getLocalMap();
301 auto lclOnePtMap = OnePtMap->getLocalMap();
302 const LocalOrdinal INVALID = Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid();
303 Kokkos::parallel_for(
304 "MueLu - UncoupledAggregation: tagging OnePt map",
305 Kokkos::RangePolicy<LocalOrdinal, typename LWGraph_kokkos::execution_space>(0, numRows),
308 GO grid = (lclDomainMap.getGlobalElement(i) - indexBase) * nDofsPerNode + indexBase;
310 for (LO kr = 0; kr < nDofsPerNode; kr++)
311 if (lclOnePtMap.getLocalElement(grid + kr) != INVALID)
317 LO numNonAggregatedNodes = numRows;
318 std::string aggAlgo = pL.get<std::string>(
"aggregation: coloring algorithm");
319 if (aggAlgo ==
"mis2 coarsening" || aggAlgo ==
"mis2 aggregation") {
320 TEUCHOS_ASSERT(!runOnHost);
324 using device_t =
typename graph_t::device_type;
325 using exec_space =
typename device_t::execution_space;
326 using rowmap_t =
typename graph_t::row_map_type;
327 using colinds_t =
typename graph_t::entries_type;
328 using lno_t =
typename colinds_t::non_const_value_type;
329 rowmap_t aRowptrs = graph_kokkos->getRowPtrs();
330 colinds_t aColinds = graph_kokkos->getEntries();
332 typename colinds_t::non_const_type labels;
334 if (aggAlgo ==
"mis2 coarsening") {
336 labels = KokkosGraph::graph_mis2_coarsen<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
337 }
else if (aggAlgo ==
"mis2 aggregation") {
339 labels = KokkosGraph::graph_mis2_aggregate<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
342 size_t labelCapacity = numAggs * 1.5;
346 Kokkos::UnorderedMap<LocalOrdinal, void, exec_space> used_labels(labelCapacity);
347 Kokkos::parallel_for(
348 "MueLu::UncoupledAggregationFactory::MIS2::nonempty_aggs",
349 Kokkos::RangePolicy<exec_space>(0, numRows),
350 KOKKOS_LAMBDA(lno_t i) {
351 if (aggStat(i) ==
READY)
352 used_labels.insert(labels(i));
355 if (used_labels.failed_insert()) {
357 labelCapacity = (labelCapacity + 1) * 1.5;
362 Kokkos::View<LO*, typename device_t::memory_space> new_labels(
"new_labels", numAggs);
363 Kokkos::parallel_scan(
364 "MueLu::UncoupledAggregationFactory::MIS2::set_new_labels",
365 Kokkos::RangePolicy<exec_space>(0, used_labels.capacity()),
366 KOKKOS_LAMBDA(lno_t i, lno_t & update,
const bool is_final) {
367 if (used_labels.valid_at(i)) {
368 auto label = used_labels.key_at(i);
370 new_labels(label) = update;
378 Kokkos::parallel_for(
379 "MueLu::UncoupledAggregationFactory::MIS2::reassign_labels",
380 Kokkos::RangePolicy<exec_space>(0, numRows),
381 KOKKOS_LAMBDA(lno_t i) {
382 labels(i) = new_labels(labels(i));
388 auto vertex2AggId = aggregates->GetVertex2AggId()->getLocalViewDevice(Tpetra::Access::ReadWrite);
389 auto procWinner = aggregates->GetProcWinner()->getLocalViewDevice(Tpetra::Access::OverwriteAll);
390 int rank = comm->getRank();
391 Kokkos::parallel_for(
392 Kokkos::RangePolicy<exec_space>(0, numRows),
393 KOKKOS_LAMBDA(lno_t i) {
394 if (aggStat(i) ==
READY) {
395#ifdef HAVE_MUELU_DEBUG
396 KOKKOS_ASSERT(labels(i) >= 0);
398 procWinner(i, 0) = rank;
400 vertex2AggId(i, 0) = labels(i);
408 numNonAggregatedNodes = 0;
409 aggregates->SetNumAggregates(numAggs);
412 DoGraphColoring(currentLevel, aggAlgo, pL.get<
bool>(
"aggregation: deterministic"), graph_kokkos, aggregates);
414 GetOStream(
Statistics1) <<
" num colors: " << aggregates->GetGraphNumColors() << std::endl;
418 std::vector<GO> localStats;
420 localStats = std::vector<GO>(1 + 2 * algos_.size());
421 localStats[0] = numRows;
423 for (
size_t a = 0; a < algos_.size(); a++) {
424 std::string phase = algos_[a]->description();
426 SubFactoryMonitor sfm2(*
this,
"Algo \"" + phase +
"\"" + (numNonAggregatedNodes == 0 ?
" [skipped since no nodes are left to aggregate]" :
""), currentLevel);
427 int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose());
429 algos_[a]->SetupPhase(pL, comm, numRows, numNonAggregatedNodes);
431 if (numNonAggregatedNodes > 0) {
433 algos_[a]->BuildAggregatesNonKokkos(pL, *graph, *aggregates, aggStatHost, numNonAggregatedNodes);
435 algos_[a]->BuildAggregates(pL, *graph_kokkos, *aggregates, aggStat, numNonAggregatedNodes);
437 algos_[a]->SetProcRankVerbose(oldRank);
440 localStats[2 * a + 1] = numRows - numNonAggregatedNodes;
441 localStats[2 * a + 2] = aggregates->GetNumAggregates();
445 std::vector<GO> globalStats(1 + 2 * algos_.size());
446 Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, (
int)localStats.size(), localStats.data(), globalStats.data());
447 GO numGlobalRows = globalStats[0];
448 GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0;
449 std::stringstream ss;
450 for (
size_t a = 0; a < algos_.size(); a++) {
451 std::string phase = algos_[a]->description();
452 GO numGlobalAggregated = globalStats[2 * a + 1];
453 GO numGlobalAggs = globalStats[2 * a + 2];
454 GO numGlobalNonAggregatedNodes = numGlobalRows - numGlobalAggregatedPrev;
455 double aggPercent = 100 * as<double>(numGlobalAggregated) / as<double>(numGlobalRows);
456 if (aggPercent > 99.99 && aggPercent < 100.00) {
464 ss <<
"Algo \"" + phase +
"\"" + (numGlobalNonAggregatedNodes == 0 ?
" [skipped since no nodes are left to aggregate]" :
"") << std::endl
465 <<
" aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) <<
" (phase), " << std::fixed
466 << std::setprecision(2) << numGlobalAggregated <<
"/" << numGlobalRows <<
" [" << aggPercent <<
"%] (total)\n"
467 <<
" remaining : " << numGlobalRows - numGlobalAggregated <<
"\n"
468 <<
" aggregates : " << numGlobalAggs - numGlobalAggsPrev <<
" (phase), " << numGlobalAggs <<
" (total)" << std::endl;
469 numGlobalAggregatedPrev = numGlobalAggregated;
470 numGlobalAggsPrev = numGlobalAggs;
476 TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError,
"MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!");
478 aggregates->AggregatesCrossProcessors(
false);
479 aggregates->ComputeAggregateSizes(
true );
481 Set(currentLevel,
"Aggregates", aggregates);
487 const std::string& aggAlgo,
488 const bool deterministic,
489 const RCP<const LWGraph_kokkos> graph,
490 RCP<Aggregates> aggregates)
const {
499 using KernelHandle = KokkosKernels::Experimental::
500 KokkosKernelsHandle<
typename graph_t::row_map_type::value_type,
501 typename graph_t::entries_type::value_type,
502 typename graph_t::entries_type::value_type,
503 typename graph_t::device_type::execution_space,
504 typename graph_t::device_type::memory_space,
505 typename graph_t::device_type::memory_space>;
508 kh.create_distance2_graph_coloring_handle();
511 auto coloringHandle = kh.get_distance2_graph_coloring_handle();
513 const LO numRows = graph->GetNodeNumVertices();
524 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_SERIAL);
526 }
else if (aggAlgo ==
"serial") {
527 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_SERIAL);
529 }
else if (aggAlgo ==
"default") {
530 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_DEFAULT);
532 }
else if (aggAlgo ==
"vertex based") {
533 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB);
535 }
else if (aggAlgo ==
"vertex based bit set") {
536 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB_BIT);
538 }
else if (aggAlgo ==
"edge filtering") {
539 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB_BIT_EF);
541 }
else if (aggAlgo ==
"net based bit set") {
542 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_NB_BIT);
545 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Unrecognized distance 2 coloring algorithm, valid options are: serial, default, matrix squared, vertex based, vertex based bit set, edge filtering")
549 typename graph_t::row_map_type aRowptrs = graph->getRowPtrs();
550 typename graph_t::entries_type aColinds = graph->getEntries();
555 SubFactoryMonitor sfm2(*
this,
"Algo \"Graph Coloring\": KokkosGraph Call", currentLevel);
556 KokkosGraph::Experimental::graph_color_distance2(&kh, numRows, aRowptrs, aColinds);
560 aggregates->SetGraphColors(coloringHandle->get_vertex_colors());
561 aggregates->SetGraphNumColors(
static_cast<LO
>(coloringHandle->get_num_colors()));
564 kh.destroy_distance2_graph_coloring_handle();