31 bool matchMLbehavior = params.get<
bool>(
"aggregation: match ML phase2a");
32 if (matchMLbehavior) {
35 GO in_data[2] = {(GO)numNonAggregatedNodes, (GO)numLocalNodes};
37 Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 2, in_data, out_data);
38 GO phase_one_aggregated = out_data[1] - out_data[0];
39 factorMLOverride_ = as<double>(phase_one_aggregated) / (out_data[1] + 1);
45 Monitor m(*
this,
"BuildAggregatesNonKokkos");
47 int minNodesPerAggregate = params.get<
int>(
"aggregation: min agg size");
48 int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
49 bool matchMLbehavior = params.get<
bool>(
"aggregation: match ML phase2a");
52 const int myRank = graph.
GetComm()->getRank();
54 ArrayRCP<LO> vertex2AggId = aggregates.
GetVertex2AggId()->getDataNonConst(0);
55 ArrayRCP<LO> procWinner = aggregates.
GetProcWinner()->getDataNonConst(0);
59 LO numLocalNodes = procWinner.size();
60 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
62 const double aggFactor = params.get<
double>(
"aggregation: phase2a agg factor");
65 if (matchMLbehavior) {
68 factor = factorMLOverride_;
70 LO agg_stat_unaggregated = 0;
71 LO agg_stat_aggregated = 0;
73 for (LO i = 0; i < (LO)aggStat.size(); i++) {
75 agg_stat_aggregated++;
79 agg_stat_unaggregated++;
83 minNodesPerAggregate = 3;
87 factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
89 TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(Teuchos::ScalarTraits<double>::magnitude(factor)),
Exceptions::RuntimeError,
"Phase2a factor needs to be finite.");
92 factor = pow(factor, aggFactor);
98 for (LO rootCandidate = 0; rootCandidate < numRows; rootCandidate++) {
99 if (aggStat[rootCandidate] !=
READY) {
105 if (matchMLbehavior) {
106 aggList[aggSize++] = rootCandidate;
112 LO num_nonaggd_neighbors = 0, num_local_neighbors = 0;
113 for (
int j = 0; j < neighOfINode.length; j++) {
114 LO neigh = neighOfINode(j);
116 num_local_neighbors++;
118 if (neigh != rootCandidate) {
125 if (aggSize < as<size_t>(maxNodesPerAggregate))
126 aggList[aggSize++] = neigh;
127 num_nonaggd_neighbors++;
134 bool accept_aggregate;
135 if (matchMLbehavior) {
140 LO rowi_N = num_local_neighbors;
141 num_nonaggd_neighbors++;
142 accept_aggregate = (rowi_N > as<LO>(minNodesPerAggregate)) && (num_nonaggd_neighbors > (factor * rowi_N));
144 accept_aggregate = (aggSize > as<size_t>(minNodesPerAggregate)) && (aggSize > factor * numNeighbors);
147 if (accept_aggregate) {
151 aggIndex = numLocalAggregates++;
153 for (
size_t k = 0; k < aggSize; k++) {
155 vertex2AggId[aggList[k]] = aggIndex;
156 procWinner[aggList[k]] = myRank;
159 numNonAggregatedNodes -= aggSize;
190 LO& numNonAggregatedNodes)
const {
194 const int minNodesPerAggregate = params.get<
int>(
"aggregation: min agg size");
195 const int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
196 bool matchMLbehavior = params.get<
bool>(
"aggregation: match ML phase2a");
199 const int myRank = graph.
GetComm()->getRank();
201 auto vertex2AggId = aggregates.
GetVertex2AggId()->getLocalViewDevice(Tpetra::Access::ReadWrite);
202 auto procWinner = aggregates.
GetProcWinner()->getLocalViewDevice(Tpetra::Access::ReadWrite);
206 auto lclLWGraph = graph;
208 LO numLocalNodes = numRows;
209 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
211 const double aggFactor = 0.5;
212 double factor =
static_cast<double>(numLocalAggregated) / (numLocalNodes + 1);
213 factor = pow(factor, aggFactor);
219 Kokkos::View<LO, device_type> numLocalAggregates(
"numLocalAggregates");
220 typename Kokkos::View<LO, device_type>::host_mirror_type h_numLocalAggregates =
221 Kokkos::create_mirror_view(numLocalAggregates);
223 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
227 for (
int color = 2; color < numColors + 1; ++color) {
228 LO tmpNumNonAggregatedNodes = 0;
229 Kokkos::parallel_reduce(
230 "Aggregation Phase 2a: loop over each individual color",
231 Kokkos::RangePolicy<execution_space>(0, numRows),
232 KOKKOS_LAMBDA(
const LO rootCandidate, LO& lNumNonAggregatedNodes) {
233 if (aggStat(rootCandidate) ==
READY &&
234 colors(rootCandidate) == color) {
237 if (matchMLbehavior) {
242 auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
247 for (
int j = 0; j < neighbors.length; ++j) {
248 LO neigh = neighbors(j);
249 if (neigh != rootCandidate) {
250 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
251 (aggStat(neigh) ==
READY) &&
252 (aggSize < maxNodesPerAggregate)) {
261 if (aggSize > minNodesPerAggregate &&
262 (aggSize > factor * numNeighbors)) {
264 LO aggIndex = Kokkos::
265 atomic_fetch_add(&numLocalAggregates(), 1);
267 LO numAggregated = 0;
269 if (matchMLbehavior) {
272 vertex2AggId(rootCandidate, 0) = aggIndex;
273 procWinner(rootCandidate, 0) = myRank;
275 --lNumNonAggregatedNodes;
278 for (
int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) {
279 LO neigh = neighbors(neighIdx);
280 if (neigh != rootCandidate) {
281 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
282 (aggStat(neigh) ==
READY) &&
283 (numAggregated < aggSize)) {
285 vertex2AggId(neigh, 0) = aggIndex;
286 procWinner(neigh, 0) = myRank;
289 --lNumNonAggregatedNodes;
296 tmpNumNonAggregatedNodes);
297 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
301 Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates);
311 LO& numNonAggregatedNodes)
const {
315 const int minNodesPerAggregate = params.get<
int>(
"aggregation: min agg size");
316 const int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
319 const int myRank = graph.
GetComm()->getRank();
321 auto vertex2AggId = aggregates.
GetVertex2AggId()->getLocalViewDevice(Tpetra::Access::ReadWrite);
322 auto procWinner = aggregates.
GetProcWinner()->getLocalViewDevice(Tpetra::Access::ReadWrite);
326 auto lclLWGraph = graph;
328 LO numLocalNodes = procWinner.size();
329 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
331 const double aggFactor = 0.5;
332 double factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
333 factor = pow(factor, aggFactor);
335 Kokkos::View<LO, device_type> numLocalAggregates(
"numLocalAggregates");
336 typename Kokkos::View<LO, device_type>::host_mirror_type h_numLocalAggregates =
337 Kokkos::create_mirror_view(numLocalAggregates);
339 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
354 Kokkos::View<LO*, device_type> newRoots(
"New root LIDs", numNonAggregatedNodes);
355 Kokkos::View<LO, device_type> numNewRoots(
"Number of new aggregates of current color");
356 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
357 for (
int color = 1; color < numColors + 1; ++color) {
359 Kokkos::deep_copy(numNewRoots, h_numNewRoots);
360 Kokkos::parallel_for(
361 "Aggregation Phase 2a: determining new roots of current color",
362 Kokkos::RangePolicy<execution_space>(0, numRows),
363 KOKKOS_LAMBDA(
const LO rootCandidate) {
364 if (aggStat(rootCandidate) ==
READY &&
365 colors(rootCandidate) == color) {
367 auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
371 for (
int j = 0; j < neighbors.length; ++j) {
372 LO neigh = neighbors(j);
373 if (neigh != rootCandidate) {
374 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
375 aggStat(neigh) ==
READY &&
376 aggSize < maxNodesPerAggregate) {
384 if (aggSize > minNodesPerAggregate && aggSize > factor * numNeighbors) {
385 LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
386 newRoots(newRootIndex) = rootCandidate;
390 Kokkos::deep_copy(h_numNewRoots, numNewRoots);
392 if (h_numNewRoots() > 0) {
394 Kokkos::sort(newRoots, 0, h_numNewRoots());
396 LO tmpNumNonAggregatedNodes = 0;
398 Kokkos::parallel_reduce(
399 "Aggregation Phase 2a: create new aggregates",
400 Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
401 KOKKOS_LAMBDA(
const LO newRootIndex, LO& lNumNonAggregatedNodes) {
402 LO root = newRoots(newRootIndex);
403 LO newAggID = numLocalAggregates() + newRootIndex;
404 auto neighbors = lclLWGraph.getNeighborVertices(root);
407 vertex2AggId(root, 0) = newAggID;
409 for (
int j = 0; j < neighbors.length; ++j) {
410 LO neigh = neighbors(j);
412 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
413 aggStat(neigh) ==
READY &&
414 aggSize < maxNodesPerAggregate) {
416 vertex2AggId(neigh, 0) = newAggID;
417 procWinner(neigh, 0) = myRank;
422 lNumNonAggregatedNodes -= aggSize;
424 tmpNumNonAggregatedNodes);
425 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
426 h_numLocalAggregates() += h_numNewRoots();
427 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);