MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_AggregationPhase2aAlgorithm_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_
11#define MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_
12
13#include <Teuchos_Comm.hpp>
14#include <Teuchos_CommHelpers.hpp>
15
16#include <Xpetra_Vector.hpp>
17
19
20#include "MueLu_LWGraph.hpp"
21#include "MueLu_Aggregates.hpp"
22#include "MueLu_Exceptions.hpp"
23#include "MueLu_Monitor.hpp"
24
25#include "Kokkos_Sort.hpp"
26
27namespace MueLu {
28
29template <class LocalOrdinal, class GlobalOrdinal, class Node>
30void AggregationPhase2aAlgorithm<LocalOrdinal, GlobalOrdinal, Node>::SetupPhase(const ParameterList& params, Teuchos::RCP<const Teuchos::Comm<int>>& comm, LO& numLocalNodes, LO& numNonAggregatedNodes) {
31 bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2a");
32 if (matchMLbehavior) {
33 // Note: ML uses global counts to set the factor
34 // Passing # of nonaggregated nodes and # of nodes via aggStat
35 GO in_data[2] = {(GO)numNonAggregatedNodes, (GO)numLocalNodes};
36 GO out_data[2];
37 Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 2, in_data, out_data);
38 GO phase_one_aggregated = out_data[1] - out_data[0];
39 factorMLOverride_ = as<double>(phase_one_aggregated) / (out_data[1] + 1);
40 }
41}
42
43template <class LocalOrdinal, class GlobalOrdinal, class Node>
45 Monitor m(*this, "BuildAggregatesNonKokkos");
46
47 int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
48 int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
49 bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2a");
50
51 const LO numRows = graph.GetNodeNumVertices();
52 const int myRank = graph.GetComm()->getRank();
53
54 ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
55 ArrayRCP<LO> procWinner = aggregates.GetProcWinner()->getDataNonConst(0);
56
57 LO numLocalAggregates = aggregates.GetNumAggregates();
58
59 LO numLocalNodes = procWinner.size();
60 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
61
62 const double aggFactor = params.get<double>("aggregation: phase2a agg factor");
63 double factor;
64
65 if (matchMLbehavior) {
66 // Computed in SetupPhase.
67 // This needs to be precomputed in case some ranks are done after phase1.
68 factor = factorMLOverride_;
69
70 LO agg_stat_unaggregated = 0;
71 LO agg_stat_aggregated = 0;
72 LO agg_stat_bdry = 0;
73 for (LO i = 0; i < (LO)aggStat.size(); i++) {
74 if (aggStat[i] == AGGREGATED)
75 agg_stat_aggregated++;
76 else if (aggStat[i] == BOUNDARY)
77 agg_stat_bdry++;
78 else
79 agg_stat_unaggregated++;
80 }
81
82 // NOTE: ML always uses 3 as minNodesPerAggregate
83 minNodesPerAggregate = 3;
84
85 } else {
86 // MueLu defaults to using local counts to set the factor
87 factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
88 }
89 TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(Teuchos::ScalarTraits<double>::magnitude(factor)), Exceptions::RuntimeError, "Phase2a factor needs to be finite.");
90
91 // Now apply aggFactor
92 factor = pow(factor, aggFactor);
93
94 int aggIndex = -1;
95 size_t aggSize = 0;
96 std::vector<int> aggList(graph.getLocalMaxNumRowEntries());
97
98 for (LO rootCandidate = 0; rootCandidate < numRows; rootCandidate++) {
99 if (aggStat[rootCandidate] != READY) {
100 continue;
101 }
102
103 LO numNeighbors = 0;
104 aggSize = 0;
105 if (matchMLbehavior) {
106 aggList[aggSize++] = rootCandidate;
107 numNeighbors++;
108 }
109
110 auto neighOfINode = graph.getNeighborVertices(rootCandidate);
111
112 LO num_nonaggd_neighbors = 0, num_local_neighbors = 0;
113 for (int j = 0; j < neighOfINode.length; j++) {
114 LO neigh = neighOfINode(j);
115 if (graph.isLocalNeighborVertex(neigh))
116 num_local_neighbors++;
117
118 if (neigh != rootCandidate) {
119 if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) {
120 // If aggregate size does not exceed max size, add node to the tentative aggregate
121 // NOTE: We do not exit the loop over all neighbours since we have still
122 // to count all aggregated neighbour nodes for the aggregation criteria
123 // NOTE: We check here for the maximum aggregation size. If we would do it below
124 // with all the other check too big aggregates would not be accepted at all.
125 if (aggSize < as<size_t>(maxNodesPerAggregate))
126 aggList[aggSize++] = neigh;
127 num_nonaggd_neighbors++;
128 }
129
130 numNeighbors++;
131 }
132 }
133
134 bool accept_aggregate;
135 if (matchMLbehavior) {
136 // ML does this calculation slightly differently than MueLu does by default, specifically it
137 // uses the *local* number of neigbors, regardless of what they are.
138 // NOTE: ML does zero compression here. Not sure if it matters
139 // NOTE: ML uses a hardcoded value 3 instead of minNodesPerAggregate. This has been set above
140 LO rowi_N = num_local_neighbors;
141 num_nonaggd_neighbors++; // ML counts the node itself as a nonaggd_neighbor
142 accept_aggregate = (rowi_N > as<LO>(minNodesPerAggregate)) && (num_nonaggd_neighbors > (factor * rowi_N));
143 } else {
144 accept_aggregate = (aggSize > as<size_t>(minNodesPerAggregate)) && (aggSize > factor * numNeighbors);
145 }
146
147 if (accept_aggregate) {
148 // Accept new aggregate
149 // rootCandidate becomes the root of the newly formed aggregate
150 aggregates.SetIsRoot(rootCandidate);
151 aggIndex = numLocalAggregates++;
152
153 for (size_t k = 0; k < aggSize; k++) {
154 aggStat[aggList[k]] = AGGREGATED;
155 vertex2AggId[aggList[k]] = aggIndex;
156 procWinner[aggList[k]] = myRank;
157 }
158
159 numNonAggregatedNodes -= aggSize;
160 }
161 }
162
163 // update aggregate object
164 aggregates.SetNumAggregates(numLocalAggregates);
165}
166
167template <class LocalOrdinal, class GlobalOrdinal, class Node>
169 BuildAggregates(const ParameterList& params,
170 const LWGraph_kokkos& graph,
171 Aggregates& aggregates,
173 LO& numNonAggregatedNodes) const {
174 if (params.get<bool>("aggregation: deterministic")) {
175 Monitor m(*this, "BuildAggregatesDeterministic");
176 BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
177 } else {
178 Monitor m(*this, "BuildAggregatesRandom");
179 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
180 }
181
182} // BuildAggregates
183
184template <class LO, class GO, class Node>
186 BuildAggregatesRandom(const ParameterList& params,
187 const LWGraph_kokkos& graph,
188 Aggregates& aggregates,
190 LO& numNonAggregatedNodes) const {
191 using device_type = typename LWGraph_kokkos::device_type;
192 using execution_space = typename LWGraph_kokkos::execution_space;
193
194 const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
195 const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
196 bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2a");
197
198 const LO numRows = graph.GetNodeNumVertices();
199 const int myRank = graph.GetComm()->getRank();
200
201 auto vertex2AggId = aggregates.GetVertex2AggId()->getLocalViewDevice(Tpetra::Access::ReadWrite);
202 auto procWinner = aggregates.GetProcWinner()->getLocalViewDevice(Tpetra::Access::ReadWrite);
203 auto colors = aggregates.GetGraphColors();
204 const LO numColors = aggregates.GetGraphNumColors();
205
206 auto lclLWGraph = graph;
207
208 LO numLocalNodes = numRows;
209 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
210
211 const double aggFactor = 0.5;
212 double factor = static_cast<double>(numLocalAggregated) / (numLocalNodes + 1);
213 factor = pow(factor, aggFactor);
214
215 // LBV on Sept 12, 2019: this looks a little heavy handed,
216 // I'm not sure a view is needed to perform atomic updates.
217 // If we can avoid this and use a simple LO that would be
218 // simpler for later maintenance.
219 Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
220 typename Kokkos::View<LO, device_type>::host_mirror_type h_numLocalAggregates =
221 Kokkos::create_mirror_view(numLocalAggregates);
222 h_numLocalAggregates() = aggregates.GetNumAggregates();
223 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
224
225 // Now we create new aggregates using root nodes in all colors other than the first color,
226 // as the first color was already exhausted in Phase 1.
227 for (int color = 2; color < numColors + 1; ++color) {
228 LO tmpNumNonAggregatedNodes = 0;
229 Kokkos::parallel_reduce(
230 "Aggregation Phase 2a: loop over each individual color",
231 Kokkos::RangePolicy<execution_space>(0, numRows),
232 KOKKOS_LAMBDA(const LO rootCandidate, LO& lNumNonAggregatedNodes) {
233 if (aggStat(rootCandidate) == READY &&
234 colors(rootCandidate) == color) {
235 LO numNeighbors = 0;
236 LO aggSize = 0;
237 if (matchMLbehavior) {
238 aggSize += 1;
239 numNeighbors += 1;
240 }
241
242 auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
243
244 // Loop over neighbors to count how many nodes could join
245 // the new aggregate
246
247 for (int j = 0; j < neighbors.length; ++j) {
248 LO neigh = neighbors(j);
249 if (neigh != rootCandidate) {
250 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
251 (aggStat(neigh) == READY) &&
252 (aggSize < maxNodesPerAggregate)) {
253 ++aggSize;
254 }
255 ++numNeighbors;
256 }
257 }
258
259 // If a sufficient number of nodes can join the new aggregate
260 // then we actually create the aggregate.
261 if (aggSize > minNodesPerAggregate &&
262 (aggSize > factor * numNeighbors)) {
263 // aggregates.SetIsRoot(rootCandidate);
264 LO aggIndex = Kokkos::
265 atomic_fetch_add(&numLocalAggregates(), 1);
266
267 LO numAggregated = 0;
268
269 if (matchMLbehavior) {
270 // Add the root.
271 aggStat(rootCandidate) = AGGREGATED;
272 vertex2AggId(rootCandidate, 0) = aggIndex;
273 procWinner(rootCandidate, 0) = myRank;
274 ++numAggregated;
275 --lNumNonAggregatedNodes;
276 }
277
278 for (int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) {
279 LO neigh = neighbors(neighIdx);
280 if (neigh != rootCandidate) {
281 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
282 (aggStat(neigh) == READY) &&
283 (numAggregated < aggSize)) {
284 aggStat(neigh) = AGGREGATED;
285 vertex2AggId(neigh, 0) = aggIndex;
286 procWinner(neigh, 0) = myRank;
287
288 ++numAggregated;
289 --lNumNonAggregatedNodes;
290 }
291 }
292 }
293 }
294 }
295 },
296 tmpNumNonAggregatedNodes);
297 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
298 }
299
300 // update aggregate object
301 Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates);
302 aggregates.SetNumAggregates(h_numLocalAggregates());
303} // BuildAggregatesRandom
304
305template <class LO, class GO, class Node>
307 BuildAggregatesDeterministic(const ParameterList& params,
308 const LWGraph_kokkos& graph,
309 Aggregates& aggregates,
311 LO& numNonAggregatedNodes) const {
312 using device_type = typename LWGraph_kokkos::device_type;
313 using execution_space = typename LWGraph_kokkos::execution_space;
314
315 const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
316 const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
317
318 const LO numRows = graph.GetNodeNumVertices();
319 const int myRank = graph.GetComm()->getRank();
320
321 auto vertex2AggId = aggregates.GetVertex2AggId()->getLocalViewDevice(Tpetra::Access::ReadWrite);
322 auto procWinner = aggregates.GetProcWinner()->getLocalViewDevice(Tpetra::Access::ReadWrite);
323 auto colors = aggregates.GetGraphColors();
324 const LO numColors = aggregates.GetGraphNumColors();
325
326 auto lclLWGraph = graph;
327
328 LO numLocalNodes = procWinner.size();
329 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
330
331 const double aggFactor = 0.5;
332 double factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
333 factor = pow(factor, aggFactor);
334
335 Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
336 typename Kokkos::View<LO, device_type>::host_mirror_type h_numLocalAggregates =
337 Kokkos::create_mirror_view(numLocalAggregates);
338 h_numLocalAggregates() = aggregates.GetNumAggregates();
339 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
340
341 // Now we create new aggregates using root nodes in all colors other than the first color,
342 // as the first color was already exhausted in Phase 1.
343 //
344 // In the deterministic version, exactly the same set of aggregates will be created
345 // (as the nondeterministic version)
346 // because no vertex V can be a neighbor of two vertices of the same color, so two root
347 // candidates can't fight over V
348 //
349 // But, the precise values in vertex2AggId need to match exactly, so just sort the new
350 // roots of each color before assigning aggregate IDs
351
352 // numNonAggregatedNodes is the best available upper bound for the number of aggregates
353 // which may be created in this phase, so use it for the size of newRoots
354 Kokkos::View<LO*, device_type> newRoots("New root LIDs", numNonAggregatedNodes);
355 Kokkos::View<LO, device_type> numNewRoots("Number of new aggregates of current color");
356 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
357 for (int color = 1; color < numColors + 1; ++color) {
358 h_numNewRoots() = 0;
359 Kokkos::deep_copy(numNewRoots, h_numNewRoots);
360 Kokkos::parallel_for(
361 "Aggregation Phase 2a: determining new roots of current color",
362 Kokkos::RangePolicy<execution_space>(0, numRows),
363 KOKKOS_LAMBDA(const LO rootCandidate) {
364 if (aggStat(rootCandidate) == READY &&
365 colors(rootCandidate) == color) {
366 LO aggSize = 0;
367 auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
368 // Loop over neighbors to count how many nodes could join
369 // the new aggregate
370 LO numNeighbors = 0;
371 for (int j = 0; j < neighbors.length; ++j) {
372 LO neigh = neighbors(j);
373 if (neigh != rootCandidate) {
374 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
375 aggStat(neigh) == READY &&
376 aggSize < maxNodesPerAggregate) {
377 ++aggSize;
378 }
379 ++numNeighbors;
380 }
381 }
382 // If a sufficient number of nodes can join the new aggregate
383 // then we mark rootCandidate as a future root.
384 if (aggSize > minNodesPerAggregate && aggSize > factor * numNeighbors) {
385 LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
386 newRoots(newRootIndex) = rootCandidate;
387 }
388 }
389 });
390 Kokkos::deep_copy(h_numNewRoots, numNewRoots);
391
392 if (h_numNewRoots() > 0) {
393 // sort the new root indices
394 Kokkos::sort(newRoots, 0, h_numNewRoots());
395 // now, loop over all new roots again and actually create the aggregates
396 LO tmpNumNonAggregatedNodes = 0;
397 // First, just find the set of color vertices which will become aggregate roots
398 Kokkos::parallel_reduce(
399 "Aggregation Phase 2a: create new aggregates",
400 Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
401 KOKKOS_LAMBDA(const LO newRootIndex, LO& lNumNonAggregatedNodes) {
402 LO root = newRoots(newRootIndex);
403 LO newAggID = numLocalAggregates() + newRootIndex;
404 auto neighbors = lclLWGraph.getNeighborVertices(root);
405 // Loop over neighbors and add them to new aggregate
406 aggStat(root) = AGGREGATED;
407 vertex2AggId(root, 0) = newAggID;
408 LO aggSize = 1;
409 for (int j = 0; j < neighbors.length; ++j) {
410 LO neigh = neighbors(j);
411 if (neigh != root) {
412 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
413 aggStat(neigh) == READY &&
414 aggSize < maxNodesPerAggregate) {
415 aggStat(neigh) = AGGREGATED;
416 vertex2AggId(neigh, 0) = newAggID;
417 procWinner(neigh, 0) = myRank;
418 aggSize++;
419 }
420 }
421 }
422 lNumNonAggregatedNodes -= aggSize;
423 },
424 tmpNumNonAggregatedNodes);
425 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
426 h_numLocalAggregates() += h_numNewRoots();
427 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
428 }
429 }
430 aggregates.SetNumAggregates(h_numLocalAggregates());
431}
432
433} // namespace MueLu
434
435#endif /* MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_ */
Container class for aggregation information.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
void SetIsRoot(LO i, bool value=true)
Set root node information.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType
void BuildAggregatesNonKokkos(const ParameterList &params, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const override
void SetupPhase(const ParameterList &params, Teuchos::RCP< const Teuchos::Comm< int > > &comm, LO &numLocalNodes, LO &numNonAggregatedNodes) override
Local aggregation.
void BuildAggregatesDeterministic(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregates(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const override
void BuildAggregatesRandom(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
Exception throws to report errors in the internal logical of the program.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id 'v' is on current process.
const RCP< const Teuchos::Comm< int > > GetComm() const
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex 'v'.
Lightweight MueLu representation of a compressed row storage graph.
Lightweight MueLu representation of a compressed row storage graph.
Timer to be used in non-factories.
Namespace for MueLu classes and methods.