MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_AggregationPhase1Algorithm_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_AGGREGATIONPHASE1ALGORITHM_DEF_HPP_
11#define MUELU_AGGREGATIONPHASE1ALGORITHM_DEF_HPP_
12
13#include <queue>
14
15#include <Teuchos_Comm.hpp>
16#include <Teuchos_CommHelpers.hpp>
17
18#include <Xpetra_Vector.hpp>
19
21
22#include "MueLu_LWGraph.hpp"
23#include "MueLu_Aggregates.hpp"
24#include "MueLu_Exceptions.hpp"
25#include "MueLu_Monitor.hpp"
26
27#include "Kokkos_Sort.hpp"
28#include <Kokkos_ScatterView.hpp>
29
30namespace MueLu {
31
32template <class LocalOrdinal, class GlobalOrdinal, class Node>
34 BuildAggregatesNonKokkos(const ParameterList& params, const LWGraph& graph, Aggregates& aggregates, typename AggregationAlgorithmBase<LocalOrdinal, GlobalOrdinal, Node>::AggStatHostType& aggStat,
35 LO& numNonAggregatedNodes) const {
36 Monitor m(*this, "BuildAggregatesNonKokkos");
37
38 std::string orderingStr = params.get<std::string>("aggregation: ordering");
39 int maxNeighAlreadySelected = params.get<int>("aggregation: max selected neighbors");
40 int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
41 int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
42 bool matchMLBehavior = params.get<bool>("aggregation: match ML phase1");
43
44 TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate, Exceptions::RuntimeError,
45 "MueLu::UncoupledAggregationAlgorithm::BuildAggregatesNonKokkos: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
46
47 enum {
48 O_NATURAL,
49 O_RANDOM,
50 O_GRAPH
51 } ordering;
52 ordering = O_NATURAL; // initialize variable (fix CID 143665)
53 if (orderingStr == "natural") ordering = O_NATURAL;
54 if (orderingStr == "random") ordering = O_RANDOM;
55 if (orderingStr == "graph") ordering = O_GRAPH;
56
57 const LO numRows = graph.GetNodeNumVertices();
58 const int myRank = graph.GetComm()->getRank();
59
60 ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
61 ArrayRCP<LO> procWinner = aggregates.GetProcWinner()->getDataNonConst(0);
62
63 LO numLocalAggregates = aggregates.GetNumAggregates();
64
65 ArrayRCP<LO> randomVector;
66 if (ordering == O_RANDOM) {
67 randomVector = arcp<LO>(numRows);
68 for (LO i = 0; i < numRows; i++)
69 randomVector[i] = i;
70 RandomReorder(randomVector);
71 }
72
73 int aggIndex = -1;
74 size_t aggSize = 0;
75 std::vector<int> aggList(graph.getLocalMaxNumRowEntries());
76
77 std::queue<LO> graphOrderQueue;
78
79 // Main loop over all local rows of graph(A)
80 for (LO i = 0; i < numRows; i++) {
81 // Step 1: pick the next node to aggregate
82 LO rootCandidate = 0;
83 if (ordering == O_NATURAL)
84 rootCandidate = i;
85 else if (ordering == O_RANDOM)
86 rootCandidate = randomVector[i];
87 else if (ordering == O_GRAPH) {
88 if (graphOrderQueue.size() == 0) {
89 // Current queue is empty for "graph" ordering, populate with one READY node
90 for (LO jnode = 0; jnode < numRows; jnode++)
91 if (aggStat[jnode] == READY) {
92 graphOrderQueue.push(jnode);
93 break;
94 }
95 }
96 if (graphOrderQueue.size() == 0) {
97 // There are no more ready nodes, end the phase
98 break;
99 }
100 rootCandidate = graphOrderQueue.front(); // take next node from graph ordering queue
101 graphOrderQueue.pop(); // delete this node in list
102 }
103
104 if (aggStat[rootCandidate] != READY)
105 continue;
106
107 // Step 2: build tentative aggregate
108 aggSize = 0;
109 aggList[aggSize++] = rootCandidate;
110
111 auto neighOfINode = graph.getNeighborVertices(rootCandidate);
112
113 // If the number of neighbors is less than the minimum number of nodes
114 // per aggregate, we know this is not going to be a valid root, and we
115 // may skip it, but only for "natural" and "random" (for "graph" we still
116 // need to fetch the list of local neighbors to continue)
117 if ((ordering == O_NATURAL || ordering == O_RANDOM) &&
118 neighOfINode.length < minNodesPerAggregate) {
119 continue;
120 }
121
122 LO numAggregatedNeighbours = 0;
123
124 for (int j = 0; j < neighOfINode.length; j++) {
125 LO neigh = neighOfINode(j);
126
127 if (neigh != rootCandidate && graph.isLocalNeighborVertex(neigh)) {
128 if (aggStat[neigh] == READY || aggStat[neigh] == NOTSEL) {
129 // If aggregate size does not exceed max size, add node to the
130 // tentative aggregate
131 // NOTE: We do not exit the loop over all neighbours since we have
132 // still to count all aggregated neighbour nodes for the
133 // aggregation criteria
134 // NOTE: We check here for the maximum aggregation size. If we
135 // would do it below with all the other check too big aggregates
136 // would not be accepted at all.
137 if (aggSize < as<size_t>(maxNodesPerAggregate))
138 aggList[aggSize++] = neigh;
139
140 } else if (!matchMLBehavior || aggStat[neigh] != IGNORED) {
141 // NOTE: ML checks against BOUNDARY here, but boundary nodes are flagged as IGNORED by
142 // the time we get to Phase 1, so we check IGNORED instead
143 numAggregatedNeighbours++;
144 }
145 }
146 }
147
148 // Step 3: check if tentative aggregate is acceptable
149 if ((numAggregatedNeighbours <= maxNeighAlreadySelected) && // too many connections to other aggregates
150 (aggSize >= as<size_t>(minNodesPerAggregate))) { // too few nodes in the tentative aggregate
151 // Accept new aggregate
152 // rootCandidate becomes the root of the newly formed aggregate
153 aggregates.SetIsRoot(rootCandidate);
154 aggIndex = numLocalAggregates++;
155
156 for (size_t k = 0; k < aggSize; k++) {
157 aggStat[aggList[k]] = AGGREGATED;
158 vertex2AggId[aggList[k]] = aggIndex;
159 procWinner[aggList[k]] = myRank;
160 }
161
162 numNonAggregatedNodes -= aggSize;
163
164 } else {
165 // Aggregate is not accepted
166 aggStat[rootCandidate] = NOTSEL;
167
168 // Need this for the "graph" ordering below
169 // The original candidate is always aggList[0]
170 aggSize = 1;
171 }
172
173 if (ordering == O_GRAPH) {
174 // Add candidates to the list of nodes
175 // NOTE: the code have slightly different meanings depending on context:
176 // - if aggregate was accepted, we add neighbors of neighbors of the original candidate
177 // - if aggregate was not accepted, we add neighbors of the original candidate
178 for (size_t k = 0; k < aggSize; k++) {
179 auto neighOfJNode = graph.getNeighborVertices(aggList[k]);
180
181 for (int j = 0; j < neighOfJNode.length; j++) {
182 LO neigh = neighOfJNode(j);
183
184 if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY)
185 graphOrderQueue.push(neigh);
186 }
187 }
188 }
189 }
190
191 // Reset all NOTSEL vertices to READY
192 // This simplifies other algorithms
193 for (LO i = 0; i < numRows; i++)
194 if (aggStat[i] == NOTSEL)
195 aggStat[i] = READY;
196
197 // update aggregate object
198 aggregates.SetNumAggregates(numLocalAggregates);
199}
200
201template <class LocalOrdinal, class GlobalOrdinal, class Node>
203 // TODO: replace int
204 int n = list.size();
205 for (int i = 0; i < n - 1; i++)
206 std::swap(list[i], list[RandomOrdinal(i, n - 1)]);
207}
208
209template <class LocalOrdinal, class GlobalOrdinal, class Node>
211 return min + as<int>((max - min + 1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
212}
213
214template <class LocalOrdinal, class GlobalOrdinal, class Node>
216 BuildAggregates(const Teuchos::ParameterList& params,
217 const LWGraph_kokkos& graph,
218 Aggregates& aggregates,
220 LO& numNonAggregatedNodes) const {
221 int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
222 int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
223
224 TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate,
226 "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
227
228 // Distance-2 gives less control than serial uncoupled phase 1
229 // no custom row reordering because would require making deep copy
230 // of local matrix entries and permuting it can only enforce
231 // max aggregate size
232 {
233 if (params.get<bool>("aggregation: deterministic")) {
234 Monitor m(*this, "BuildAggregatesDeterministic");
235 BuildAggregatesDeterministic(maxNodesPerAggregate, graph,
236 aggregates, aggStat, numNonAggregatedNodes);
237 } else {
238 Monitor m(*this, "BuildAggregatesRandom");
239 BuildAggregatesRandom(maxNodesPerAggregate, graph,
240 aggregates, aggStat, numNonAggregatedNodes);
241 }
242 }
243}
244
245template <class LocalOrdinal, class GlobalOrdinal, class Node>
247 BuildAggregatesRandom(const LO maxAggSize,
248 const LWGraph_kokkos& graph,
249 Aggregates& aggregates,
251 LO& numNonAggregatedNodes) const {
252 using device_type = typename LWGraph_kokkos::device_type;
253 using execution_space = typename LWGraph_kokkos::execution_space;
254
255 const LO numRows = graph.GetNodeNumVertices();
256 const int myRank = graph.GetComm()->getRank();
257
258 // Extract data from aggregates
259 auto vertex2AggId = aggregates.GetVertex2AggId()->getLocalViewDevice(Tpetra::Access::ReadWrite);
260 auto procWinner = aggregates.GetProcWinner()->getLocalViewDevice(Tpetra::Access::ReadWrite);
261 auto colors = aggregates.GetGraphColors();
262
263 auto lclLWGraph = graph;
264
265 LO numAggregatedNodes = 0;
266 LO numLocalAggregates = aggregates.GetNumAggregates();
267 Kokkos::View<LO, device_type> aggCount("aggCount");
268 Kokkos::deep_copy(aggCount, numLocalAggregates);
269 Kokkos::parallel_for(
270 "Aggregation Phase 1: initial reduction over color == 1",
271 Kokkos::RangePolicy<LO, execution_space>(0, numRows),
272 KOKKOS_LAMBDA(const LO nodeIdx) {
273 if (colors(nodeIdx) == 1 && aggStat(nodeIdx) == READY) {
274 const LO aggIdx = Kokkos::atomic_fetch_add(&aggCount(), 1);
275 vertex2AggId(nodeIdx, 0) = aggIdx;
276 aggStat(nodeIdx) = AGGREGATED;
277 procWinner(nodeIdx, 0) = myRank;
278 }
279 });
280 // Truely we wish to compute: numAggregatedNodes = aggCount - numLocalAggregates
281 // before updating the value of numLocalAggregates.
282 // But since we also do not want to create a host mirror of aggCount we do some trickery...
283 numAggregatedNodes -= numLocalAggregates;
284 Kokkos::deep_copy(numLocalAggregates, aggCount);
285 numAggregatedNodes += numLocalAggregates;
286
287 // Compute the initial size of the aggregates.
288 // Note lbv 12-21-17: I am pretty sure that the aggregates will always be of size 1
289 // at this point so we could simplify the code below a lot if this
290 // assumption is correct...
291 Kokkos::View<LO*, device_type> aggSizesView("aggSizes", numLocalAggregates);
292 {
293 // Here there is a possibility that two vertices assigned to two different threads contribute
294 // to the same aggregate if somethings happened before phase 1?
295 auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView);
296 Kokkos::parallel_for(
297 "Aggregation Phase 1: compute initial aggregates size",
298 Kokkos::RangePolicy<LO, execution_space>(0, numRows),
299 KOKKOS_LAMBDA(const LO nodeIdx) {
300 auto aggSizesScatterViewAccess = aggSizesScatterView.access();
301 if (vertex2AggId(nodeIdx, 0) >= 0)
302 aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1;
303 });
304 Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView);
305 }
306
307 LO tmpNumAggregatedNodes = 0;
308 Kokkos::parallel_reduce(
309 "Aggregation Phase 1: main parallel_reduce over aggSizes",
310 Kokkos::RangePolicy<size_t, execution_space>(0, numRows),
311 KOKKOS_LAMBDA(const size_t nodeIdx, LO& lNumAggregatedNodes) {
312 if (colors(nodeIdx) != 1 && (aggStat(nodeIdx) == READY || aggStat(nodeIdx) == NOTSEL)) {
313 // Get neighbors of vertex i and look for local, aggregated,
314 // color 1 neighbor (valid root).
315 auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx);
316 for (LO j = 0; j < neighbors.length; ++j) {
317 auto nei = neighbors.colidx(j);
318 if (lclLWGraph.isLocalNeighborVertex(nei) && colors(nei) == 1 && aggStat(nei) == AGGREGATED) {
319 // This atomic guarentees that any other node trying to
320 // join aggregate agg has the correct size.
321 LO agg = vertex2AggId(nei, 0);
322 const LO aggSize = Kokkos::atomic_fetch_add(&aggSizesView(agg),
323 1);
324 if (aggSize < maxAggSize) {
325 // assign vertex i to aggregate with root j
326 vertex2AggId(nodeIdx, 0) = agg;
327 procWinner(nodeIdx, 0) = myRank;
328 aggStat(nodeIdx) = AGGREGATED;
329 ++lNumAggregatedNodes;
330 break;
331 } else {
332 // Decrement back the value of aggSizesView(agg)
333 Kokkos::atomic_dec(&aggSizesView(agg));
334 }
335 }
336 }
337 }
338 // if(aggStat(nodeIdx) != AGGREGATED) {
339 // lNumNonAggregatedNodes++;
340 if (aggStat(nodeIdx) == NOTSEL) {
341 aggStat(nodeIdx) = READY;
342 }
343 // }
344 },
345 tmpNumAggregatedNodes);
346 numAggregatedNodes += tmpNumAggregatedNodes;
347 numNonAggregatedNodes -= numAggregatedNodes;
348
349 // update aggregate object
350 aggregates.SetNumAggregates(numLocalAggregates);
351}
352
353template <class LocalOrdinal, class GlobalOrdinal, class Node>
355 BuildAggregatesDeterministic(const LO maxAggSize,
356 const LWGraph_kokkos& graph,
357 Aggregates& aggregates,
359 LO& numNonAggregatedNodes) const {
360 using device_type = typename LWGraph_kokkos::device_type;
361 using execution_space = typename LWGraph_kokkos::execution_space;
362
363 const LO numRows = graph.GetNodeNumVertices();
364 const int myRank = graph.GetComm()->getRank();
365
366 auto vertex2AggId = aggregates.GetVertex2AggId()->getLocalViewDevice(Tpetra::Access::ReadWrite);
367 auto procWinner = aggregates.GetProcWinner()->getLocalViewDevice(Tpetra::Access::ReadWrite);
368 auto colors = aggregates.GetGraphColors();
369
370 auto lclLWGraph = graph;
371
372 LO numLocalAggregates = aggregates.GetNumAggregates();
373 Kokkos::View<LO, device_type> numLocalAggregatesView("Num aggregates");
374 {
375 auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView);
376 h_nla() = numLocalAggregates;
377 Kokkos::deep_copy(numLocalAggregatesView, h_nla);
378 }
379
380 Kokkos::View<LO*, device_type> newRoots("New root LIDs", numNonAggregatedNodes);
381 Kokkos::View<LO, device_type> numNewRoots("Number of new aggregates of current color");
382 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
383
384 // first loop build the set of new roots
385 Kokkos::parallel_for(
386 "Aggregation Phase 1: building list of new roots",
387 Kokkos::RangePolicy<execution_space>(0, numRows),
388 KOKKOS_LAMBDA(const LO i) {
389 if (colors(i) == 1 && aggStat(i) == READY) {
390 // i will become a root
391 newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i;
392 }
393 });
394 Kokkos::deep_copy(h_numNewRoots, numNewRoots);
395 // sort new roots by LID to guarantee determinism in agg IDs
396 Kokkos::sort(newRoots, 0, h_numNewRoots());
397 LO numAggregated = 0;
398 Kokkos::parallel_reduce(
399 "Aggregation Phase 1: aggregating nodes",
400 Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
401 KOKKOS_LAMBDA(const LO rootIndex, LO& lnumAggregated) {
402 LO root = newRoots(rootIndex);
403 LO aggID = numLocalAggregatesView() + rootIndex;
404 LO aggSize = 1;
405 vertex2AggId(root, 0) = aggID;
406 procWinner(root, 0) = myRank;
407 aggStat(root) = AGGREGATED;
408 auto neighOfRoot = lclLWGraph.getNeighborVertices(root);
409 for (LO n = 0; n < neighOfRoot.length; n++) {
410 LO neigh = neighOfRoot(n);
411 if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) == READY) {
412 // add neigh to aggregate
413 vertex2AggId(neigh, 0) = aggID;
414 procWinner(neigh, 0) = myRank;
415 aggStat(neigh) = AGGREGATED;
416 aggSize++;
417 if (aggSize == maxAggSize) {
418 // can't add any more nodes
419 break;
420 }
421 }
422 }
423 lnumAggregated += aggSize;
424 },
425 numAggregated);
426 numNonAggregatedNodes -= numAggregated;
427 // update aggregate object
428 aggregates.SetNumAggregates(numLocalAggregates + h_numNewRoots());
429}
430
431} // namespace MueLu
432
433#endif /* MUELU_AGGREGATIONPHASE1ALGORITHM_DEF_HPP_ */
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
void SetIsRoot(LO i, bool value=true)
Set root node information.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType
void RandomReorder(ArrayRCP< LO > list) const
Utility to take a list of integers and reorder them randomly (by using a local permutation).
void BuildAggregates(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregatesNonKokkos(const ParameterList &params, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
void BuildAggregatesDeterministic(const LO maxAggSize, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
int RandomOrdinal(int min, int max) const
Generate a random number in the range [min, max].
void BuildAggregatesRandom(const LO maxAggSize, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
Exception throws to report errors in the internal logical of the program.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id 'v' is on current process.
const RCP< const Teuchos::Comm< int > > GetComm() const
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex 'v'.
Lightweight MueLu representation of a compressed row storage graph.
Lightweight MueLu representation of a compressed row storage graph.
Timer to be used in non-factories.
Namespace for MueLu classes and methods.