MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_AggregationPhase2bAlgorithm_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
11#define MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
12
13#include <Teuchos_Comm.hpp>
14#include <Teuchos_CommHelpers.hpp>
15
16#include <Xpetra_Vector.hpp>
17
19
20#include "MueLu_Aggregates.hpp"
21#include "MueLu_Exceptions.hpp"
22#include "MueLu_LWGraph.hpp"
23#include "MueLu_Monitor.hpp"
24
25namespace MueLu {
26
27// Try to stick unaggregated nodes into a neighboring aggregate if they are
28// not already too big
29template <class LocalOrdinal, class GlobalOrdinal, class Node>
31 Monitor m(*this, "BuildAggregatesNonKokkos");
32 bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2b");
33
34 const LO numRows = graph.GetNodeNumVertices();
35 const int myRank = graph.GetComm()->getRank();
36
37 ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
38 ArrayRCP<LO> procWinner = aggregates.GetProcWinner()->getDataNonConst(0);
39
40 LO numLocalAggregates = aggregates.GetNumAggregates();
41
42 const LO defaultConnectWeight = 100;
43 const LO penaltyConnectWeight = 10;
44
45 std::vector<LO> aggWeight(numLocalAggregates, 0);
46 std::vector<LO> connectWeight(numRows, defaultConnectWeight);
47 std::vector<LO> aggPenalties(numRows, 0);
48
49 // We do this cycle twice.
50 // I don't know why, but ML does it too
51 // taw: by running the aggregation routine more than once there is a chance that also
52 // non-aggregated nodes with a node distance of two are added to existing aggregates.
53 // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
54 // should be sufficient.
55 for (int k = 0; k < 2; k++) {
56 for (LO i = 0; i < numRows; i++) {
57 if (aggStat[i] != READY)
58 continue;
59
60 auto neighOfINode = graph.getNeighborVertices(i);
61
62 for (int j = 0; j < neighOfINode.length; j++) {
63 LO neigh = neighOfINode(j);
64
65 // We don't check (neigh != i), as it is covered by checking (aggStat[neigh] == AGGREGATED)
66 if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED)
67 aggWeight[vertex2AggId[neigh]] += connectWeight[neigh];
68 }
69
70 int bestScore = -100000;
71 int bestAggId = -1;
72 int bestConnect = -1;
73
74 for (int j = 0; j < neighOfINode.length; j++) {
75 LO neigh = neighOfINode(j);
76 int aggId = vertex2AggId[neigh];
77
78 // Note: The third condition is only relevant if the ML matching is enabled
79 if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED && (!matchMLbehavior || aggWeight[aggId] != 0)) {
80 int score = aggWeight[aggId] - aggPenalties[aggId];
81
82 if (score > bestScore) {
83 bestAggId = aggId;
84 bestScore = score;
85 bestConnect = connectWeight[neigh];
86
87 } else if (aggId == bestAggId && connectWeight[neigh] > bestConnect) {
88 bestConnect = connectWeight[neigh];
89 }
90
91 // Reset the weights for the next loop
92 aggWeight[aggId] = 0;
93 }
94 }
95
96 if (bestScore >= 0) {
97 aggStat[i] = AGGREGATED;
98 vertex2AggId[i] = bestAggId;
99 procWinner[i] = myRank;
100
101 numNonAggregatedNodes--;
102
103 aggPenalties[bestAggId]++;
104 connectWeight[i] = bestConnect - penaltyConnectWeight;
105 }
106 }
107 }
108}
109
110// Try to stick unaggregated nodes into a neighboring aggregate if they are
111// not already too big
112template <class LocalOrdinal, class GlobalOrdinal, class Node>
114 BuildAggregates(const ParameterList& params,
115 const LWGraph_kokkos& graph,
116 Aggregates& aggregates,
118 LO& numNonAggregatedNodes) const {
119 if (params.get<bool>("aggregation: deterministic")) {
120 Monitor m(*this, "BuildAggregatesDeterministic");
121 BuildAggregates<true>(params, graph, aggregates, aggStat, numNonAggregatedNodes);
122 } else {
123 Monitor m(*this, "BuildAggregatesRandom");
124 BuildAggregates<false>(params, graph, aggregates, aggStat, numNonAggregatedNodes);
125 }
126
127} // BuildAggregates
128
129template <class AggStatType, class ColorsType, class LO>
131 private:
132 AggStatType aggStat;
133 ColorsType colors;
134
135 public:
136 using value_type = LO[];
138
139 CountUnggregatedByColor(AggStatType& aggStat_, ColorsType& colors_, LO numColors_)
140 : aggStat(aggStat_)
141 , colors(colors_)
142 , value_count(numColors_) {}
143
144 KOKKOS_INLINE_FUNCTION
145 void operator()(const LO i, LO counts[]) const {
146 if (aggStat(i) == READY)
147 counts[colors(i) - 1] += 1;
148 }
149};
150
151template <class AggStatType, class ProcWinnerType, class Vertex2AggType, class ColorsType, class LocalGraphType, class AggPenaltyType, class LO, bool deterministic, bool matchMLbehavior>
153 private:
154 AggStatType aggStat;
155 ProcWinnerType procWinner;
156 Vertex2AggType vertex2AggId;
157 ColorsType colors;
158 LocalGraphType lclLWGraph;
159 AggPenaltyType aggPenalties;
160 AggPenaltyType aggPenaltyUpdates;
161 AggPenaltyType connectWeight;
165
166 public:
167 ExpansionFunctor(AggStatType& aggStat_, ProcWinnerType& procWinner_, Vertex2AggType& vertex2AggId_, ColorsType& colors_, LocalGraphType& lclLWGraph_, AggPenaltyType& aggPenalties_, AggPenaltyType& aggPenaltyUpdates_, AggPenaltyType& connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
168 : aggStat(aggStat_)
169 , procWinner(procWinner_)
170 , vertex2AggId(vertex2AggId_)
171 , colors(colors_)
172 , lclLWGraph(lclLWGraph_)
173 , aggPenalties(aggPenalties_)
174 , aggPenaltyUpdates(aggPenaltyUpdates_)
175 , connectWeight(connectWeight_)
176 , penaltyConnectWeight(penaltyConnectWeight_)
177 , color(color_)
178 , myRank(rank_) {}
179
180 ExpansionFunctor(AggStatType& aggStat_, ProcWinnerType& procWinner_, Vertex2AggType& vertex2AggId_, ColorsType& colors_, LocalGraphType& lclLWGraph_, AggPenaltyType& aggPenalties_, AggPenaltyType& connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
181 : aggStat(aggStat_)
182 , procWinner(procWinner_)
183 , vertex2AggId(vertex2AggId_)
184 , colors(colors_)
185 , lclLWGraph(lclLWGraph_)
186 , aggPenalties(aggPenalties_)
187 , connectWeight(connectWeight_)
188 , penaltyConnectWeight(penaltyConnectWeight_)
189 , color(color_)
190 , myRank(rank_) {}
191
192 KOKKOS_INLINE_FUNCTION
193 void operator()(const LO& i, LO& tmpNumAggregated) const {
194 if (aggStat(i) != READY || colors(i) != color)
195 return;
196
197 int bestScore = -100000;
198 int bestAggId = -1;
199 int bestConnect = -1;
200
201 auto neighOfINode = lclLWGraph.getNeighborVertices(i);
202
203 for (int j = 0; j < neighOfINode.length; j++) {
204 LO neigh = neighOfINode(j);
205
206 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
207 (aggStat(neigh) == AGGREGATED)) {
208 auto aggId = vertex2AggId(neigh, 0);
209 LO aggWeight = 0;
210 for (int k = 0; k < neighOfINode.length; k++) {
211 LO neigh2 = neighOfINode(k);
212 if (lclLWGraph.isLocalNeighborVertex(neigh2) &&
213 (aggStat(neigh2) == AGGREGATED) &&
214 (vertex2AggId(neigh2, 0) == aggId))
215 aggWeight += connectWeight(neigh2);
216 }
217
218 if (matchMLbehavior && (aggWeight == 0))
219 return;
220
221 int score = aggWeight - aggPenalties(aggId);
222
223 if (score > bestScore) {
224 bestAggId = aggId;
225 bestScore = score;
226 bestConnect = connectWeight(neigh);
227
228 } else if (aggId == bestAggId &&
229 connectWeight(neigh) > bestConnect) {
230 bestConnect = connectWeight(neigh);
231 }
232 }
233 }
234 if (bestScore >= 0) {
235 aggStat(i) = AGGREGATED;
236 vertex2AggId(i, 0) = bestAggId;
237 procWinner(i, 0) = myRank;
238
239 if constexpr (deterministic) {
240 Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
241 } else {
242 Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
243 }
244 connectWeight(i) = bestConnect - penaltyConnectWeight;
245 tmpNumAggregated++;
246 }
247 }
248};
249
250template <class LocalOrdinal, class GlobalOrdinal, class Node>
251template <bool deterministic>
253 BuildAggregates(const ParameterList& params,
254 const LWGraph_kokkos graph,
255 Aggregates& aggregates,
257 LO& numNonAggregatedNodes) const {
258 using device_type = typename LWGraph_kokkos::device_type;
259 using execution_space = typename LWGraph_kokkos::execution_space;
260
261 bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2b");
262
263 const LO numRows = graph.GetNodeNumVertices();
264 const int myRank = graph.GetComm()->getRank();
265
266 auto vertex2AggId = aggregates.GetVertex2AggId()->getLocalViewDevice(Tpetra::Access::ReadWrite);
267 auto procWinner = aggregates.GetProcWinner()->getLocalViewDevice(Tpetra::Access::ReadWrite);
268 auto colors = aggregates.GetGraphColors();
269 const LO numColors = aggregates.GetGraphNumColors();
270 const LO numLocalAggregates = aggregates.GetNumAggregates();
271
272 const LO defaultConnectWeight = 100;
273 const LO penaltyConnectWeight = 10;
274
275 Kokkos::View<LO*, device_type> connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows);
276 Kokkos::View<LO*, device_type> aggPenalties("aggPenalties", numLocalAggregates); // This gets initialized to zero here
277 Kokkos::View<LO*, device_type> aggPenaltyUpdates;
278 // if constexpr (deterministic)
279 aggPenaltyUpdates = Kokkos::View<LO*, device_type>("aggPenaltyUpdates", numLocalAggregates);
280
281 Kokkos::deep_copy(connectWeight, defaultConnectWeight);
282
283 // taw: by running the aggregation routine more than once there is a chance that also
284 // non-aggregated nodes with a node distance of two are added to existing aggregates.
285 // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
286 // should be sufficient.
287 // lbv: If the prior phase of aggregation where run without specifying an aggregate size,
288 // the distance 2 coloring and phase 1 aggregation actually guarantee that only one iteration
289 // is needed to reach distance 2 neighbors.
290 int maxIters = 2;
291 int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
292 if (maxNodesPerAggregate == std::numeric_limits<int>::max()) {
293 maxIters = 1;
294 }
295
296 Kokkos::View<LO*, Kokkos::HostSpace> numUnaggregatedNodesPerColor("numUnaggregatedNodesPerColor", numColors);
297 for (int iter = 0; iter < maxIters; ++iter) {
298 {
299 auto functor = CountUnggregatedByColor<decltype(aggStat), decltype(colors), LO>(aggStat, colors, numColors);
300 Kokkos::parallel_reduce(
301 "Aggregation Phase 2b: count unaggregated nodes per color",
302 Kokkos::RangePolicy<execution_space>(0, numRows),
303 functor,
304 numUnaggregatedNodesPerColor);
305 }
306 for (LO color = 1; color <= numColors; ++color) {
307 if (numUnaggregatedNodesPerColor(color - 1) == 0)
308 continue;
309
310 // the reduce counts how many nodes are aggregated by this phase,
311 // which will then be subtracted from numNonAggregatedNodes
312 LO numAggregated = 0;
313
314 if constexpr (deterministic) {
315 if (matchMLbehavior) {
316 auto functor = ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, true, true>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, aggPenaltyUpdates, connectWeight, penaltyConnectWeight, color, myRank);
317
318 Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
319 Kokkos::RangePolicy<execution_space>(0, numRows),
320 functor,
321 numAggregated);
322 } else {
323 auto functor = ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, true, false>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, aggPenaltyUpdates, connectWeight, penaltyConnectWeight, color, myRank);
324
325 Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
326 Kokkos::RangePolicy<execution_space>(0, numRows),
327 functor,
328 numAggregated);
329 }
330 } else {
331 if (matchMLbehavior) {
332 auto functor = ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, false, true>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, connectWeight, penaltyConnectWeight, color, myRank);
333
334 Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
335 Kokkos::RangePolicy<execution_space>(0, numRows),
336 functor,
337 numAggregated);
338 } else {
339 auto functor = ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, false, false>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, connectWeight, penaltyConnectWeight, color, myRank);
340
341 Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
342 Kokkos::RangePolicy<execution_space>(0, numRows),
343 functor,
344 numAggregated);
345 }
346 }
347
348 numNonAggregatedNodes -= numAggregated;
349
350 if (numNonAggregatedNodes == 0)
351 break;
352
353 if constexpr (deterministic) {
354 Kokkos::parallel_for(
355 "Aggregation Phase 2b: updating agg penalties",
356 Kokkos::RangePolicy<execution_space>(0, numLocalAggregates),
357 KOKKOS_LAMBDA(const LO agg) {
358 aggPenalties(agg) += aggPenaltyUpdates(agg);
359 aggPenaltyUpdates(agg) = 0;
360 });
361 }
362 }
363 } // loop over maxIters
364
365} // BuildAggregates
366
367} // namespace MueLu
368
369#endif /* MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_ */
Container class for aggregation information.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType
void BuildAggregatesNonKokkos(const ParameterList &params, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
void BuildAggregates(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
KOKKOS_INLINE_FUNCTION void operator()(const LO i, LO counts[]) const
CountUnggregatedByColor(AggStatType &aggStat_, ColorsType &colors_, LO numColors_)
ExpansionFunctor(AggStatType &aggStat_, ProcWinnerType &procWinner_, Vertex2AggType &vertex2AggId_, ColorsType &colors_, LocalGraphType &lclLWGraph_, AggPenaltyType &aggPenalties_, AggPenaltyType &connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
KOKKOS_INLINE_FUNCTION void operator()(const LO &i, LO &tmpNumAggregated) const
ExpansionFunctor(AggStatType &aggStat_, ProcWinnerType &procWinner_, Vertex2AggType &vertex2AggId_, ColorsType &colors_, LocalGraphType &lclLWGraph_, AggPenaltyType &aggPenalties_, AggPenaltyType &aggPenaltyUpdates_, AggPenaltyType &connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id 'v' is on current process.
const RCP< const Teuchos::Comm< int > > GetComm() const
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex 'v'.
Lightweight MueLu representation of a compressed row storage graph.
Lightweight MueLu representation of a compressed row storage graph.
Timer to be used in non-factories.
Namespace for MueLu classes and methods.