MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_Aggregates_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_AGGREGATES_DEF_HPP
11#define MUELU_AGGREGATES_DEF_HPP
12
13#include <Xpetra_Map.hpp>
14#include <Xpetra_Vector.hpp>
15#include <Xpetra_MultiVectorFactory.hpp>
16#include <Xpetra_VectorFactory.hpp>
17
18#include "MueLu_LWGraph_kokkos.hpp"
19
20#include "MueLu_LWGraph.hpp"
23
24namespace MueLu {
25
26template <class LocalOrdinal, class GlobalOrdinal, class Node>
28 numAggregates_ = 0;
29 numGlobalAggregates_ = 0;
30
31 vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1);
32 vertex2AggId_->putScalar(MUELU_UNAGGREGATED);
33
34 procWinner_ = LOVectorFactory::Build(graph.GetImportMap());
35 procWinner_->putScalar(MUELU_UNASSIGNED);
36
37 isRoot_ = Teuchos::ArrayRCP<bool>(graph.GetImportMap()->getLocalNumElements(), false);
38
39 // slow but safe, force TentativePFactory to build column map for P itself
40 aggregatesIncludeGhosts_ = true;
41}
42
43template <class LocalOrdinal, class GlobalOrdinal, class Node>
46 numAggregates_ = 0;
47 numGlobalAggregates_ = 0;
48
49 vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1);
50 vertex2AggId_->putScalar(MUELU_UNAGGREGATED);
51
52 procWinner_ = LOVectorFactory::Build(graph.GetImportMap());
53 procWinner_->putScalar(MUELU_UNASSIGNED);
54
55 isRoot_ = Teuchos::ArrayRCP<bool>(graph.GetImportMap()->getLocalNumElements(), false);
56
57 // slow but safe, force TentativePFactory to build column map for P itself
58 aggregatesIncludeGhosts_ = true;
59}
60
61template <class LocalOrdinal, class GlobalOrdinal, class Node>
63 Aggregates(const RCP<const Map>& map) {
64 numAggregates_ = 0;
65 numGlobalAggregates_ = 0;
66
67 vertex2AggId_ = LOMultiVectorFactory::Build(map, 1);
68 vertex2AggId_->putScalar(MUELU_UNAGGREGATED);
69
70 procWinner_ = LOVectorFactory::Build(map);
71 procWinner_->putScalar(MUELU_UNASSIGNED);
72
73 isRoot_ = Teuchos::ArrayRCP<bool>(map->getLocalNumElements(), false);
74
75 // slow but safe, force TentativePFactory to build column map for P itself
76 aggregatesIncludeGhosts_ = true;
77}
78
79template <class LocalOrdinal, class GlobalOrdinal, class Node>
82 if (aggregateSizes_.size() && !forceRecompute) {
83 return aggregateSizes_;
84
85 } else {
86 // It is necessary to initialize this to 0
87 aggregates_sizes_type aggregateSizes("aggregates", numAggregates_);
88
89 int myPID = GetMap()->getComm()->getRank();
90
91 auto vertex2AggId = vertex2AggId_->getLocalViewDevice(Tpetra::Access::ReadOnly);
92 auto procWinner = procWinner_->getLocalViewDevice(Tpetra::Access::ReadOnly);
93
94 typename AppendTrait<decltype(aggregateSizes_), Kokkos::Atomic>::type aggregateSizesAtomic = aggregateSizes;
95 Kokkos::parallel_for(
96 "MueLu:Aggregates:ComputeAggregateSizes:for", range_type(0, procWinner.size()),
97 KOKKOS_LAMBDA(const LO i) {
98 if (procWinner(i, 0) == myPID)
99 aggregateSizesAtomic(vertex2AggId(i, 0))++;
100 });
101
102 aggregateSizes_ = aggregateSizes;
103
104 return aggregateSizes;
105 }
106}
107
108template <class LocalOrdinal, class GlobalOrdinal, class Node>
109typename Teuchos::ArrayRCP<LocalOrdinal>
111 ComputeAggregateSizesArrayRCP(bool forceRecompute) const {
112 auto aggregateSizes = this->ComputeAggregateSizes(forceRecompute);
113
114 // if this is the first time this is called, setup the host mirror and fill it
115 if (!aggregateSizesHost_.is_allocated()) {
116 aggregateSizesHost_ = Kokkos::create_mirror_view(aggregateSizes);
117 Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes);
118 } else {
119 // otherwise, only update if we forced a recompute
120 if (forceRecompute)
121 Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes);
122 }
123
124 // put the data in an ArrayRCP, but do not give it ownership of the data
125 Teuchos::ArrayRCP<LocalOrdinal> aggregateSizesArrayRCP(aggregateSizesHost_.data(), 0, aggregateSizesHost_.extent(0), false);
126
127 return aggregateSizesArrayRCP;
128}
129
130template <class LocalOrdinal, class GlobalOrdinal, class Node>
133 using row_map_type = typename local_graph_type::row_map_type;
134 using entries_type = typename local_graph_type::entries_type;
135 using size_type = typename local_graph_type::size_type;
136
137 auto numAggregates = numAggregates_;
138
139 if (static_cast<LO>(graph_.numRows()) == numAggregates)
140 return graph_;
141
142 auto vertex2AggId = vertex2AggId_->getLocalViewDevice(Tpetra::Access::ReadOnly);
143 auto procWinner = procWinner_->getLocalViewDevice(Tpetra::Access::ReadOnly);
144 auto sizes = ComputeAggregateSizes();
145
146 // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + rows(0) = 0.
147 typename row_map_type::non_const_type rows("Agg_rows", numAggregates + 1); // rows(0) = 0 automatically
148
149 // parallel_scan (exclusive)
150 Kokkos::parallel_scan(
151 "MueLu:Aggregates:GetGraph:compute_rows", range_type(0, numAggregates),
152 KOKKOS_LAMBDA(const LO i, LO& update, const bool& final_pass) {
153 update += sizes(i);
154 if (final_pass)
155 rows(i + 1) = update;
156 });
157
158 decltype(rows) offsets(Kokkos::ViewAllocateWithoutInitializing("Agg_offsets"), numAggregates + 1); // +1 is just for ease
159 Kokkos::deep_copy(offsets, rows);
160
161 int myPID = GetMap()->getComm()->getRank();
162
163 size_type numNNZ;
164 {
165 Kokkos::View<size_type, device_type> numNNZ_device = Kokkos::subview(rows, numAggregates);
166 typename Kokkos::View<size_type, device_type>::host_mirror_type numNNZ_host = Kokkos::create_mirror_view(numNNZ_device);
167 Kokkos::deep_copy(numNNZ_host, numNNZ_device);
168 numNNZ = numNNZ_host();
169 }
170 typename entries_type::non_const_type cols(Kokkos::ViewAllocateWithoutInitializing("Agg_cols"), numNNZ);
171 size_t realnnz = 0;
172 Kokkos::parallel_reduce(
173 "MueLu:Aggregates:GetGraph:compute_cols", range_type(0, procWinner.size()),
174 KOKKOS_LAMBDA(const LO i, size_t& nnz) {
175 if (procWinner(i, 0) == myPID) {
176 typedef typename std::remove_reference<decltype(offsets(0))>::type atomic_incr_type;
177 auto idx = Kokkos::atomic_fetch_add(&offsets(vertex2AggId(i, 0)), atomic_incr_type(1));
178 cols(idx) = i;
179 nnz++;
180 }
181 },
182 realnnz);
183 TEUCHOS_TEST_FOR_EXCEPTION(realnnz != numNNZ, Exceptions::RuntimeError,
184 "MueLu: Internal error: Something is wrong with aggregates graph construction: numNNZ = " << numNNZ << " != " << realnnz << " = realnnz");
185
186 graph_ = local_graph_type(cols, rows);
187
188 return graph_;
189}
190
191template <class LocalOrdinal, class GlobalOrdinal, class Node>
193 LO numAggs = GetNumAggregates();
194 LO numNodes = vertex2AggId_->getLocalLength();
195 auto vertex2AggId = vertex2AggId_->getLocalViewDevice(Tpetra::Access::ReadOnly);
196 typename aggregates_sizes_type::const_type aggSizes = ComputeAggregateSizes(true);
197 LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
198
199 aggPtr = LO_view("aggPtr", numAggs + 1);
200 aggNodes = LO_view("aggNodes", numNodes);
201 LO_view aggCurr("agg curr", numAggs + 1);
202
203 // Construct the "rowptr" and the counter
204 Kokkos::parallel_scan(
205 "MueLu:Aggregates:ComputeNodesInAggregate:scan", range_type(0, numAggs + 1),
206 KOKKOS_LAMBDA(const LO aggIdx, LO& aggOffset, bool final_pass) {
207 LO count = 0;
208 if (aggIdx < numAggs)
209 count = aggSizes(aggIdx);
210 if (final_pass) {
211 aggPtr(aggIdx) = aggOffset;
212 aggCurr(aggIdx) = aggOffset;
213 if (aggIdx == numAggs)
214 aggCurr(numAggs) = 0; // use this for counting unaggregated nodes
215 }
216 aggOffset += count;
217 });
218
219 // Preallocate unaggregated to the correct size
220 LO numUnaggregated = 0;
221 Kokkos::parallel_reduce(
222 "MueLu:Aggregates:ComputeNodesInAggregate:unaggregatedSize", range_type(0, numNodes),
223 KOKKOS_LAMBDA(const LO nodeIdx, LO& count) {
224 if (vertex2AggId(nodeIdx, 0) == INVALID)
225 count++;
226 },
227 numUnaggregated);
228 unaggregated = LO_view("unaggregated", numUnaggregated);
229
230 // Stick the nodes in each aggregate's spot
231 Kokkos::parallel_for(
232 "MueLu:Aggregates:ComputeNodesInAggregate:for", range_type(0, numNodes),
233 KOKKOS_LAMBDA(const LO nodeIdx) {
234 LO aggIdx = vertex2AggId(nodeIdx, 0);
235 if (aggIdx != INVALID) {
236 // atomic postincrement aggCurr(aggIdx) each time
237 aggNodes(Kokkos::atomic_fetch_add(&aggCurr(aggIdx), 1)) = nodeIdx;
238 } else {
239 // same, but using last entry of aggCurr for unaggregated nodes
240 unaggregated(Kokkos::atomic_fetch_add(&aggCurr(numAggs), 1)) = nodeIdx;
241 }
242 });
243}
244
245template <class LocalOrdinal, class GlobalOrdinal, class Node>
247 if (numGlobalAggregates_ == -1)
248 return BaseClass::description() + "{nGlobalAggregates = not computed}";
249 else
250 return BaseClass::description() + "{nGlobalAggregates = " + toString(numGlobalAggregates_) + "}";
251}
252
253template <class LocalOrdinal, class GlobalOrdinal, class Node>
254void Aggregates<LocalOrdinal, GlobalOrdinal, Node>::print(Teuchos::FancyOStream& out, const VerbLevel verbLevel) const {
256
257 if (verbLevel & Statistics1) {
258 if (numGlobalAggregates_ == -1)
259 out0 << "Global number of aggregates: not computed " << std::endl;
260 else
261 out0 << "Global number of aggregates: " << numGlobalAggregates_ << std::endl;
262 }
263}
264
265template <class LocalOrdinal, class GlobalOrdinal, class Node>
267 if (numGlobalAggregates_ != -1) {
268 LO nAggregates = GetNumAggregates();
269 GO nGlobalAggregates;
270 MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (GO)nAggregates, nGlobalAggregates);
271 SetNumGlobalAggregates(nGlobalAggregates);
272 }
273 return numGlobalAggregates_;
274}
275
276template <class LocalOrdinal, class GlobalOrdinal, class Node>
277const RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
279 return vertex2AggId_->getMap();
280}
281
282} // namespace MueLu
283
284#endif // MUELU_AGGREGATES_DEF_HPP
#define MUELU_UNAGGREGATED
#define MUELU_UNASSIGNED
#define MUELU_DESCRIBE
Helper macro for implementing Describable::describe() for BaseClass objects.
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Container class for aggregation information.
typename LWGraph_kokkos::local_graph_type local_graph_type
void ComputeNodesInAggregate(LO_view &aggPtr, LO_view &aggNodes, LO_view &unaggregated) const
Generates a compressed list of nodes in each aggregate, where the entries in aggNodes[aggPtr[i]] up t...
Teuchos::ArrayRCP< LocalOrdinal > ComputeAggregateSizesArrayRCP(bool forceRecompute=false) const
Compute sizes of aggregates.
void print(Teuchos::FancyOStream &out, const VerbLevel verbLevel=verbLevel_default) const
Print the object with some verbosity level to an FancyOStream object.
aggregates_sizes_type::const_type ComputeAggregateSizes(bool forceRecompute=false) const
Compute sizes of aggregates.
local_graph_type GetGraph() const
Aggregates(const LWGraph &graph)
Standard constructor for Aggregates structure.
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
const RCP< const Map > GetMap() const
returns (overlapping) map of aggregate/node distribution
GO GetNumGlobalAggregatesComputeIfNeeded()
Get global number of aggregates.
std::string description() const
Return a simple one-line description of this object.
Kokkos::View< LocalOrdinal *, device_type > aggregates_sizes_type
Kokkos::View< local_ordinal_type *, device_type > LO_view
virtual std::string description() const
Return a simple one-line description of this object.
Exception throws to report errors in the internal logical of the program.
const RCP< const Map > GetImportMap() const
Return overlapping import map (nodes).
Lightweight MueLu representation of a compressed row storage graph.
Lightweight MueLu representation of a compressed row storage graph.
Namespace for MueLu classes and methods.
@ Statistics1
Print more statistics.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.