MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_CoalesceDropFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
11#define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
12
13#include <Kokkos_Core.hpp>
14#include <KokkosSparse_CrsMatrix.hpp>
15#include <sstream>
16#include <string>
17#include <tuple>
18
19#include "Xpetra_Matrix.hpp"
20
22
23#include "MueLu_AmalgamationInfo.hpp"
24#include "MueLu_Exceptions.hpp"
25#include "MueLu_Level.hpp"
26#include "MueLu_LWGraph_kokkos.hpp"
27#include "MueLu_MasterList.hpp"
28#include "MueLu_Monitor.hpp"
29#include "MueLu_Utilities.hpp"
30
34
35// The different dropping algorithms are split up over several translation units. This speeds up compilation and also avoids launch latencies on GPU.
37#include "MueLu_ScalarDroppingClassical.hpp"
38#include "MueLu_ScalarDroppingDistanceLaplacian.hpp"
40#include "MueLu_VectorDroppingClassical.hpp"
41#include "MueLu_VectorDroppingDistanceLaplacian.hpp"
42
43namespace MueLu {
44
45template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
47 RCP<ParameterList> validParamList = rcp(new ParameterList());
48
49#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
50 SET_VALID_ENTRY("aggregation: use blocking");
51 SET_VALID_ENTRY("aggregation: drop tol");
52 SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
53 SET_VALID_ENTRY("aggregation: Dirichlet threshold");
54 SET_VALID_ENTRY("aggregation: greedy Dirichlet");
55 SET_VALID_ENTRY("aggregation: row sum drop tol");
56 SET_VALID_ENTRY("aggregation: strength-of-connection: matrix");
57 SET_VALID_ENTRY("aggregation: strength-of-connection: measure");
58 SET_VALID_ENTRY("aggregation: drop scheme");
59 SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
60 SET_VALID_ENTRY("aggregation: distance laplacian metric");
61 SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
62 SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
63#ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
64 SET_VALID_ENTRY("aggregation: distance laplacian algo");
65 SET_VALID_ENTRY("aggregation: classical algo");
66#endif
67 SET_VALID_ENTRY("aggregation: symmetrize graph after dropping");
68 SET_VALID_ENTRY("aggregation: coloring: use color graph");
69 SET_VALID_ENTRY("aggregation: coloring: localize color graph");
70
71 SET_VALID_ENTRY("filtered matrix: use lumping");
72 SET_VALID_ENTRY("filtered matrix: reuse graph");
73 SET_VALID_ENTRY("filtered matrix: reuse eigenvalue");
74
75 SET_VALID_ENTRY("filtered matrix: use root stencil");
76 SET_VALID_ENTRY("filtered matrix: lumping choice");
77 SET_VALID_ENTRY("filtered matrix: use spread lumping");
78 SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor");
79 SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap");
80 SET_VALID_ENTRY("filtered matrix: Dirichlet threshold");
81 SET_VALID_ENTRY("filtered matrix: count negative diagonals");
82
83#undef SET_VALID_ENTRY
84 validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
85#ifndef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
86 validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("point-wise", "cut-drop"))));
87#else
88 validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("point-wise", "cut-drop", "signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical", "signed classical distance laplacian", "signed classical sa distance laplacian"))));
89 validParamList->getEntry("aggregation: classical algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
90 validParamList->getEntry("aggregation: distance laplacian algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
91#endif
92 validParamList->getEntry("aggregation: strength-of-connection: matrix").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("A", "distance laplacian"))));
93 validParamList->getEntry("aggregation: strength-of-connection: measure").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("smoothed aggregation", "signed smoothed aggregation", "signed ruge-stueben", "unscaled"))));
94 validParamList->getEntry("aggregation: distance laplacian metric").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("unweighted", "material"))));
95
96 validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
97 validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
98 validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
99 validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNumber");
100 validParamList->set<RCP<const FactoryBase>>("Material", Teuchos::null, "Generating factory for Material");
101
102 return validParamList;
103}
104
105template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
107 Input(currentLevel, "A");
108 Input(currentLevel, "UnAmalgamationInfo");
109
110 const ParameterList& pL = GetParameterList();
111
112 std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
113 bool needCoords = (socUsesMatrix == "distance laplacian");
114#ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
115 std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
116 needCoords |= (droppingMethod.find("distance laplacian") != std::string::npos);
117#endif
118 if (needCoords) {
119 Input(currentLevel, "Coordinates");
120 std::string distLaplMetric = pL.get<std::string>("aggregation: distance laplacian metric");
121 if (distLaplMetric == "material")
122 Input(currentLevel, "Material");
123 }
124
125 bool useBlocking = pL.get<bool>("aggregation: use blocking");
126#ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
127 useBlocking |= (droppingMethod.find("block diagonal") != std::string::npos);
128#endif
129 if (useBlocking) {
130 Input(currentLevel, "BlockNumber");
131 }
132}
133
134template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
136 Build(Level& currentLevel) const {
137 auto A = Get<RCP<Matrix>>(currentLevel, "A");
138 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
139 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
140
141 std::tuple<GlobalOrdinal, boundary_nodes_type> results;
142 if (blkSize == 1)
143 results = BuildScalar(currentLevel);
144 else
145 results = BuildVector(currentLevel);
146
147 if (GetVerbLevel() & Statistics1) {
148 GlobalOrdinal numDropped = std::get<0>(results);
149 auto boundaryNodes = std::get<1>(results);
150
151 GO numLocalBoundaryNodes = 0;
152
153 Kokkos::parallel_reduce(
154 "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)),
155 KOKKOS_LAMBDA(const LO i, GO& n) {
156 if (boundaryNodes(i))
157 n++;
158 },
159 numLocalBoundaryNodes);
160
161 if (IsPrint(Statistics1)) {
162 auto comm = A->getRowMap()->getComm();
163
164 std::vector<GlobalOrdinal> localStats = {numLocalBoundaryNodes, numDropped};
165 std::vector<GlobalOrdinal> globalStats(2);
166 Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 2, localStats.data(), globalStats.data());
167
168 GO numGlobalTotal = A->getGlobalNumEntries();
169 GO numGlobalBoundaryNodes = globalStats[0];
170 GO numGlobalDropped = globalStats[1];
171
172 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
173 if (numGlobalTotal != 0) {
174 GetOStream(Statistics1) << "Number of dropped entries: "
175 << numGlobalDropped << "/" << numGlobalTotal
176 << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)" << std::endl;
177 }
178 }
179 }
180}
181
182template <class local_matrix_type, class boundary_nodes_view, class... Functors>
183void runBoundaryFunctors(local_matrix_type& lclA, boundary_nodes_view& boundaryNodes, Functors&... functors) {
184 using local_ordinal_type = typename local_matrix_type::ordinal_type;
185 using execution_space = typename local_matrix_type::execution_space;
186 using range_type = Kokkos::RangePolicy<local_ordinal_type, execution_space>;
187 auto range = range_type(0, boundaryNodes.extent(0));
188 auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, functors...);
189 Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries);
190}
191
192template <class magnitudeType>
193void translateOldAlgoParam(const Teuchos::ParameterList& pL, std::string& droppingMethod, bool& useBlocking, std::string& socUsesMatrix, std::string& socUsesMeasure, bool& symmetrizeDroppedGraph, bool& generateColoringGraph, magnitudeType& threshold, MueLu::MatrixConstruction::lumpingType& lumpingChoice) {
194 std::set<std::string> validDroppingMethods = {"piece-wise", "cut-drop"};
195
196 if (!pL.get<bool>("filtered matrix: use lumping")) lumpingChoice = MueLu::MatrixConstruction::no_lumping;
197
198 if (validDroppingMethods.find(droppingMethod) == validDroppingMethods.end()) {
199 std::string algo = droppingMethod;
200 std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
201 std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
202
203 // Remove prefix "block diagonal" from algo
204 if (algo.find("block diagonal") == 0) {
205 useBlocking = true;
206 algo = algo.substr(14);
207 if (algo != "") {
208 algo = algo.substr(1);
209 }
210 }
211
212 if ((algo == "classical") || (algo == "signed classical sa") || (algo == "signed classical") || (algo == "colored signed classical")) {
213 socUsesMatrix = "A";
214
215 if (algo == "classical") {
216 socUsesMeasure = "smoothed aggregation";
217 } else if (algo == "signed classical sa") {
218 socUsesMeasure = "signed smoothed aggregation";
219 } else if (algo == "signed classical") {
220 socUsesMeasure = "signed ruge-stueben";
221 } else if (algo == "colored signed classical") {
222 socUsesMeasure = "signed ruge-stueben";
223 generateColoringGraph = true;
224 }
225
226 if (classicalAlgoStr == "default")
227 droppingMethod = "point-wise";
228 else if (classicalAlgoStr == "unscaled cut") {
229 socUsesMeasure = "unscaled";
230 droppingMethod = "cut-drop";
231 } else if (classicalAlgoStr == "scaled cut") {
232 droppingMethod = "cut-drop";
233 } else if (classicalAlgoStr == "scaled cut symmetric") {
234 droppingMethod = "cut-drop";
235 symmetrizeDroppedGraph = true;
236 }
237 } else if ((algo == "distance laplacian") || (algo == "signed classical sa distance laplacian") || (algo == "signed classical distance laplacian")) {
238 socUsesMatrix = "distance laplacian";
239
240 if (algo == "distance laplacian") {
241 socUsesMeasure = "smoothed aggregation";
242 } else if (algo == "signed classical sa distance laplacian") {
243 socUsesMeasure = "signed smoothed aggregation";
244 } else if (algo == "signed classical distance laplacian") {
245 socUsesMeasure = "signed ruge-stueben";
246 }
247
248 if (distanceLaplacianAlgoStr == "default")
249 droppingMethod = "point-wise";
250 else if (distanceLaplacianAlgoStr == "unscaled cut") {
251 socUsesMeasure = "unscaled";
252 droppingMethod = "cut-drop";
253 } else if (distanceLaplacianAlgoStr == "scaled cut") {
254 droppingMethod = "cut-drop";
255 } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
256 droppingMethod = "cut-drop";
257 symmetrizeDroppedGraph = true;
258 }
259 } else if (algo == "") {
260 // algo was "block diagonal", but we process and remove the "block diagonal" part
261 socUsesMatrix = "A";
262 threshold = Teuchos::ScalarTraits<magnitudeType>::zero();
263 }
264 }
265}
266
267template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
268std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
269 BuildScalar(Level& currentLevel) const {
270 FactoryMonitor m(*this, "BuildScalar", currentLevel);
271
272 using MatrixType = Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
273 using GraphType = Xpetra::CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
274 using local_matrix_type = typename MatrixType::local_matrix_device_type;
275 using local_graph_type = typename GraphType::local_graph_device_type;
276 using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
277 using entries_type = typename local_graph_type::entries_type::non_const_type;
278 using values_type = typename local_matrix_type::values_type::non_const_type;
279 using device_type = typename Node::device_type;
280 using memory_space = typename device_type::memory_space;
281 using results_view_type = Kokkos::View<DecisionType*, memory_space>;
282 using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
283 using doubleMultiVector = Xpetra::MultiVector<magnitudeType, LO, GO, NO>;
284
285 typedef Teuchos::ScalarTraits<Scalar> STS;
286 const magnitudeType zero = Teuchos::ScalarTraits<magnitudeType>::zero();
287
288 auto A = Get<RCP<Matrix>>(currentLevel, "A");
289
291 // Process parameterlist
292 const ParameterList& pL = GetParameterList();
293
294 // Boundary detection
295 const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
296 const magnitudeType rowSumTol = as<magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
297 const LocalOrdinal dirichletNonzeroThreshold = 1;
298
299 // Dropping
300 bool useBlocking = pL.get<bool>("aggregation: use blocking");
301 std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
302 std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
303 std::string socUsesMeasure = pL.get<std::string>("aggregation: strength-of-connection: measure");
304 std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
305 bool symmetrizeDroppedGraph = pL.get<bool>("aggregation: symmetrize graph after dropping");
306 magnitudeType threshold;
307 // If we're doing the ML-style halving of the drop tol at each level, we do that here.
308 if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
309 threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
310 else
311 threshold = as<magnitudeType>(pL.get<double>("aggregation: drop tol"));
312 bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
313
314 // Fill
315 const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
316 const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
317
318 const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
319 const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
320 const std::string lumpingChoiceString = pL.get<std::string>("filtered matrix: lumping choice");
322 if (lumpingChoiceString == "diag lumping")
324 else if (lumpingChoiceString == "distributed lumping")
326
327 const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<double>("filtered matrix: Dirichlet threshold"));
328
329 // coloring graph
330 bool generateColoringGraph = pL.get<bool>("aggregation: coloring: use color graph");
331 const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
332 const bool symmetrizeColoringGraph = true;
333
334#ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
335 translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
336#endif
337
338 {
339 std::stringstream ss;
340 ss << "dropping scheme = \"" << droppingMethod << "\", strength-of-connection measure = \"" << socUsesMeasure << "\", strength-of-connection matrix = \"" << socUsesMatrix << "\", ";
341 if (socUsesMatrix == "distance laplacian")
342 ss << "distance laplacian metric = \"" << distanceLaplacianMetric << "\", ";
343 ss << "threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << ", useBlocking = " << useBlocking;
344 ss << ", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
345
346 GetOStream(Runtime0) << ss.str();
347 }
348
349 TEUCHOS_ASSERT(!useRootStencil);
350 TEUCHOS_ASSERT(!useSpreadLumping);
351 TEUCHOS_ASSERT((lumpingChoice != MueLu::MatrixConstruction::distributed_lumping) || !reuseGraph);
352 if (droppingMethod == "cut-drop")
353 TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
354
356 // We perform four sweeps over the rows of A:
357 // Pass 1: detection of boundary nodes
358 // Pass 2: diagonal extraction
359 // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
360 // Pass 4: fill of the filtered matrix
361 //
362 // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
363
364 // TODO: We could merge pass 1 and 2.
365
366 auto crsA = toCrsMatrix(A);
367 auto lclA = crsA->getLocalMatrixDevice();
368 auto range = range_type(0, lclA.numRows());
369
371 // Pass 1: Detect boundary nodes
372 //
373 // The following criteria are available:
374 // - BoundaryDetection::PointDirichletFunctor
375 // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
376 // - BoundaryDetection::RowSumFunctor
377 // Marks rows as Dirichlet bases on row-sum criterion
378
379 // Dirichlet nodes
380 auto boundaryNodes = boundary_nodes_type("boundaryNodes", lclA.numRows()); // initialized to false
381 {
382 SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
383
384 // macro that applies boundary detection functors
385 auto dirichlet_detection = BoundaryDetection::PointDirichletFunctor(lclA, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
386
387 if (rowSumTol <= 0.) {
388 runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
389 } else {
390 auto apply_rowsum = BoundaryDetection::RowSumFunctor(lclA, boundaryNodes, rowSumTol);
391 runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection, apply_rowsum);
392 }
393 }
394 // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
395 // Otherwise we're now done with it now.
396
398 // Pass 2 & 3: Diagonal extraction and determine dropping and construct
399 // rowptr of filtered matrix
400 //
401 // The following criteria are available:
402 // - Misc::PointwiseDropBoundaryFunctor
403 // Drop all rows that have been marked as Dirichlet
404 // - Misc::DropOffRankFunctor
405 // Drop all entries that are off-rank
406 // - ClassicalDropping::DropFunctor
407 // Classical dropping
408 // - DistanceLaplacian::DropFunctor
409 // Distance Laplacian dropping
410 // - Misc::KeepDiagonalFunctor
411 // Mark diagonal as KEEP
412 // - Misc::MarkSingletonFunctor
413 // Mark singletons after dropping as Dirichlet
414 // - Misc::BlockDiagonalizeFunctor
415 // Drop coupling between blocks
416 //
417 // For the block diagonal variants we first block diagonalized and then apply "blocksize = 1" algorithms.
418
419 // rowptr of filtered A
420 auto filtered_rowptr = rowptr_type("filtered_rowptr", lclA.numRows() + 1);
421 // Number of nonzeros of filtered A
422 LocalOrdinal nnz_filtered = 0;
423 // dropping decisions for each entry
424 auto results = results_view_type("results", lclA.nnz()); // initialized to UNDECIDED
425 {
426 SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
427
428 if (threshold != zero) {
429 if (socUsesMatrix == "A") {
430 if (socUsesMeasure == "unscaled") {
431 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
432 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
433 } else if (socUsesMeasure == "smoothed aggregation") {
434 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
435 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
436 } else if (socUsesMeasure == "signed ruge-stueben") {
437 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
438 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
439 } else if (socUsesMeasure == "signed smoothed aggregation") {
440 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
441 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
442 }
443 } else if (socUsesMatrix == "distance laplacian") {
444 auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
445 if (socUsesMeasure == "unscaled") {
446 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
447 } else if (socUsesMeasure == "smoothed aggregation") {
448 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
449 } else if (socUsesMeasure == "signed ruge-stueben") {
450 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
451 } else if (socUsesMeasure == "signed smoothed aggregation") {
452 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
453 }
454 }
455 } else {
456 Kokkos::deep_copy(results, KEEP);
457
458 if (symmetrizeDroppedGraph) {
459 auto drop_boundaries = Misc::PointwiseSymmetricDropBoundaryFunctor(*A, boundaryNodes, results);
460 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, drop_boundaries);
461 } else {
462 auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
463 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, no_op);
464 }
465 }
466
467 if (symmetrizeDroppedGraph) {
468 auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
469 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, symmetrize);
470 }
471 }
472 GO numDropped = lclA.nnz() - nnz_filtered;
473 GO numGlobalDropped;
474 Teuchos::reduceAll(*A->getRowMap()->getComm(), Teuchos::REDUCE_SUM, 1, &numDropped, &numGlobalDropped);
475 // We now know the number of entries of filtered A and have the final rowptr.
476
478 // Pass 4: Create local matrix for filtered A
479 //
480 // Dropped entries are optionally lumped to the diagonal.
481
482 RCP<Matrix> filteredA;
483 RCP<LWGraph_kokkos> graph;
484 if (numGlobalDropped > 0) {
485 SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
486
487 local_matrix_type lclFilteredA;
488 local_graph_type lclGraph;
489 if (reuseGraph) {
490 filteredA = MatrixFactory::BuildCopy(A);
491 lclFilteredA = filteredA->getLocalMatrixDevice();
492
493 auto colidx = entries_type("entries", nnz_filtered);
494 lclGraph = local_graph_type(colidx, filtered_rowptr);
495 } else {
496 auto colidx = entries_type("entries", nnz_filtered);
497 auto values = values_type("values", nnz_filtered);
498 lclFilteredA = local_matrix_type("filteredA",
499 lclA.numRows(), lclA.numCols(),
500 nnz_filtered,
501 values, filtered_rowptr, colidx);
502 }
503
504 if (lumpingChoice != MueLu::MatrixConstruction::no_lumping) {
505 if (reuseGraph) {
506 auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, true>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
507 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
508 } else {
509 if (lumpingChoice == MueLu::MatrixConstruction::diag_lumping) {
510 auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::diag_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
511 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
512 } else if (lumpingChoice == MueLu::MatrixConstruction::distributed_lumping) {
513 auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::distributed_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
514 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
515 }
516 }
517 } else {
518 if (reuseGraph) {
519 auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, false>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
520 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
521 } else {
522 auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::no_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
523 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
524 }
525 }
526
527 if (!reuseGraph)
528 filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
529 filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
530
531 if (reuseEigenvalue) {
532 // Reuse max eigenvalue from A
533 // It is unclear what eigenvalue is the best for the smoothing, but we already may have
534 // the D^{-1}A estimate in A, may as well use it.
535 // NOTE: ML does that too
536 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
537 } else {
538 filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
539 }
540
541 if (!reuseGraph) {
542 // Use graph of filteredA as graph.
543 lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice();
544 }
545 graph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "amalgamated graph of A"));
546 graph->SetBoundaryNodeMap(boundaryNodes);
547 } else {
548 filteredA = A;
549 graph = rcp(new LWGraph_kokkos(filteredA->getCrsGraph()->getLocalGraphDevice(), filteredA->getRowMap(), filteredA->getColMap(), "amalgamated graph of A"));
550 graph->SetBoundaryNodeMap(boundaryNodes);
551 }
552
553 // Construct a second graph for coloring
554 if (generateColoringGraph) {
555 SubFactoryMonitor mColoringGraph(*this, "Construct coloring graph", currentLevel);
556
557 filtered_rowptr = rowptr_type("rowptr_coloring_graph", lclA.numRows() + 1);
558 if (localizeColoringGraph) {
559 auto drop_offrank = Misc::DropOffRankFunctor(lclA, results);
560 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, drop_offrank);
561 }
562 if (symmetrizeColoringGraph) {
563 auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
564 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, symmetrize);
565 }
566 auto colidx = entries_type("entries_coloring_graph", nnz_filtered);
567 auto lclGraph = local_graph_type(colidx, filtered_rowptr);
568 auto graphConstruction = MatrixConstruction::GraphConstruction<local_matrix_type, local_graph_type>(lclA, results, lclGraph);
569 Kokkos::parallel_for("MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
570
571 auto colorGraph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "coloring graph"));
572 Set(currentLevel, "Coloring Graph", colorGraph);
573 }
574
575 if (pL.get<bool>("filtered matrix: count negative diagonals")) {
576 // Count the negative diagonals (and display that information)
578 GetOStream(Runtime0) << "CoalesceDrop: Negative diagonals: " << neg_count << std::endl;
579 }
580
581 LO dofsPerNode = 1;
582 Set(currentLevel, "DofsPerNode", dofsPerNode);
583 Set(currentLevel, "Graph", graph);
584 Set(currentLevel, "A", filteredA);
585
586 return std::make_tuple(numDropped, boundaryNodes);
587}
588
589template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
590std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
591 BuildVector(Level& currentLevel) const {
592 FactoryMonitor m(*this, "BuildVector", currentLevel);
593
594 using MatrixType = Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
595 using GraphType = Xpetra::CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
596 using local_matrix_type = typename MatrixType::local_matrix_device_type;
597 using local_graph_type = typename GraphType::local_graph_device_type;
598 using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
599 using entries_type = typename local_graph_type::entries_type::non_const_type;
600 using values_type = typename local_matrix_type::values_type::non_const_type;
601 using device_type = typename Node::device_type;
602 using memory_space = typename device_type::memory_space;
603 using results_view_type = Kokkos::View<DecisionType*, memory_space>;
604 using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
605 using doubleMultiVector = Xpetra::MultiVector<magnitudeType, LO, GO, NO>;
606
607 typedef Teuchos::ScalarTraits<Scalar> STS;
608 const magnitudeType zero = Teuchos::ScalarTraits<magnitudeType>::zero();
609
610 auto A = Get<RCP<Matrix>>(currentLevel, "A");
611
612 /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
613 blkSize is the number of storage blocks that must kept together during the amalgamation process.
614
615 Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
616
617 numPDEs = blkSize * storageblocksize.
618
619 If numPDEs==1
620 Matrix is point storage (classical CRS storage). storageblocksize=1 and blkSize=1
621 No other values makes sense.
622
623 If numPDEs>1
624 If matrix uses point storage, then storageblocksize=1 and blkSize=numPDEs.
625 If matrix uses block storage, with block size of n, then storageblocksize=n, and blkSize=numPDEs/n.
626 Thus far, only storageblocksize=numPDEs and blkSize=1 has been tested.
627 */
628
629 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
630 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
631
632 auto amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
633
634 const RCP<const Map> rowMap = A->getRowMap();
635 const RCP<const Map> colMap = A->getColMap();
636
637 // build a node row map (uniqueMap = non-overlapping) and a node column map
638 // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation
639 // stored in the AmalgamationInfo class container contain the local node id
640 // given a local dof id. The data is calculated in the AmalgamationFactory and
641 // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo)
642 const RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
643 const RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
644 Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View?
645 Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
646
647 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
648 rowTranslationView(rowTranslationArray.getRawPtr(), rowTranslationArray.size());
649 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
650 colTranslationView(colTranslationArray.getRawPtr(), colTranslationArray.size());
651
652 // get number of local nodes
653 LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
654 typedef typename Kokkos::View<LocalOrdinal*, typename Node::device_type> id_translation_type;
655 id_translation_type rowTranslation("dofId2nodeId", rowTranslationArray.size());
656 id_translation_type colTranslation("ov_dofId2nodeId", colTranslationArray.size());
657 Kokkos::deep_copy(rowTranslation, rowTranslationView);
658 Kokkos::deep_copy(colTranslation, colTranslationView);
659
660 // extract striding information
661 blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
662 LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map
663 LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size)
664 if (A->IsView("stridedMaps") == true) {
665 const RCP<const Map> myMap = A->getRowMap("stridedMaps");
666 const RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
667 TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap");
668 blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
669 blkId = strMap->getStridedBlockId();
670 if (blkId > -1)
671 blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
672 }
673
674 TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkPartSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiple of " << blkPartSize);
675
677 // Process parameterlist
678 const ParameterList& pL = GetParameterList();
679
680 // Boundary detection
681 const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
682 const magnitudeType rowSumTol = as<magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
683 const LocalOrdinal dirichletNonzeroThreshold = 1;
684 const bool useGreedyDirichlet = pL.get<bool>("aggregation: greedy Dirichlet");
685 TEUCHOS_TEST_FOR_EXCEPTION(rowSumTol > zero, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: RowSum is not implemented for vectorial problems.");
686
687 // Dropping
688 bool useBlocking = pL.get<bool>("aggregation: use blocking");
689 std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
690 std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
691 std::string socUsesMeasure = pL.get<std::string>("aggregation: strength-of-connection: measure");
692 std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
693 bool symmetrizeDroppedGraph = pL.get<bool>("aggregation: symmetrize graph after dropping");
694 magnitudeType threshold;
695 // If we're doing the ML-style halving of the drop tol at each level, we do that here.
696 if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
697 threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
698 else
699 threshold = as<magnitudeType>(pL.get<double>("aggregation: drop tol"));
700 bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
701
702 // Fill
703 const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
704 const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
705
706 const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
707 const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
708 const std::string lumpingChoiceString = pL.get<std::string>("filtered matrix: lumping choice");
710 if (lumpingChoiceString == "diag lumping")
712 else if (lumpingChoiceString == "distributed lumping")
714
715 const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<double>("filtered matrix: Dirichlet threshold"));
716
717 // coloring graph
718 bool generateColoringGraph = pL.get<bool>("aggregation: coloring: use color graph");
719 const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
720 const bool symmetrizeColoringGraph = true;
721
722#ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
723 translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
724#endif
725 {
726 std::stringstream ss;
727 ss << "dropping scheme = \"" << droppingMethod << "\", strength-of-connection measure = \"" << socUsesMeasure << "\", strength-of-connection matrix = \"" << socUsesMatrix << "\", ";
728 if (socUsesMatrix == "distance laplacian")
729 ss << "distance laplacian metric = \"" << distanceLaplacianMetric << "\", ";
730 ss << "threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << ", useBlocking = " << useBlocking;
731 ss << ", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
732
733 GetOStream(Runtime0) << ss.str();
734 }
735
736 TEUCHOS_ASSERT(!useRootStencil);
737 TEUCHOS_ASSERT(!useSpreadLumping);
738 TEUCHOS_ASSERT((lumpingChoice != MueLu::MatrixConstruction::distributed_lumping) || !reuseGraph);
739 if (droppingMethod == "cut-drop")
740 TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
741
743 // We perform four sweeps over the rows of A:
744 // Pass 1: detection of boundary nodes
745 // Pass 2: diagonal extraction
746 // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
747 // Pass 4: fill of the filtered matrix
748 //
749 // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
750
751 // TODO: We could merge pass 1 and 2.
752
753 auto crsA = toCrsMatrix(A);
754 auto lclA = crsA->getLocalMatrixDevice();
755 auto range = range_type(0, numNodes);
756
758 // Pass 1: Detect boundary nodes
759 //
760 // The following criteria are available:
761 // - BoundaryDetection::VectorDirichletFunctor
762 // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
763
764 // Dirichlet nodes
765 auto boundaryNodes = boundary_nodes_type("boundaryNodes", numNodes); // initialized to false
766 {
767 SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
768
769 if (useGreedyDirichlet) {
770 auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, true>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
771 runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
772 } else {
773 auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, false>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
774 runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
775 }
776 }
777 // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
778 // Otherwise we're now done with it now.
779
781 // Pass 2 & 3: Diagonal extraction and determine dropping and construct
782 // rowptr of filtered matrix
783 //
784 // The following criteria are available:
785 // - Misc::VectorDropBoundaryFunctor
786 // Drop all rows that have been marked as Dirichlet
787 // - Misc::DropOffRankFunctor
788 // Drop all entries that are off-rank
789 // - ClassicalDropping::DropFunctor
790 // Classical dropping
791 // - DistanceLaplacian::VectorDropFunctor
792 // Distance Laplacian dropping
793 // - Misc::KeepDiagonalFunctor
794 // Mark diagonal as KEEP
795 // - Misc::MarkSingletonFunctor
796 // Mark singletons after dropping as Dirichlet
797
798 // rowptr of filtered A
799 auto filtered_rowptr = rowptr_type("rowptr", lclA.numRows() + 1);
800 auto graph_rowptr = rowptr_type("rowptr", numNodes + 1);
801 // Number of nonzeros of filtered A and graph
802 Kokkos::pair<LocalOrdinal, LocalOrdinal> nnz = {0, 0};
803
804 // dropping decisions for each entry
805 auto results = results_view_type("results", lclA.nnz()); // initialized to UNDECIDED
806
807 RCP<Matrix> mergedA;
808 {
809 SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
810
811 {
812 // Construct merged A.
813
814 auto merged_rowptr = rowptr_type("rowptr", numNodes + 1);
815 LocalOrdinal nnz_merged = 0;
816
817 auto functor = MatrixConstruction::MergeCountFunctor(lclA, blkPartSize, colTranslation, merged_rowptr);
818 Kokkos::parallel_scan("MergeCount", range, functor, nnz_merged);
819
820 local_graph_type lclMergedGraph;
821 auto colidx_merged = entries_type("entries", nnz_merged);
822 auto values_merged = values_type("values", nnz_merged);
823
824 local_matrix_type lclMergedA = local_matrix_type("mergedA",
825 numNodes, nonUniqueMap->getLocalNumElements(),
826 nnz_merged,
827 values_merged, merged_rowptr, colidx_merged);
828
829 auto fillFunctor = MatrixConstruction::MergeFillFunctor<local_matrix_type>(lclA, blkSize, colTranslation, lclMergedA);
830 Kokkos::parallel_for("MueLu::CoalesceDrop::MergeFill", range, fillFunctor);
831
832 mergedA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclMergedA, uniqueMap, nonUniqueMap, uniqueMap, uniqueMap);
833 }
834
835 if (threshold != zero) {
836 if (socUsesMatrix == "A") {
837 if (socUsesMeasure == "unscaled") {
838 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
839 } else if (socUsesMeasure == "smoothed aggregation") {
840 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
841 } else if (socUsesMeasure == "signed ruge-stueben") {
842 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
843 } else if (socUsesMeasure == "signed smoothed aggregation") {
844 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
845 }
846 } else if (socUsesMatrix == "distance laplacian") {
847 auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
848
849 Array<double> dlap_weights = pL.get<Array<double>>("aggregation: distance laplacian directional weights");
850 LocalOrdinal interleaved_blocksize = as<LocalOrdinal>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
851 if (socUsesMeasure == "distance laplacian") {
852 LO dim = (LO)coords->getNumVectors();
853 // If anything isn't 1.0 we need to turn on the weighting
854 bool non_unity = false;
855 for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) {
856 if (dlap_weights[i] != 1.0) {
857 non_unity = true;
858 }
859 }
860 if (non_unity) {
861 if ((LO)dlap_weights.size() == dim) {
862 distanceLaplacianMetric = "weighted";
863 } else if ((LO)dlap_weights.size() == interleaved_blocksize * dim)
864 distanceLaplacianMetric = "block weighted";
865 else {
866 TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError,
867 "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
868 }
869 if (GetVerbLevel() & Statistics1)
870 GetOStream(Statistics1) << "Using distance laplacian weights: " << dlap_weights << std::endl;
871 }
872 }
873
874 if (socUsesMeasure == "unscaled") {
875 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
876 } else if (socUsesMeasure == "smoothed aggregation") {
877 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
878 } else if (socUsesMeasure == "signed ruge-stueben") {
879 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
880 } else if (socUsesMeasure == "signed smoothed aggregation") {
881 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
882 }
883 }
884 } else {
885 Kokkos::deep_copy(results, KEEP);
886
887 auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
888 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, no_op);
889 }
890
891 if (symmetrizeDroppedGraph) {
892 auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
893 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, symmetrize);
894 }
895 }
896 LocalOrdinal nnz_filtered = nnz.first;
897 LocalOrdinal nnz_graph = nnz.second;
898 GO numTotal = lclA.nnz();
899 GO numDropped = numTotal - nnz_filtered;
900 GO numGlobalDropped;
901 Teuchos::reduceAll(*A->getRowMap()->getComm(), Teuchos::REDUCE_SUM, 1, &numDropped, &numGlobalDropped);
902 // We now know the number of entries of filtered A and have the final rowptr.
903
905 // Pass 4: Create local matrix for filtered A
906 //
907 // Dropped entries are optionally lumped to the diagonal.
908
909 RCP<Matrix> filteredA;
910 RCP<LWGraph_kokkos> graph;
911 if (numGlobalDropped > 0) {
912 SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
913
914 local_matrix_type lclFilteredA;
915 if (reuseGraph) {
916 lclFilteredA = local_matrix_type("filteredA", lclA.graph, lclA.numCols());
917 } else {
918 auto colidx = entries_type("entries", nnz_filtered);
919 auto values = values_type("values", nnz_filtered);
920 lclFilteredA = local_matrix_type("filteredA",
921 lclA.numRows(), lclA.numCols(),
922 nnz_filtered,
923 values, filtered_rowptr, colidx);
924 }
925
926 local_graph_type lclGraph;
927 {
928 auto colidx = entries_type("entries", nnz_graph);
929 lclGraph = local_graph_type(colidx, graph_rowptr);
930 }
931
932 if (lumpingChoice != MueLu::MatrixConstruction::no_lumping) {
933 if (reuseGraph) {
934 auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, true>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
935 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
936 } else {
937 auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, false>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
938 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
939 }
940 } else {
941 if (reuseGraph) {
942 auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, true>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
943 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
944 } else {
945 auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, false>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
946 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
947 }
948 }
949
950 filteredA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
951 filteredA->SetFixedBlockSize(blkSize);
952
953 if (reuseEigenvalue) {
954 // Reuse max eigenvalue from A
955 // It is unclear what eigenvalue is the best for the smoothing, but we already may have
956 // the D^{-1}A estimate in A, may as well use it.
957 // NOTE: ML does that too
958 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
959 } else {
960 filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
961 }
962
963 graph = rcp(new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
964 graph->SetBoundaryNodeMap(boundaryNodes);
965 } else {
966 filteredA = A;
967 graph = rcp(new LWGraph_kokkos(mergedA->getCrsGraph()->getLocalGraphDevice(), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
968 graph->SetBoundaryNodeMap(boundaryNodes);
969 }
970
971 // Construct a second graph for coloring
972 if (generateColoringGraph) {
973 SubFactoryMonitor mColoringGraph(*this, "Construct coloring graph", currentLevel);
974
975 filtered_rowptr = rowptr_type("rowptr_coloring_graph", lclA.numRows() + 1);
976 graph_rowptr = rowptr_type("rowptr", numNodes + 1);
977 if (localizeColoringGraph) {
978 auto drop_offrank = Misc::DropOffRankFunctor(lclA, results);
979 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, drop_offrank);
980 }
981 if (symmetrizeColoringGraph) {
982 auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
983 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, symmetrize);
984 }
985 auto colidx = entries_type("entries_coloring_graph", nnz_filtered);
986 auto lclGraph = local_graph_type(colidx, filtered_rowptr);
987 auto graphConstruction = MatrixConstruction::GraphConstruction<local_matrix_type, local_graph_type>(lclA, results, lclGraph);
988 Kokkos::parallel_for("MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
989
990 auto colorGraph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "coloring graph"));
991 Set(currentLevel, "Coloring Graph", colorGraph);
992 }
993
994 LO dofsPerNode = blkSize;
995
996 Set(currentLevel, "DofsPerNode", dofsPerNode);
997 Set(currentLevel, "Graph", graph);
998 Set(currentLevel, "A", filteredA);
999
1000 return std::make_tuple(numDropped, boundaryNodes);
1001}
1002
1003} // namespace MueLu
1004#endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Functor that serially applies sub-functors to rows.
Functor for marking nodes as Dirichlet based on rowsum.
Functor for marking nodes as Dirichlet in a block operator.
void DeclareInput(Level &currentLevel) const
Input.
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
void Build(Level &currentLevel) const
Build an object with this factory.
typename MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node >::boundary_nodes_type boundary_nodes_type
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildVector(Level &currentLevel) const
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildScalar(Level &currentLevel) const
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Lightweight MueLu representation of a compressed row storage graph.
Class that holds all level-specific information.
int GetLevelID() const
Return level number.
Functor does not reuse the graph of the matrix for a problem with blockSize == 1.
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping,...
Functor that drops off-rank entries.
Functor that drops boundary nodes for a blockSize == 1 problem.
Functor that symmetrizes the dropping decisions.
static void runDroppingFunctors_on_A(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
static void runDroppingFunctors_on_dlap(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Level &level, const Factory &factory)
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static GlobalOrdinal CountNegativeDiagonalEntries(const Matrix &A)
Counts the number of negative diagonal entries.
static void runDroppingFunctors_on_A(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
static void runDroppingFunctors_on_dlap(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Teuchos::Array< double > &dlap_weights, LocalOrdinal interleaved_blocksize, Level &level, const Factory &factory)
Namespace for MueLu classes and methods.
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
void runBoundaryFunctors(local_matrix_type &lclA, boundary_nodes_view &boundaryNodes, Functors &... functors)
void translateOldAlgoParam(const Teuchos::ParameterList &pL, std::string &droppingMethod, bool &useBlocking, std::string &socUsesMatrix, std::string &socUsesMeasure, bool &symmetrizeDroppedGraph, bool &generateColoringGraph, magnitudeType &threshold, MueLu::MatrixConstruction::lumpingType &lumpingChoice)