MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_CoalesceDropFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
11#define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
12
13#include <Kokkos_Core.hpp>
14#include <KokkosSparse_CrsMatrix.hpp>
15#include <sstream>
16#include <string>
17#include <tuple>
18
19#include "Xpetra_Matrix.hpp"
20
22
23#include "MueLu_AmalgamationInfo.hpp"
24#include "MueLu_Exceptions.hpp"
25#include "MueLu_Level.hpp"
26#include "MueLu_LWGraph_kokkos.hpp"
27#include "MueLu_MasterList.hpp"
28#include "MueLu_Monitor.hpp"
29#include "MueLu_Utilities.hpp"
30
34
35// The different dropping algorithms are split up over several translation units. This speeds up compilation and also avoids launch latencies on GPU.
37#include "MueLu_ScalarDroppingClassical.hpp"
38#include "MueLu_ScalarDroppingDistanceLaplacian.hpp"
40#include "MueLu_VectorDroppingClassical.hpp"
41#include "MueLu_VectorDroppingDistanceLaplacian.hpp"
42
43namespace MueLu {
44
45template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
47 RCP<ParameterList> validParamList = rcp(new ParameterList());
48
49#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
50 SET_VALID_ENTRY("aggregation: use blocking");
51 SET_VALID_ENTRY("aggregation: drop tol");
52 SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
53 SET_VALID_ENTRY("aggregation: Dirichlet threshold");
54 SET_VALID_ENTRY("aggregation: greedy Dirichlet");
55 SET_VALID_ENTRY("aggregation: row sum drop tol");
56 SET_VALID_ENTRY("aggregation: strength-of-connection: matrix");
57 SET_VALID_ENTRY("aggregation: strength-of-connection: measure");
58 SET_VALID_ENTRY("aggregation: drop scheme");
59 SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
60 SET_VALID_ENTRY("aggregation: distance laplacian metric");
61 SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
62 SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
63#ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
64 SET_VALID_ENTRY("aggregation: distance laplacian algo");
65 SET_VALID_ENTRY("aggregation: classical algo");
66#endif
67 SET_VALID_ENTRY("aggregation: symmetrize graph after dropping");
68 SET_VALID_ENTRY("aggregation: coloring: use color graph");
69 SET_VALID_ENTRY("aggregation: coloring: localize color graph");
70
71 SET_VALID_ENTRY("filtered matrix: use lumping");
72 SET_VALID_ENTRY("filtered matrix: reuse graph");
73 SET_VALID_ENTRY("filtered matrix: reuse eigenvalue");
74
75 SET_VALID_ENTRY("filtered matrix: use root stencil");
76 SET_VALID_ENTRY("filtered matrix: lumping choice");
77 SET_VALID_ENTRY("filtered matrix: use spread lumping");
78 SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor");
79 SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap");
80 SET_VALID_ENTRY("filtered matrix: Dirichlet threshold");
81 SET_VALID_ENTRY("filtered matrix: count negative diagonals");
82
83#undef SET_VALID_ENTRY
84 validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
85#ifndef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
86 validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("point-wise", "cut-drop"))));
87#else
88 validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("point-wise", "cut-drop", "signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical", "signed classical distance laplacian", "signed classical sa distance laplacian"))));
89 validParamList->getEntry("aggregation: classical algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
90 validParamList->getEntry("aggregation: distance laplacian algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
91#endif
92 validParamList->getEntry("aggregation: strength-of-connection: matrix").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("A", "distance laplacian"))));
93 validParamList->getEntry("aggregation: strength-of-connection: measure").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("smoothed aggregation", "signed smoothed aggregation", "signed ruge-stueben", "unscaled"))));
94 validParamList->getEntry("aggregation: distance laplacian metric").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("unweighted", "material"))));
95
96 validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
97 validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
98 validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
99 validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNumber");
100 validParamList->set<RCP<const FactoryBase>>("Material", Teuchos::null, "Generating factory for Material");
101
102 return validParamList;
103}
104
105template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
107 Input(currentLevel, "A");
108 Input(currentLevel, "UnAmalgamationInfo");
109
110 const ParameterList& pL = GetParameterList();
111
112 std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
113 bool needCoords = (socUsesMatrix == "distance laplacian");
114#ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
115 std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
116 needCoords |= (droppingMethod.find("distance laplacian") != std::string::npos);
117#endif
118 if (needCoords) {
119 Input(currentLevel, "Coordinates");
120 std::string distLaplMetric = pL.get<std::string>("aggregation: distance laplacian metric");
121 if (distLaplMetric == "material")
122 Input(currentLevel, "Material");
123 }
124
125 bool useBlocking = pL.get<bool>("aggregation: use blocking");
126#ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
127 useBlocking |= (droppingMethod.find("block diagonal") != std::string::npos);
128#endif
129 if (useBlocking) {
130 Input(currentLevel, "BlockNumber");
131 }
132}
133
134template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
136 Build(Level& currentLevel) const {
137 auto A = Get<RCP<Matrix>>(currentLevel, "A");
138 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
139 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
140
141 std::tuple<GlobalOrdinal, boundary_nodes_type> results;
142 if (blkSize == 1)
143 results = BuildScalar(currentLevel);
144 else
145 results = BuildVector(currentLevel);
146
147 if (GetVerbLevel() & Statistics1) {
148 GlobalOrdinal numDropped = std::get<0>(results);
149 auto boundaryNodes = std::get<1>(results);
150
151 GO numLocalBoundaryNodes = 0;
152
153 Kokkos::parallel_reduce(
154 "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)),
155 KOKKOS_LAMBDA(const LO i, GO& n) {
156 if (boundaryNodes(i))
157 n++;
158 },
159 numLocalBoundaryNodes);
160
161 if (IsPrint(Statistics1)) {
162 auto comm = A->getRowMap()->getComm();
163
164 std::vector<GlobalOrdinal> localStats = {numLocalBoundaryNodes, numDropped};
165 std::vector<GlobalOrdinal> globalStats(2);
166 Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 2, localStats.data(), globalStats.data());
167
168 GO numGlobalTotal = A->getGlobalNumEntries();
169 GO numGlobalBoundaryNodes = globalStats[0];
170 GO numGlobalDropped = globalStats[1];
171
172 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
173 if (numGlobalTotal != 0) {
174 GetOStream(Statistics1) << "Number of dropped entries: "
175 << numGlobalDropped << "/" << numGlobalTotal
176 << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)" << std::endl;
177 }
178 }
179 }
180}
181
182template <class local_matrix_type, class boundary_nodes_view, class... Functors>
183void runBoundaryFunctors(local_matrix_type& lclA, boundary_nodes_view& boundaryNodes, Functors&... functors) {
184 using local_ordinal_type = typename local_matrix_type::ordinal_type;
185 using execution_space = typename local_matrix_type::execution_space;
186 using range_type = Kokkos::RangePolicy<local_ordinal_type, execution_space>;
187 auto range = range_type(0, boundaryNodes.extent(0));
188 auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, functors...);
189 Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries);
190}
191
192template <class magnitudeType>
193void translateOldAlgoParam(const Teuchos::ParameterList& pL, std::string& droppingMethod, bool& useBlocking, std::string& socUsesMatrix, std::string& socUsesMeasure, bool& symmetrizeDroppedGraph, bool& generateColoringGraph, magnitudeType& threshold, MueLu::MatrixConstruction::lumpingType& lumpingChoice) {
194 std::set<std::string> validDroppingMethods = {"piece-wise", "cut-drop"};
195
196 if (!pL.get<bool>("filtered matrix: use lumping")) lumpingChoice = MueLu::MatrixConstruction::no_lumping;
197
198 if (validDroppingMethods.find(droppingMethod) == validDroppingMethods.end()) {
199 std::string algo = droppingMethod;
200 std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
201 std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
202
203 // Remove prefix "block diagonal" from algo
204 if (algo.find("block diagonal") == 0) {
205 useBlocking = true;
206 algo = algo.substr(14);
207 if (algo != "") {
208 algo = algo.substr(1);
209 }
210 }
211
212 if ((algo == "classical") || (algo == "signed classical sa") || (algo == "signed classical") || (algo == "colored signed classical")) {
213 socUsesMatrix = "A";
214
215 if (algo == "classical") {
216 socUsesMeasure = "smoothed aggregation";
217 } else if (algo == "signed classical sa") {
218 socUsesMeasure = "signed smoothed aggregation";
219 } else if (algo == "signed classical") {
220 socUsesMeasure = "signed ruge-stueben";
221 } else if (algo == "colored signed classical") {
222 socUsesMeasure = "signed ruge-stueben";
223 generateColoringGraph = true;
224 }
225
226 if (classicalAlgoStr == "default")
227 droppingMethod = "point-wise";
228 else if (classicalAlgoStr == "unscaled cut") {
229 socUsesMeasure = "unscaled";
230 droppingMethod = "cut-drop";
231 } else if (classicalAlgoStr == "scaled cut") {
232 droppingMethod = "cut-drop";
233 } else if (classicalAlgoStr == "scaled cut symmetric") {
234 droppingMethod = "cut-drop";
235 symmetrizeDroppedGraph = true;
236 }
237 } else if ((algo == "distance laplacian") || (algo == "signed classical sa distance laplacian") || (algo == "signed classical distance laplacian")) {
238 socUsesMatrix = "distance laplacian";
239
240 if (algo == "distance laplacian") {
241 socUsesMeasure = "smoothed aggregation";
242 } else if (algo == "signed classical sa distance laplacian") {
243 socUsesMeasure = "signed smoothed aggregation";
244 } else if (algo == "signed classical distance laplacian") {
245 socUsesMeasure = "signed ruge-stueben";
246 }
247
248 if (distanceLaplacianAlgoStr == "default")
249 droppingMethod = "point-wise";
250 else if (distanceLaplacianAlgoStr == "unscaled cut") {
251 socUsesMeasure = "unscaled";
252 droppingMethod = "cut-drop";
253 } else if (distanceLaplacianAlgoStr == "scaled cut") {
254 droppingMethod = "cut-drop";
255 } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
256 droppingMethod = "cut-drop";
257 symmetrizeDroppedGraph = true;
258 }
259 } else if (algo == "") {
260 // algo was "block diagonal", but we process and remove the "block diagonal" part
261 socUsesMatrix = "A";
262 threshold = Teuchos::ScalarTraits<magnitudeType>::zero();
263 }
264 }
265}
266
267template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
268std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
269 BuildScalar(Level& currentLevel) const {
270 FactoryMonitor m(*this, "BuildScalar", currentLevel);
271
272 using MatrixType = Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
273 using GraphType = Xpetra::CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
274 using local_matrix_type = typename MatrixType::local_matrix_device_type;
275 using local_graph_type = typename GraphType::local_graph_device_type;
276 using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
277 using entries_type = typename local_graph_type::entries_type::non_const_type;
278 using values_type = typename local_matrix_type::values_type::non_const_type;
279 using device_type = typename Node::device_type;
280 using memory_space = typename device_type::memory_space;
281 using results_view_type = Kokkos::View<DecisionType*, memory_space>;
282 using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
283 using doubleMultiVector = Xpetra::MultiVector<magnitudeType, LO, GO, NO>;
284
285 typedef Teuchos::ScalarTraits<Scalar> STS;
286 const magnitudeType zero = Teuchos::ScalarTraits<magnitudeType>::zero();
287
288 auto A = Get<RCP<Matrix>>(currentLevel, "A");
289
291 // Process parameterlist
292 const ParameterList& pL = GetParameterList();
293
294 // Boundary detection
295 const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
296 const magnitudeType rowSumTol = as<magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
297 const LocalOrdinal dirichletNonzeroThreshold = 1;
298
299 // Dropping
300 bool useBlocking = pL.get<bool>("aggregation: use blocking");
301 std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
302 std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
303 std::string socUsesMeasure = pL.get<std::string>("aggregation: strength-of-connection: measure");
304 std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
305 bool symmetrizeDroppedGraph = pL.get<bool>("aggregation: symmetrize graph after dropping");
306 magnitudeType threshold;
307 // If we're doing the ML-style halving of the drop tol at each level, we do that here.
308 if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
309 threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
310 else
311 threshold = as<magnitudeType>(pL.get<double>("aggregation: drop tol"));
312 bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
313
314 // Fill
315 const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
316 const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
317
318 const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
319 const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
320 const std::string lumpingChoiceString = pL.get<std::string>("filtered matrix: lumping choice");
322 if (lumpingChoiceString == "diag lumping")
324 else if (lumpingChoiceString == "distributed lumping")
326
327 const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<double>("filtered matrix: Dirichlet threshold"));
328
329 // coloring graph
330 bool generateColoringGraph = pL.get<bool>("aggregation: coloring: use color graph");
331 const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
332 const bool symmetrizeColoringGraph = true;
333
334#ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
335 translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
336#endif
337
338 {
339 std::stringstream ss;
340 ss << "dropping scheme = \"" << droppingMethod << "\", strength-of-connection measure = \"" << socUsesMeasure << "\", strength-of-connection matrix = \"" << socUsesMatrix << "\", ";
341 if (socUsesMatrix == "distance laplacian")
342 ss << "distance laplacian metric = \"" << distanceLaplacianMetric << "\", ";
343 ss << "threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << ", useBlocking = " << useBlocking;
344 ss << ", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
345
346 GetOStream(Runtime0) << ss.str();
347 }
348
349 TEUCHOS_ASSERT(!useRootStencil);
350 TEUCHOS_ASSERT(!useSpreadLumping);
351 TEUCHOS_ASSERT((lumpingChoice != MueLu::MatrixConstruction::distributed_lumping) || !reuseGraph);
352 if (droppingMethod == "cut-drop")
353 TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
354
356 // We perform four sweeps over the rows of A:
357 // Pass 1: detection of boundary nodes
358 // Pass 2: diagonal extraction
359 // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
360 // Pass 4: fill of the filtered matrix
361 //
362 // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
363
364 // TODO: We could merge pass 1 and 2.
365
366 auto crsA = toCrsMatrix(A);
367 auto lclA = crsA->getLocalMatrixDevice();
368 auto range = range_type(0, lclA.numRows());
369
371 // Pass 1: Detect boundary nodes
372 //
373 // The following criteria are available:
374 // - BoundaryDetection::PointDirichletFunctor
375 // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
376 // - BoundaryDetection::RowSumFunctor
377 // Marks rows as Dirichlet bases on row-sum criterion
378
379 // Dirichlet nodes
380 auto boundaryNodes = boundary_nodes_type("boundaryNodes", lclA.numRows()); // initialized to false
381 {
382 SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
383
384 // macro that applies boundary detection functors
385 auto dirichlet_detection = BoundaryDetection::PointDirichletFunctor(lclA, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
386
387 if (rowSumTol <= 0.) {
388 runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
389 } else {
390 auto apply_rowsum = BoundaryDetection::RowSumFunctor(lclA, boundaryNodes, rowSumTol);
391 runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection, apply_rowsum);
392 }
393 }
394 // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
395 // Otherwise we're now done with it now.
396
398 // Pass 2 & 3: Diagonal extraction and determine dropping and construct
399 // rowptr of filtered matrix
400 //
401 // The following criteria are available:
402 // - Misc::PointwiseDropBoundaryFunctor
403 // Drop all rows that have been marked as Dirichlet
404 // - Misc::DropOffRankFunctor
405 // Drop all entries that are off-rank
406 // - ClassicalDropping::DropFunctor
407 // Classical dropping
408 // - DistanceLaplacian::DropFunctor
409 // Distance Laplacian dropping
410 // - Misc::KeepDiagonalFunctor
411 // Mark diagonal as KEEP
412 // - Misc::MarkSingletonFunctor
413 // Mark singletons after dropping as Dirichlet
414 // - Misc::BlockDiagonalizeFunctor
415 // Drop coupling between blocks
416 //
417 // For the block diagonal variants we first block diagonalized and then apply "blocksize = 1" algorithms.
418
419 // rowptr of filtered A
420 auto filtered_rowptr = rowptr_type("filtered_rowptr", lclA.numRows() + 1);
421 // Number of nonzeros of filtered A
422 LocalOrdinal nnz_filtered = 0;
423 // dropping decisions for each entry
424 auto results = results_view_type("results", lclA.nnz()); // initialized to UNDECIDED
425 {
426 SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
427
428 if (threshold != zero) {
429 if (socUsesMatrix == "A") {
430 if (socUsesMeasure == "unscaled") {
431 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
432 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
433 } else if (socUsesMeasure == "smoothed aggregation") {
434 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
435 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
436 } else if (socUsesMeasure == "signed ruge-stueben") {
437 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
438 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
439 } else if (socUsesMeasure == "signed smoothed aggregation") {
440 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
441 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
442 }
443 } else if (socUsesMatrix == "distance laplacian") {
444 auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
445 if (socUsesMeasure == "unscaled") {
446 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
447 } else if (socUsesMeasure == "smoothed aggregation") {
448 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
449 } else if (socUsesMeasure == "signed ruge-stueben") {
450 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
451 } else if (socUsesMeasure == "signed smoothed aggregation") {
452 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
453 }
454 }
455 } else {
456 Kokkos::deep_copy(results, KEEP);
457
458 if (symmetrizeDroppedGraph) {
459 auto drop_boundaries = Misc::PointwiseSymmetricDropBoundaryFunctor(*A, boundaryNodes, results);
460 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, drop_boundaries);
461 } else {
462 auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
463 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, no_op);
464 }
465 }
466
467 if (symmetrizeDroppedGraph) {
468 auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
469 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, symmetrize);
470 }
471 }
472 GO numDropped = lclA.nnz() - nnz_filtered;
473 // We now know the number of entries of filtered A and have the final rowptr.
474
476 // Pass 4: Create local matrix for filtered A
477 //
478 // Dropped entries are optionally lumped to the diagonal.
479
480 RCP<Matrix> filteredA;
481 RCP<LWGraph_kokkos> graph;
482 {
483 SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
484
485 local_matrix_type lclFilteredA;
486 local_graph_type lclGraph;
487 if (reuseGraph) {
488 filteredA = MatrixFactory::BuildCopy(A);
489 lclFilteredA = filteredA->getLocalMatrixDevice();
490
491 auto colidx = entries_type("entries", nnz_filtered);
492 lclGraph = local_graph_type(colidx, filtered_rowptr);
493 } else {
494 auto colidx = entries_type("entries", nnz_filtered);
495 auto values = values_type("values", nnz_filtered);
496 lclFilteredA = local_matrix_type("filteredA",
497 lclA.numRows(), lclA.numCols(),
498 nnz_filtered,
499 values, filtered_rowptr, colidx);
500 }
501
502 if (lumpingChoice != MueLu::MatrixConstruction::no_lumping) {
503 if (reuseGraph) {
504 auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, true>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
505 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
506 } else {
507 if (lumpingChoice == MueLu::MatrixConstruction::diag_lumping) {
508 auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::diag_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
509 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
510 } else if (lumpingChoice == MueLu::MatrixConstruction::distributed_lumping) {
511 auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::distributed_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
512 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
513 }
514 }
515 } else {
516 if (reuseGraph) {
517 auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, false>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
518 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
519 } else {
520 auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::no_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
521 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
522 }
523 }
524
525 if (!reuseGraph)
526 filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
527 filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
528
529 if (reuseEigenvalue) {
530 // Reuse max eigenvalue from A
531 // It is unclear what eigenvalue is the best for the smoothing, but we already may have
532 // the D^{-1}A estimate in A, may as well use it.
533 // NOTE: ML does that too
534 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
535 } else {
536 filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
537 }
538
539 if (!reuseGraph) {
540 // Use graph of filteredA as graph.
541 lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice();
542 }
543 graph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "amalgamated graph of A"));
544 graph->SetBoundaryNodeMap(boundaryNodes);
545 }
546
547 // Construct a second graph for coloring
548 if (generateColoringGraph) {
549 SubFactoryMonitor mColoringGraph(*this, "Construct coloring graph", currentLevel);
550
551 filtered_rowptr = rowptr_type("rowptr_coloring_graph", lclA.numRows() + 1);
552 if (localizeColoringGraph) {
553 auto drop_offrank = Misc::DropOffRankFunctor(lclA, results);
554 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, drop_offrank);
555 }
556 if (symmetrizeColoringGraph) {
557 auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
558 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, symmetrize);
559 }
560 auto colidx = entries_type("entries_coloring_graph", nnz_filtered);
561 auto lclGraph = local_graph_type(colidx, filtered_rowptr);
562 auto graphConstruction = MatrixConstruction::GraphConstruction<local_matrix_type, local_graph_type>(lclA, results, lclGraph);
563 Kokkos::parallel_for("MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
564
565 auto colorGraph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "coloring graph"));
566 Set(currentLevel, "Coloring Graph", colorGraph);
567 }
568
569 if (pL.get<bool>("filtered matrix: count negative diagonals")) {
570 // Count the negative diagonals (and display that information)
572 GetOStream(Runtime0) << "CoalesceDrop: Negative diagonals: " << neg_count << std::endl;
573 }
574
575 LO dofsPerNode = 1;
576 Set(currentLevel, "DofsPerNode", dofsPerNode);
577 Set(currentLevel, "Graph", graph);
578 Set(currentLevel, "A", filteredA);
579
580 return std::make_tuple(numDropped, boundaryNodes);
581}
582
583template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
584std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
585 BuildVector(Level& currentLevel) const {
586 FactoryMonitor m(*this, "BuildVector", currentLevel);
587
588 using MatrixType = Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
589 using GraphType = Xpetra::CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
590 using local_matrix_type = typename MatrixType::local_matrix_device_type;
591 using local_graph_type = typename GraphType::local_graph_device_type;
592 using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
593 using entries_type = typename local_graph_type::entries_type::non_const_type;
594 using values_type = typename local_matrix_type::values_type::non_const_type;
595 using device_type = typename Node::device_type;
596 using memory_space = typename device_type::memory_space;
597 using results_view_type = Kokkos::View<DecisionType*, memory_space>;
598 using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
599 using doubleMultiVector = Xpetra::MultiVector<magnitudeType, LO, GO, NO>;
600
601 typedef Teuchos::ScalarTraits<Scalar> STS;
602 const magnitudeType zero = Teuchos::ScalarTraits<magnitudeType>::zero();
603
604 auto A = Get<RCP<Matrix>>(currentLevel, "A");
605
606 /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
607 blkSize is the number of storage blocks that must kept together during the amalgamation process.
608
609 Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
610
611 numPDEs = blkSize * storageblocksize.
612
613 If numPDEs==1
614 Matrix is point storage (classical CRS storage). storageblocksize=1 and blkSize=1
615 No other values makes sense.
616
617 If numPDEs>1
618 If matrix uses point storage, then storageblocksize=1 and blkSize=numPDEs.
619 If matrix uses block storage, with block size of n, then storageblocksize=n, and blkSize=numPDEs/n.
620 Thus far, only storageblocksize=numPDEs and blkSize=1 has been tested.
621 */
622
623 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
624 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
625
626 auto amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
627
628 const RCP<const Map> rowMap = A->getRowMap();
629 const RCP<const Map> colMap = A->getColMap();
630
631 // build a node row map (uniqueMap = non-overlapping) and a node column map
632 // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation
633 // stored in the AmalgamationInfo class container contain the local node id
634 // given a local dof id. The data is calculated in the AmalgamationFactory and
635 // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo)
636 const RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
637 const RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
638 Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View?
639 Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
640
641 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
642 rowTranslationView(rowTranslationArray.getRawPtr(), rowTranslationArray.size());
643 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
644 colTranslationView(colTranslationArray.getRawPtr(), colTranslationArray.size());
645
646 // get number of local nodes
647 LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
648 typedef typename Kokkos::View<LocalOrdinal*, typename Node::device_type> id_translation_type;
649 id_translation_type rowTranslation("dofId2nodeId", rowTranslationArray.size());
650 id_translation_type colTranslation("ov_dofId2nodeId", colTranslationArray.size());
651 Kokkos::deep_copy(rowTranslation, rowTranslationView);
652 Kokkos::deep_copy(colTranslation, colTranslationView);
653
654 // extract striding information
655 blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
656 LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map
657 LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size)
658 if (A->IsView("stridedMaps") == true) {
659 const RCP<const Map> myMap = A->getRowMap("stridedMaps");
660 const RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
661 TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap");
662 blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
663 blkId = strMap->getStridedBlockId();
664 if (blkId > -1)
665 blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
666 }
667
668 TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkPartSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiple of " << blkPartSize);
669
671 // Process parameterlist
672 const ParameterList& pL = GetParameterList();
673
674 // Boundary detection
675 const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
676 const magnitudeType rowSumTol = as<magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
677 const LocalOrdinal dirichletNonzeroThreshold = 1;
678 const bool useGreedyDirichlet = pL.get<bool>("aggregation: greedy Dirichlet");
679 TEUCHOS_TEST_FOR_EXCEPTION(rowSumTol > zero, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: RowSum is not implemented for vectorial problems.");
680
681 // Dropping
682 bool useBlocking = pL.get<bool>("aggregation: use blocking");
683 std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
684 std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
685 std::string socUsesMeasure = pL.get<std::string>("aggregation: strength-of-connection: measure");
686 std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
687 bool symmetrizeDroppedGraph = pL.get<bool>("aggregation: symmetrize graph after dropping");
688 magnitudeType threshold;
689 // If we're doing the ML-style halving of the drop tol at each level, we do that here.
690 if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
691 threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
692 else
693 threshold = as<magnitudeType>(pL.get<double>("aggregation: drop tol"));
694 bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
695
696 // Fill
697 const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
698 const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
699
700 const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
701 const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
702 const std::string lumpingChoiceString = pL.get<std::string>("filtered matrix: lumping choice");
704 if (lumpingChoiceString == "diag lumping")
706 else if (lumpingChoiceString == "distributed lumping")
708
709 const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<double>("filtered matrix: Dirichlet threshold"));
710
711 // coloring graph
712 bool generateColoringGraph = pL.get<bool>("aggregation: coloring: use color graph");
713 const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
714 const bool symmetrizeColoringGraph = true;
715
716#ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
717 translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
718#endif
719 {
720 std::stringstream ss;
721 ss << "dropping scheme = \"" << droppingMethod << "\", strength-of-connection measure = \"" << socUsesMeasure << "\", strength-of-connection matrix = \"" << socUsesMatrix << "\", ";
722 if (socUsesMatrix == "distance laplacian")
723 ss << "distance laplacian metric = \"" << distanceLaplacianMetric << "\", ";
724 ss << "threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << ", useBlocking = " << useBlocking;
725 ss << ", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
726
727 GetOStream(Runtime0) << ss.str();
728 }
729
730 TEUCHOS_ASSERT(!useRootStencil);
731 TEUCHOS_ASSERT(!useSpreadLumping);
732 TEUCHOS_ASSERT((lumpingChoice != MueLu::MatrixConstruction::distributed_lumping) || !reuseGraph);
733 if (droppingMethod == "cut-drop")
734 TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
735
737 // We perform four sweeps over the rows of A:
738 // Pass 1: detection of boundary nodes
739 // Pass 2: diagonal extraction
740 // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
741 // Pass 4: fill of the filtered matrix
742 //
743 // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
744
745 // TODO: We could merge pass 1 and 2.
746
747 auto crsA = toCrsMatrix(A);
748 auto lclA = crsA->getLocalMatrixDevice();
749 auto range = range_type(0, numNodes);
750
752 // Pass 1: Detect boundary nodes
753 //
754 // The following criteria are available:
755 // - BoundaryDetection::VectorDirichletFunctor
756 // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
757
758 // Dirichlet nodes
759 auto boundaryNodes = boundary_nodes_type("boundaryNodes", numNodes); // initialized to false
760 {
761 SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
762
763 if (useGreedyDirichlet) {
764 auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, true>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
765 runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
766 } else {
767 auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, false>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
768 runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
769 }
770 }
771 // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
772 // Otherwise we're now done with it now.
773
775 // Pass 2 & 3: Diagonal extraction and determine dropping and construct
776 // rowptr of filtered matrix
777 //
778 // The following criteria are available:
779 // - Misc::VectorDropBoundaryFunctor
780 // Drop all rows that have been marked as Dirichlet
781 // - Misc::DropOffRankFunctor
782 // Drop all entries that are off-rank
783 // - ClassicalDropping::DropFunctor
784 // Classical dropping
785 // - DistanceLaplacian::VectorDropFunctor
786 // Distance Laplacian dropping
787 // - Misc::KeepDiagonalFunctor
788 // Mark diagonal as KEEP
789 // - Misc::MarkSingletonFunctor
790 // Mark singletons after dropping as Dirichlet
791
792 // rowptr of filtered A
793 auto filtered_rowptr = rowptr_type("rowptr", lclA.numRows() + 1);
794 auto graph_rowptr = rowptr_type("rowptr", numNodes + 1);
795 // Number of nonzeros of filtered A and graph
796 Kokkos::pair<LocalOrdinal, LocalOrdinal> nnz = {0, 0};
797
798 // dropping decisions for each entry
799 auto results = results_view_type("results", lclA.nnz()); // initialized to UNDECIDED
800
801 RCP<Matrix> mergedA;
802 {
803 SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
804
805 {
806 // Construct merged A.
807
808 auto merged_rowptr = rowptr_type("rowptr", numNodes + 1);
809 LocalOrdinal nnz_merged = 0;
810
811 auto functor = MatrixConstruction::MergeCountFunctor(lclA, blkPartSize, colTranslation, merged_rowptr);
812 Kokkos::parallel_scan("MergeCount", range, functor, nnz_merged);
813
814 local_graph_type lclMergedGraph;
815 auto colidx_merged = entries_type("entries", nnz_merged);
816 auto values_merged = values_type("values", nnz_merged);
817
818 local_matrix_type lclMergedA = local_matrix_type("mergedA",
819 numNodes, nonUniqueMap->getLocalNumElements(),
820 nnz_merged,
821 values_merged, merged_rowptr, colidx_merged);
822
823 auto fillFunctor = MatrixConstruction::MergeFillFunctor<local_matrix_type>(lclA, blkSize, colTranslation, lclMergedA);
824 Kokkos::parallel_for("MueLu::CoalesceDrop::MergeFill", range, fillFunctor);
825
826 mergedA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclMergedA, uniqueMap, nonUniqueMap, uniqueMap, uniqueMap);
827 }
828
829 if (threshold != zero) {
830 if (socUsesMatrix == "A") {
831 if (socUsesMeasure == "unscaled") {
832 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
833 } else if (socUsesMeasure == "smoothed aggregation") {
834 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
835 } else if (socUsesMeasure == "signed ruge-stueben") {
836 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
837 } else if (socUsesMeasure == "signed smoothed aggregation") {
838 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
839 }
840 } else if (socUsesMatrix == "distance laplacian") {
841 auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
842
843 Array<double> dlap_weights = pL.get<Array<double>>("aggregation: distance laplacian directional weights");
844 LocalOrdinal interleaved_blocksize = as<LocalOrdinal>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
845 if (socUsesMeasure == "distance laplacian") {
846 LO dim = (LO)coords->getNumVectors();
847 // If anything isn't 1.0 we need to turn on the weighting
848 bool non_unity = false;
849 for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) {
850 if (dlap_weights[i] != 1.0) {
851 non_unity = true;
852 }
853 }
854 if (non_unity) {
855 if ((LO)dlap_weights.size() == dim) {
856 distanceLaplacianMetric = "weighted";
857 } else if ((LO)dlap_weights.size() == interleaved_blocksize * dim)
858 distanceLaplacianMetric = "block weighted";
859 else {
860 TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError,
861 "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
862 }
863 if (GetVerbLevel() & Statistics1)
864 GetOStream(Statistics1) << "Using distance laplacian weights: " << dlap_weights << std::endl;
865 }
866 }
867
868 if (socUsesMeasure == "unscaled") {
869 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
870 } else if (socUsesMeasure == "smoothed aggregation") {
871 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
872 } else if (socUsesMeasure == "signed ruge-stueben") {
873 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
874 } else if (socUsesMeasure == "signed smoothed aggregation") {
875 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
876 }
877 }
878 } else {
879 Kokkos::deep_copy(results, KEEP);
880
881 auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
882 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, no_op);
883 }
884
885 if (symmetrizeDroppedGraph) {
886 auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
887 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, symmetrize);
888 }
889 }
890 LocalOrdinal nnz_filtered = nnz.first;
891 LocalOrdinal nnz_graph = nnz.second;
892 GO numTotal = lclA.nnz();
893 GO numDropped = numTotal - nnz_filtered;
894 // We now know the number of entries of filtered A and have the final rowptr.
895
897 // Pass 4: Create local matrix for filtered A
898 //
899 // Dropped entries are optionally lumped to the diagonal.
900
901 RCP<Matrix> filteredA;
902 RCP<LWGraph_kokkos> graph;
903 {
904 SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
905
906 local_matrix_type lclFilteredA;
907 if (reuseGraph) {
908 lclFilteredA = local_matrix_type("filteredA", lclA.graph, lclA.numCols());
909 } else {
910 auto colidx = entries_type("entries", nnz_filtered);
911 auto values = values_type("values", nnz_filtered);
912 lclFilteredA = local_matrix_type("filteredA",
913 lclA.numRows(), lclA.numCols(),
914 nnz_filtered,
915 values, filtered_rowptr, colidx);
916 }
917
918 local_graph_type lclGraph;
919 {
920 auto colidx = entries_type("entries", nnz_graph);
921 lclGraph = local_graph_type(colidx, graph_rowptr);
922 }
923
924 if (lumpingChoice != MueLu::MatrixConstruction::no_lumping) {
925 if (reuseGraph) {
926 auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, true>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
927 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
928 } else {
929 auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, false>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
930 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
931 }
932 } else {
933 if (reuseGraph) {
934 auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, true>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
935 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
936 } else {
937 auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, false>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
938 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
939 }
940 }
941
942 filteredA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
943 filteredA->SetFixedBlockSize(blkSize);
944
945 if (reuseEigenvalue) {
946 // Reuse max eigenvalue from A
947 // It is unclear what eigenvalue is the best for the smoothing, but we already may have
948 // the D^{-1}A estimate in A, may as well use it.
949 // NOTE: ML does that too
950 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
951 } else {
952 filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
953 }
954
955 graph = rcp(new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
956 graph->SetBoundaryNodeMap(boundaryNodes);
957 }
958
959 // Construct a second graph for coloring
960 if (generateColoringGraph) {
961 SubFactoryMonitor mColoringGraph(*this, "Construct coloring graph", currentLevel);
962
963 filtered_rowptr = rowptr_type("rowptr_coloring_graph", lclA.numRows() + 1);
964 graph_rowptr = rowptr_type("rowptr", numNodes + 1);
965 if (localizeColoringGraph) {
966 auto drop_offrank = Misc::DropOffRankFunctor(lclA, results);
967 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, drop_offrank);
968 }
969 if (symmetrizeColoringGraph) {
970 auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
971 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, symmetrize);
972 }
973 auto colidx = entries_type("entries_coloring_graph", nnz_filtered);
974 auto lclGraph = local_graph_type(colidx, filtered_rowptr);
975 auto graphConstruction = MatrixConstruction::GraphConstruction<local_matrix_type, local_graph_type>(lclA, results, lclGraph);
976 Kokkos::parallel_for("MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
977
978 auto colorGraph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "coloring graph"));
979 Set(currentLevel, "Coloring Graph", colorGraph);
980 }
981
982 LO dofsPerNode = blkSize;
983
984 Set(currentLevel, "DofsPerNode", dofsPerNode);
985 Set(currentLevel, "Graph", graph);
986 Set(currentLevel, "A", filteredA);
987
988 return std::make_tuple(numDropped, boundaryNodes);
989}
990
991} // namespace MueLu
992#endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Functor that serially applies sub-functors to rows.
Functor for marking nodes as Dirichlet based on rowsum.
Functor for marking nodes as Dirichlet in a block operator.
void DeclareInput(Level &currentLevel) const
Input.
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
void Build(Level &currentLevel) const
Build an object with this factory.
typename MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node >::boundary_nodes_type boundary_nodes_type
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildVector(Level &currentLevel) const
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildScalar(Level &currentLevel) const
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Lightweight MueLu representation of a compressed row storage graph.
Class that holds all level-specific information.
int GetLevelID() const
Return level number.
Functor does not reuse the graph of the matrix for a problem with blockSize == 1.
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping,...
Functor that drops off-rank entries.
Functor that drops boundary nodes for a blockSize == 1 problem.
Functor that symmetrizes the dropping decisions.
static void runDroppingFunctors_on_A(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
static void runDroppingFunctors_on_dlap(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Level &level, const Factory &factory)
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static GlobalOrdinal CountNegativeDiagonalEntries(const Matrix &A)
Counts the number of negative diagonal entries.
static void runDroppingFunctors_on_A(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
static void runDroppingFunctors_on_dlap(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Teuchos::Array< double > &dlap_weights, LocalOrdinal interleaved_blocksize, Level &level, const Factory &factory)
Namespace for MueLu classes and methods.
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
void runBoundaryFunctors(local_matrix_type &lclA, boundary_nodes_view &boundaryNodes, Functors &... functors)
void translateOldAlgoParam(const Teuchos::ParameterList &pL, std::string &droppingMethod, bool &useBlocking, std::string &socUsesMatrix, std::string &socUsesMeasure, bool &symmetrizeDroppedGraph, bool &generateColoringGraph, magnitudeType &threshold, MueLu::MatrixConstruction::lumpingType &lumpingChoice)