MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_CoalesceDropFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
11#define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
12
13#include <Kokkos_Core.hpp>
14#include <KokkosSparse_CrsMatrix.hpp>
15#include <sstream>
16#include <string>
17#include <tuple>
18
19#include "Xpetra_Matrix.hpp"
20
22
23#include "MueLu_AmalgamationInfo.hpp"
24#include "MueLu_Exceptions.hpp"
25#include "MueLu_Level.hpp"
26#include "MueLu_LWGraph_kokkos.hpp"
27#include "MueLu_MasterList.hpp"
28#include "MueLu_Monitor.hpp"
29#include "MueLu_Utilities.hpp"
30
34
35// The different dropping algorithms are split up over several translation units. This speeds up compilation and also avoids launch latencies on GPU.
37#include "MueLu_ScalarDroppingClassical.hpp"
38#include "MueLu_ScalarDroppingDistanceLaplacian.hpp"
40#include "MueLu_VectorDroppingClassical.hpp"
41#include "MueLu_VectorDroppingDistanceLaplacian.hpp"
42
43namespace MueLu {
44
45template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
47 RCP<ParameterList> validParamList = rcp(new ParameterList());
48
49#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
50 SET_VALID_ENTRY("aggregation: use blocking");
51 SET_VALID_ENTRY("aggregation: drop tol");
52 SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
53 SET_VALID_ENTRY("aggregation: Dirichlet threshold");
54 SET_VALID_ENTRY("aggregation: greedy Dirichlet");
55 SET_VALID_ENTRY("aggregation: row sum drop tol");
56 SET_VALID_ENTRY("aggregation: strength-of-connection: matrix");
57 SET_VALID_ENTRY("aggregation: strength-of-connection: measure");
58 SET_VALID_ENTRY("aggregation: drop scheme");
59 SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
60 SET_VALID_ENTRY("aggregation: distance laplacian metric");
61 SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
62 SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
63#ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
64 SET_VALID_ENTRY("aggregation: distance laplacian algo");
65 SET_VALID_ENTRY("aggregation: classical algo");
66#endif
67 SET_VALID_ENTRY("aggregation: symmetrize graph after dropping");
68 SET_VALID_ENTRY("aggregation: coloring: use color graph");
69 SET_VALID_ENTRY("aggregation: coloring: localize color graph");
70
71 SET_VALID_ENTRY("filtered matrix: use lumping");
72 SET_VALID_ENTRY("filtered matrix: reuse graph");
73 SET_VALID_ENTRY("filtered matrix: reuse eigenvalue");
74
75 SET_VALID_ENTRY("filtered matrix: use root stencil");
76 SET_VALID_ENTRY("filtered matrix: lumping choice");
77 SET_VALID_ENTRY("filtered matrix: use spread lumping");
78 SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor");
79 SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap");
80 SET_VALID_ENTRY("filtered matrix: Dirichlet threshold");
81 SET_VALID_ENTRY("filtered matrix: count negative diagonals");
82
83#undef SET_VALID_ENTRY
84 validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
85#ifndef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
86 validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("point-wise", "cut-drop"))));
87#else
88 validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("point-wise", "cut-drop", "signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical", "signed classical distance laplacian", "signed classical sa distance laplacian"))));
89 validParamList->getEntry("aggregation: classical algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
90 validParamList->getEntry("aggregation: distance laplacian algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
91#endif
92 validParamList->getEntry("aggregation: strength-of-connection: matrix").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("A", "distance laplacian", "MinvA"))));
93 validParamList->getEntry("aggregation: strength-of-connection: measure").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("smoothed aggregation", "signed smoothed aggregation", "signed ruge-stueben", "unscaled"))));
94 validParamList->getEntry("aggregation: distance laplacian metric").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("unweighted", "material"))));
95
96 validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
97 validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
98 validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
99 validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNumber");
100 validParamList->set<RCP<const FactoryBase>>("Material", Teuchos::null, "Generating factory for Material");
101 validParamList->set<RCP<const FactoryBase>>("M", Teuchos::null, "Generating factory for M");
102 validParamList->set<RCP<const FactoryBase>>("Minv", Teuchos::null, "Generating factory for Minv");
103 validParamList->set<RCP<const FactoryBase>>("MinvA", Teuchos::null, "Generating factory for MinvA");
104
105 return validParamList;
106}
107
108template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
110 Input(currentLevel, "A");
111 Input(currentLevel, "UnAmalgamationInfo");
112
113 const ParameterList& pL = GetParameterList();
114
115 std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
116 bool needCoords = (socUsesMatrix == "distance laplacian");
117 const bool needM = (socUsesMatrix == "MinvA");
118#ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
119 std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
120 needCoords |= (droppingMethod.find("distance laplacian") != std::string::npos);
121#endif
122 if (needCoords) {
123 Input(currentLevel, "Coordinates");
124 std::string distLaplMetric = pL.get<std::string>("aggregation: distance laplacian metric");
125 if (distLaplMetric == "material")
126 Input(currentLevel, "Material");
127 }
128 if (needM)
129 Input(currentLevel, "M");
130
131 bool useBlocking = pL.get<bool>("aggregation: use blocking");
132#ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
133 useBlocking |= (droppingMethod.find("block diagonal") != std::string::npos);
134#endif
135 if (useBlocking) {
136 Input(currentLevel, "BlockNumber");
137 }
138}
139
140template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
142 Build(Level& currentLevel) const {
143 auto A = Get<RCP<Matrix>>(currentLevel, "A");
144 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
145 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
146
147 std::tuple<GlobalOrdinal, boundary_nodes_type> results;
148 if (blkSize == 1)
149 results = BuildScalar(currentLevel);
150 else
151 results = BuildVector(currentLevel);
152
153 if (GetVerbLevel() & Statistics1) {
154 GlobalOrdinal numDropped = std::get<0>(results);
155 auto boundaryNodes = std::get<1>(results);
156
157 GO numLocalBoundaryNodes = 0;
158
159 Kokkos::parallel_reduce(
160 "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)),
161 KOKKOS_LAMBDA(const LO i, GO& n) {
162 if (boundaryNodes(i))
163 n++;
164 },
165 numLocalBoundaryNodes);
166
167 if (IsPrint(Statistics1)) {
168 auto comm = A->getRowMap()->getComm();
169
170 std::vector<GlobalOrdinal> localStats = {numLocalBoundaryNodes, numDropped};
171 std::vector<GlobalOrdinal> globalStats(2);
172 Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 2, localStats.data(), globalStats.data());
173
174 GO numGlobalTotal = A->getGlobalNumEntries();
175 GO numGlobalBoundaryNodes = globalStats[0];
176 GO numGlobalDropped = globalStats[1];
177
178 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
179 if (numGlobalTotal != 0) {
180 GetOStream(Statistics1) << "Number of dropped entries: "
181 << numGlobalDropped << "/" << numGlobalTotal
182 << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)" << std::endl;
183 }
184 }
185 }
186}
187
188template <class local_matrix_type, class boundary_nodes_view, class... Functors>
189void runBoundaryFunctors(local_matrix_type& lclA, boundary_nodes_view& boundaryNodes, Functors&... functors) {
190 using local_ordinal_type = typename local_matrix_type::ordinal_type;
191 using execution_space = typename local_matrix_type::execution_space;
192 using range_type = Kokkos::RangePolicy<local_ordinal_type, execution_space>;
193 auto range = range_type(0, boundaryNodes.extent(0));
194 auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, functors...);
195 Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries);
196}
197
198template <class magnitudeType>
199void translateOldAlgoParam(const Teuchos::ParameterList& pL, std::string& droppingMethod, bool& useBlocking, std::string& socUsesMatrix, std::string& socUsesMeasure, bool& symmetrizeDroppedGraph, bool& generateColoringGraph, magnitudeType& threshold, MueLu::MatrixConstruction::lumpingType& lumpingChoice) {
200 std::set<std::string> validDroppingMethods = {"piece-wise", "cut-drop"};
201
202 if (!pL.get<bool>("filtered matrix: use lumping")) lumpingChoice = MueLu::MatrixConstruction::no_lumping;
203
204 if (validDroppingMethods.find(droppingMethod) == validDroppingMethods.end()) {
205 std::string algo = droppingMethod;
206 std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
207 std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
208
209 // Remove prefix "block diagonal" from algo
210 if (algo.find("block diagonal") == 0) {
211 useBlocking = true;
212 algo = algo.substr(14);
213 if (algo != "") {
214 algo = algo.substr(1);
215 }
216 }
217
218 if ((algo == "classical") || (algo == "signed classical sa") || (algo == "signed classical") || (algo == "colored signed classical")) {
219 socUsesMatrix = "A";
220
221 if (algo == "classical") {
222 socUsesMeasure = "smoothed aggregation";
223 } else if (algo == "signed classical sa") {
224 socUsesMeasure = "signed smoothed aggregation";
225 } else if (algo == "signed classical") {
226 socUsesMeasure = "signed ruge-stueben";
227 } else if (algo == "colored signed classical") {
228 socUsesMeasure = "signed ruge-stueben";
229 generateColoringGraph = true;
230 }
231
232 if (classicalAlgoStr == "default")
233 droppingMethod = "point-wise";
234 else if (classicalAlgoStr == "unscaled cut") {
235 socUsesMeasure = "unscaled";
236 droppingMethod = "cut-drop";
237 } else if (classicalAlgoStr == "scaled cut") {
238 droppingMethod = "cut-drop";
239 } else if (classicalAlgoStr == "scaled cut symmetric") {
240 droppingMethod = "cut-drop";
241 symmetrizeDroppedGraph = true;
242 }
243 } else if ((algo == "distance laplacian") || (algo == "signed classical sa distance laplacian") || (algo == "signed classical distance laplacian")) {
244 socUsesMatrix = "distance laplacian";
245
246 if (algo == "distance laplacian") {
247 socUsesMeasure = "smoothed aggregation";
248 } else if (algo == "signed classical sa distance laplacian") {
249 socUsesMeasure = "signed smoothed aggregation";
250 } else if (algo == "signed classical distance laplacian") {
251 socUsesMeasure = "signed ruge-stueben";
252 }
253
254 if (distanceLaplacianAlgoStr == "default")
255 droppingMethod = "point-wise";
256 else if (distanceLaplacianAlgoStr == "unscaled cut") {
257 socUsesMeasure = "unscaled";
258 droppingMethod = "cut-drop";
259 } else if (distanceLaplacianAlgoStr == "scaled cut") {
260 droppingMethod = "cut-drop";
261 } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
262 droppingMethod = "cut-drop";
263 symmetrizeDroppedGraph = true;
264 }
265 } else if (algo == "") {
266 // algo was "block diagonal", but we process and remove the "block diagonal" part
267 socUsesMatrix = "A";
268 threshold = Teuchos::ScalarTraits<magnitudeType>::zero();
269 }
270 }
271}
272
273template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
274std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
275 BuildScalar(Level& currentLevel) const {
276 FactoryMonitor m(*this, "BuildScalar", currentLevel);
277
278 using MatrixType = Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
279 using GraphType = Xpetra::CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
280 using local_matrix_type = typename MatrixType::local_matrix_device_type;
281 using local_graph_type = typename GraphType::local_graph_device_type;
282 using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
283 using entries_type = typename local_graph_type::entries_type::non_const_type;
284 using values_type = typename local_matrix_type::values_type::non_const_type;
285 using device_type = typename Node::device_type;
286 using memory_space = typename device_type::memory_space;
287 using results_view_type = Kokkos::View<DecisionType*, memory_space>;
288 using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
289 using doubleMultiVector = Xpetra::MultiVector<magnitudeType, LO, GO, NO>;
290
291 typedef Teuchos::ScalarTraits<Scalar> STS;
292 const magnitudeType zero = Teuchos::ScalarTraits<magnitudeType>::zero();
293
294 auto A = Get<RCP<Matrix>>(currentLevel, "A");
295
297 // Process parameterlist
298 const ParameterList& pL = GetParameterList();
299
300 // Boundary detection
301 const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
302 const magnitudeType rowSumTol = as<magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
303 const LocalOrdinal dirichletNonzeroThreshold = 1;
304
305 // Dropping
306 bool useBlocking = pL.get<bool>("aggregation: use blocking");
307 std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
308 std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
309 std::string socUsesMeasure = pL.get<std::string>("aggregation: strength-of-connection: measure");
310 std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
311 bool symmetrizeDroppedGraph = pL.get<bool>("aggregation: symmetrize graph after dropping");
312 magnitudeType threshold;
313 // If we're doing the ML-style halving of the drop tol at each level, we do that here.
314 if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
315 threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
316 else
317 threshold = as<magnitudeType>(pL.get<double>("aggregation: drop tol"));
318 bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
319
320 // Fill
321 const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
322 const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
323
324 const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
325 const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
326 const std::string lumpingChoiceString = pL.get<std::string>("filtered matrix: lumping choice");
328 if (lumpingChoiceString == "diag lumping")
330 else if (lumpingChoiceString == "distributed lumping")
332
333 const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<double>("filtered matrix: Dirichlet threshold"));
334
335 // coloring graph
336 bool generateColoringGraph = pL.get<bool>("aggregation: coloring: use color graph");
337 const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
338 const bool symmetrizeColoringGraph = true;
339
340#ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
341 translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
342#endif
343
344 {
345 std::stringstream ss;
346 ss << "dropping scheme = \"" << droppingMethod << "\", strength-of-connection measure = \"" << socUsesMeasure << "\", strength-of-connection matrix = \"" << socUsesMatrix << "\", ";
347 if (socUsesMatrix == "distance laplacian")
348 ss << "distance laplacian metric = \"" << distanceLaplacianMetric << "\", ";
349 ss << "threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << ", useBlocking = " << useBlocking;
350 ss << ", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
351
352 GetOStream(Runtime0) << ss.str();
353 }
354
355 TEUCHOS_ASSERT(!useRootStencil);
356 TEUCHOS_ASSERT(!useSpreadLumping);
357 TEUCHOS_ASSERT((lumpingChoice != MueLu::MatrixConstruction::distributed_lumping) || !reuseGraph);
358 if (droppingMethod == "cut-drop")
359 TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
360
362 // We perform four sweeps over the rows of A:
363 // Pass 1: detection of boundary nodes
364 // Pass 2: diagonal extraction
365 // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
366 // Pass 4: fill of the filtered matrix
367 //
368 // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
369
370 // TODO: We could merge pass 1 and 2.
371
372 // Compute matrix that is used for dropping
373 RCP<Matrix> A_drop;
374 if (threshold != zero) {
375 if ((socUsesMatrix == "A") || (socUsesMatrix == "MinvA")) {
376 // Get matrix used for dropping
377 if (socUsesMatrix == "A")
378 A_drop = A;
379 else if (socUsesMatrix == "MinvA") {
380 if (currentLevel.IsAvailable("MinvA", NoFactory::get())) {
381 A_drop = currentLevel.Get<RCP<Matrix>>("MinvA", NoFactory::get());
382 } else {
383 RCP<Matrix> Minv;
384 if (currentLevel.IsAvailable("Minv", NoFactory::get())) {
385 Minv = Get<RCP<Matrix>>(currentLevel, "Minv");
386 } else {
387 auto M = Get<RCP<Matrix>>(currentLevel, "M");
388 // Create Minv via sparse approximate inverse
389 Minv = Utilities::SPAI(M);
390 }
391
392 // build MinvA matrix with same sparsity pattern as A.
393 A_drop = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BuildCopy(A);
394 auto params = Teuchos::rcp(new Teuchos::ParameterList());
395 params->set("MM Throw For Non-Existent Entries", false);
396 A_drop = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*Minv, false, *A, false, A_drop, GetOStream(Statistics2), true, true, std::string("MinvA"), params);
397
398#ifdef JustUsingDiagMforInverse
399 {
400 // could perhaps be useful for debugging?
401 //
402 // get the diagonal inverse of M (TODO: using InverseApproximationFactory is likely better)
403 Teuchos::RCP<Vector> MinvDiag = Utilities::GetMatrixDiagonalInverse(*M);
404 // build MinvA using the graph of A
405 A_drop = MatrixFactory::BuildCopy(A);
406 // multiply MinvDiag through
407 A_drop->leftScale(*MinvDiag);
408 }
409#endif
410 }
411 }
412 }
413 }
414
415 auto crsA = toCrsMatrix(A);
416 auto lclA = crsA->getLocalMatrixDevice();
417 auto range = range_type(0, lclA.numRows());
418
420 // Pass 1: Detect boundary nodes
421 //
422 // The following criteria are available:
423 // - BoundaryDetection::PointDirichletFunctor
424 // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
425 // - BoundaryDetection::RowSumFunctor
426 // Marks rows as Dirichlet bases on row-sum criterion
427
428 // Dirichlet nodes
429 auto boundaryNodes = boundary_nodes_type("boundaryNodes", lclA.numRows()); // initialized to false
430 {
431 SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
432
433 // macro that applies boundary detection functors
434 auto dirichlet_detection = BoundaryDetection::PointDirichletFunctor(lclA, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
435
436 if (rowSumTol <= 0.) {
437 runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
438 } else {
439 auto apply_rowsum = BoundaryDetection::RowSumFunctor(lclA, boundaryNodes, rowSumTol);
440 runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection, apply_rowsum);
441 }
442 }
443 // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
444 // Otherwise we're now done with it now.
445
447 // Pass 2 & 3: Diagonal extraction and determine dropping and construct
448 // rowptr of filtered matrix
449 //
450 // The following criteria are available:
451 // - Misc::PointwiseDropBoundaryFunctor
452 // Drop all rows that have been marked as Dirichlet
453 // - Misc::DropOffRankFunctor
454 // Drop all entries that are off-rank
455 // - ClassicalDropping::DropFunctor
456 // Classical dropping
457 // - DistanceLaplacian::DropFunctor
458 // Distance Laplacian dropping
459 // - Misc::KeepDiagonalFunctor
460 // Mark diagonal as KEEP
461 // - Misc::MarkSingletonFunctor
462 // Mark singletons after dropping as Dirichlet
463 // - Misc::BlockDiagonalizeFunctor
464 // Drop coupling between blocks
465 //
466 // For the block diagonal variants we first block diagonalized and then apply "blocksize = 1" algorithms.
467
468 // rowptr of filtered A
469 auto filtered_rowptr = rowptr_type("filtered_rowptr", lclA.numRows() + 1);
470 // Number of nonzeros of filtered A
471 LocalOrdinal nnz_filtered = 0;
472 // dropping decisions for each entry
473 auto results = results_view_type("results", lclA.nnz()); // initialized to UNDECIDED
474 {
475 SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
476
477 if (threshold != zero) {
478 if ((socUsesMatrix == "A") || (socUsesMatrix == "MinvA")) {
479 if (socUsesMeasure == "unscaled") {
480 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A_drop, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
481 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
482 } else if (socUsesMeasure == "smoothed aggregation") {
483 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A_drop, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
484 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
485 } else if (socUsesMeasure == "signed ruge-stueben") {
486 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A_drop, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
487 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
488 } else if (socUsesMeasure == "signed smoothed aggregation") {
489 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A_drop, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
490 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
491 }
492 } else if (socUsesMatrix == "distance laplacian") {
493 auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
494 if (socUsesMeasure == "unscaled") {
495 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
496 } else if (socUsesMeasure == "smoothed aggregation") {
497 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
498 } else if (socUsesMeasure == "signed ruge-stueben") {
499 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
500 } else if (socUsesMeasure == "signed smoothed aggregation") {
501 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *this);
502 }
503 }
504 } else {
505 Kokkos::deep_copy(results, KEEP);
506
507 if (symmetrizeDroppedGraph) {
508 auto drop_boundaries = Misc::PointwiseSymmetricDropBoundaryFunctor(*A, boundaryNodes, results);
509 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, drop_boundaries);
510 } else {
511 auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
512 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, no_op);
513 }
514 }
515
516 if (symmetrizeDroppedGraph) {
517 auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
518 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, symmetrize);
519 }
520 }
521 GO numDropped = lclA.nnz() - nnz_filtered;
522 GO numGlobalDropped;
523 Teuchos::reduceAll(*A->getRowMap()->getComm(), Teuchos::REDUCE_SUM, 1, &numDropped, &numGlobalDropped);
524 // We now know the number of entries of filtered A and have the final rowptr.
525
527 // Pass 4: Create local matrix for filtered A
528 //
529 // Dropped entries are optionally lumped to the diagonal.
530
531 RCP<Matrix> filteredA;
532 RCP<LWGraph_kokkos> graph;
533 if (numGlobalDropped > 0) {
534 SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
535
536 local_matrix_type lclFilteredA;
537 local_graph_type lclGraph;
538 if (reuseGraph) {
539 filteredA = MatrixFactory::BuildCopy(A);
540 lclFilteredA = filteredA->getLocalMatrixDevice();
541
542 auto colidx = entries_type("entries", nnz_filtered);
543 lclGraph = local_graph_type(colidx, filtered_rowptr);
544 } else {
545 auto colidx = entries_type("entries", nnz_filtered);
546 auto values = values_type("values", nnz_filtered);
547 lclFilteredA = local_matrix_type("filteredA",
548 lclA.numRows(), lclA.numCols(),
549 nnz_filtered,
550 values, filtered_rowptr, colidx);
551 }
552
553 if (lumpingChoice != MueLu::MatrixConstruction::no_lumping) {
554 if (reuseGraph) {
555 auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, true>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
556 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
557 } else {
558 if (lumpingChoice == MueLu::MatrixConstruction::diag_lumping) {
559 auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::diag_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
560 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
561 } else if (lumpingChoice == MueLu::MatrixConstruction::distributed_lumping) {
562 auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::distributed_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
563 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
564 }
565 }
566 } else {
567 if (reuseGraph) {
568 auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, false>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
569 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
570 } else {
571 auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, MueLu::MatrixConstruction::no_lumping>(lclA, results, lclFilteredA, filteringDirichletThreshold);
572 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
573 }
574 }
575
576 if (!reuseGraph)
577 filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
578 filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
579
580 if (reuseEigenvalue) {
581 // Reuse max eigenvalue from A
582 // It is unclear what eigenvalue is the best for the smoothing, but we already may have
583 // the D^{-1}A estimate in A, may as well use it.
584 // NOTE: ML does that too
585 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
586 } else {
587 filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
588 }
589
590 if (!reuseGraph) {
591 // Use graph of filteredA as graph.
592 lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice();
593 }
594 graph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "amalgamated graph of A"));
595 graph->SetBoundaryNodeMap(boundaryNodes);
596 } else {
597 filteredA = A;
598 graph = rcp(new LWGraph_kokkos(filteredA->getCrsGraph()->getLocalGraphDevice(), filteredA->getRowMap(), filteredA->getColMap(), "amalgamated graph of A"));
599 graph->SetBoundaryNodeMap(boundaryNodes);
600 }
601
602 // Construct a second graph for coloring
603 if (generateColoringGraph) {
604 SubFactoryMonitor mColoringGraph(*this, "Construct coloring graph", currentLevel);
605
606 filtered_rowptr = rowptr_type("rowptr_coloring_graph", lclA.numRows() + 1);
607 if (localizeColoringGraph) {
608 auto drop_offrank = Misc::DropOffRankFunctor(lclA, results);
609 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, drop_offrank);
610 }
611 if (symmetrizeColoringGraph) {
612 auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
613 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *this, symmetrize);
614 }
615 auto colidx = entries_type("entries_coloring_graph", nnz_filtered);
616 auto lclGraph = local_graph_type(colidx, filtered_rowptr);
617 auto graphConstruction = MatrixConstruction::GraphConstruction<local_matrix_type, local_graph_type>(lclA, results, lclGraph);
618 Kokkos::parallel_for("MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
619
620 auto colorGraph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "coloring graph"));
621 Set(currentLevel, "Coloring Graph", colorGraph);
622 }
623
624 if (pL.get<bool>("filtered matrix: count negative diagonals")) {
625 // Count the negative diagonals (and display that information)
627 GetOStream(Runtime0) << "CoalesceDrop: Negative diagonals: " << neg_count << std::endl;
628 }
629
630 LO dofsPerNode = 1;
631 Set(currentLevel, "DofsPerNode", dofsPerNode);
632 Set(currentLevel, "Graph", graph);
633 Set(currentLevel, "A", filteredA);
634
635 return std::make_tuple(numDropped, boundaryNodes);
636}
637
638template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
639std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
640 BuildVector(Level& currentLevel) const {
641 FactoryMonitor m(*this, "BuildVector", currentLevel);
642
643 using MatrixType = Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
644 using GraphType = Xpetra::CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
645 using local_matrix_type = typename MatrixType::local_matrix_device_type;
646 using local_graph_type = typename GraphType::local_graph_device_type;
647 using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
648 using entries_type = typename local_graph_type::entries_type::non_const_type;
649 using values_type = typename local_matrix_type::values_type::non_const_type;
650 using device_type = typename Node::device_type;
651 using memory_space = typename device_type::memory_space;
652 using results_view_type = Kokkos::View<DecisionType*, memory_space>;
653 using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
654 using doubleMultiVector = Xpetra::MultiVector<magnitudeType, LO, GO, NO>;
655
656 typedef Teuchos::ScalarTraits<Scalar> STS;
657 const magnitudeType zero = Teuchos::ScalarTraits<magnitudeType>::zero();
658
659 auto A = Get<RCP<Matrix>>(currentLevel, "A");
660
661 /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
662 blkSize is the number of storage blocks that must kept together during the amalgamation process.
663
664 Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
665
666 numPDEs = blkSize * storageblocksize.
667
668 If numPDEs==1
669 Matrix is point storage (classical CRS storage). storageblocksize=1 and blkSize=1
670 No other values makes sense.
671
672 If numPDEs>1
673 If matrix uses point storage, then storageblocksize=1 and blkSize=numPDEs.
674 If matrix uses block storage, with block size of n, then storageblocksize=n, and blkSize=numPDEs/n.
675 Thus far, only storageblocksize=numPDEs and blkSize=1 has been tested.
676 */
677
678 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
679 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
680
681 auto amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
682
683 const RCP<const Map> rowMap = A->getRowMap();
684 const RCP<const Map> colMap = A->getColMap();
685
686 // build a node row map (uniqueMap = non-overlapping) and a node column map
687 // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation
688 // stored in the AmalgamationInfo class container contain the local node id
689 // given a local dof id. The data is calculated in the AmalgamationFactory and
690 // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo)
691 const RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
692 const RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
693 Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View?
694 Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
695
696 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
697 rowTranslationView(rowTranslationArray.getRawPtr(), rowTranslationArray.size());
698 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
699 colTranslationView(colTranslationArray.getRawPtr(), colTranslationArray.size());
700
701 // get number of local nodes
702 LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
703 typedef typename Kokkos::View<LocalOrdinal*, typename Node::device_type> id_translation_type;
704 id_translation_type rowTranslation("dofId2nodeId", rowTranslationArray.size());
705 id_translation_type colTranslation("ov_dofId2nodeId", colTranslationArray.size());
706 Kokkos::deep_copy(rowTranslation, rowTranslationView);
707 Kokkos::deep_copy(colTranslation, colTranslationView);
708
709 // extract striding information
710 blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
711 LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map
712 LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size)
713 if (A->IsView("stridedMaps") == true) {
714 const RCP<const Map> myMap = A->getRowMap("stridedMaps");
715 const RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
716 TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap");
717 blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
718 blkId = strMap->getStridedBlockId();
719 if (blkId > -1)
720 blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
721 }
722
723 TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkPartSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiple of " << blkPartSize);
724
726 // Process parameterlist
727 const ParameterList& pL = GetParameterList();
728
729 // Boundary detection
730 const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
731 const magnitudeType rowSumTol = as<magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
732 const LocalOrdinal dirichletNonzeroThreshold = 1;
733 const bool useGreedyDirichlet = pL.get<bool>("aggregation: greedy Dirichlet");
734 TEUCHOS_TEST_FOR_EXCEPTION(rowSumTol > zero, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: RowSum is not implemented for vectorial problems.");
735
736 // Dropping
737 bool useBlocking = pL.get<bool>("aggregation: use blocking");
738 std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
739 std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
740 std::string socUsesMeasure = pL.get<std::string>("aggregation: strength-of-connection: measure");
741 std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
742 bool symmetrizeDroppedGraph = pL.get<bool>("aggregation: symmetrize graph after dropping");
743 magnitudeType threshold;
744 // If we're doing the ML-style halving of the drop tol at each level, we do that here.
745 if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
746 threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
747 else
748 threshold = as<magnitudeType>(pL.get<double>("aggregation: drop tol"));
749 bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
750
751 // Fill
752 const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
753 const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
754
755 const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
756 const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
757 const std::string lumpingChoiceString = pL.get<std::string>("filtered matrix: lumping choice");
759 if (lumpingChoiceString == "diag lumping")
761 else if (lumpingChoiceString == "distributed lumping")
763
764 const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<double>("filtered matrix: Dirichlet threshold"));
765
766 // coloring graph
767 bool generateColoringGraph = pL.get<bool>("aggregation: coloring: use color graph");
768 const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
769 const bool symmetrizeColoringGraph = true;
770
771#ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
772 translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
773#endif
774 {
775 std::stringstream ss;
776 ss << "dropping scheme = \"" << droppingMethod << "\", strength-of-connection measure = \"" << socUsesMeasure << "\", strength-of-connection matrix = \"" << socUsesMatrix << "\", ";
777 if (socUsesMatrix == "distance laplacian")
778 ss << "distance laplacian metric = \"" << distanceLaplacianMetric << "\", ";
779 ss << "threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << ", useBlocking = " << useBlocking;
780 ss << ", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
781
782 GetOStream(Runtime0) << ss.str();
783 }
784
785 TEUCHOS_ASSERT(!useRootStencil);
786 TEUCHOS_ASSERT(!useSpreadLumping);
787 TEUCHOS_ASSERT((lumpingChoice != MueLu::MatrixConstruction::distributed_lumping) || !reuseGraph);
788 if (droppingMethod == "cut-drop")
789 TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
790
792 // We perform four sweeps over the rows of A:
793 // Pass 1: detection of boundary nodes
794 // Pass 2: diagonal extraction
795 // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
796 // Pass 4: fill of the filtered matrix
797 //
798 // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
799
800 // TODO: We could merge pass 1 and 2.
801
802 auto crsA = toCrsMatrix(A);
803 auto lclA = crsA->getLocalMatrixDevice();
804 auto range = range_type(0, numNodes);
805
807 // Pass 1: Detect boundary nodes
808 //
809 // The following criteria are available:
810 // - BoundaryDetection::VectorDirichletFunctor
811 // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
812
813 // Dirichlet nodes
814 auto boundaryNodes = boundary_nodes_type("boundaryNodes", numNodes); // initialized to false
815 {
816 SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
817
818 if (useGreedyDirichlet) {
819 auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, true>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
820 runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
821 } else {
822 auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, false>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
823 runBoundaryFunctors(lclA, boundaryNodes, dirichlet_detection);
824 }
825 }
826 // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
827 // Otherwise we're now done with it now.
828
830 // Pass 2 & 3: Diagonal extraction and determine dropping and construct
831 // rowptr of filtered matrix
832 //
833 // The following criteria are available:
834 // - Misc::VectorDropBoundaryFunctor
835 // Drop all rows that have been marked as Dirichlet
836 // - Misc::DropOffRankFunctor
837 // Drop all entries that are off-rank
838 // - ClassicalDropping::DropFunctor
839 // Classical dropping
840 // - DistanceLaplacian::VectorDropFunctor
841 // Distance Laplacian dropping
842 // - Misc::KeepDiagonalFunctor
843 // Mark diagonal as KEEP
844 // - Misc::MarkSingletonFunctor
845 // Mark singletons after dropping as Dirichlet
846
847 // rowptr of filtered A
848 auto filtered_rowptr = rowptr_type("rowptr", lclA.numRows() + 1);
849 auto graph_rowptr = rowptr_type("rowptr", numNodes + 1);
850 // Number of nonzeros of filtered A and graph
851 Kokkos::pair<LocalOrdinal, LocalOrdinal> nnz = {0, 0};
852
853 // dropping decisions for each entry
854 auto results = results_view_type("results", lclA.nnz()); // initialized to UNDECIDED
855
856 RCP<Matrix> mergedA;
857 {
858 SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
859
860 {
861 // Construct merged A.
862
863 auto merged_rowptr = rowptr_type("rowptr", numNodes + 1);
864 LocalOrdinal nnz_merged = 0;
865
866 auto functor = MatrixConstruction::MergeCountFunctor(lclA, blkPartSize, colTranslation, merged_rowptr);
867 Kokkos::parallel_scan("MergeCount", range, functor, nnz_merged);
868
869 local_graph_type lclMergedGraph;
870 auto colidx_merged = entries_type("entries", nnz_merged);
871 auto values_merged = values_type("values", nnz_merged);
872
873 local_matrix_type lclMergedA = local_matrix_type("mergedA",
874 numNodes, nonUniqueMap->getLocalNumElements(),
875 nnz_merged,
876 values_merged, merged_rowptr, colidx_merged);
877
878 auto fillFunctor = MatrixConstruction::MergeFillFunctor<local_matrix_type>(lclA, blkSize, colTranslation, lclMergedA);
879 Kokkos::parallel_for("MueLu::CoalesceDrop::MergeFill", range, fillFunctor);
880
881 mergedA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclMergedA, uniqueMap, nonUniqueMap, uniqueMap, uniqueMap);
882 }
883
884 if (threshold != zero) {
885 if (socUsesMatrix == "A") {
886 if (socUsesMeasure == "unscaled") {
887 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
888 } else if (socUsesMeasure == "smoothed aggregation") {
889 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
890 } else if (socUsesMeasure == "signed ruge-stueben") {
891 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
892 } else if (socUsesMeasure == "signed smoothed aggregation") {
893 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *this);
894 }
895 } else if (socUsesMatrix == "distance laplacian") {
896 auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
897
898 Array<double> dlap_weights = pL.get<Array<double>>("aggregation: distance laplacian directional weights");
899 LocalOrdinal interleaved_blocksize = as<LocalOrdinal>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
900 if (socUsesMeasure == "distance laplacian") {
901 LO dim = (LO)coords->getNumVectors();
902 // If anything isn't 1.0 we need to turn on the weighting
903 bool non_unity = false;
904 for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) {
905 if (dlap_weights[i] != 1.0) {
906 non_unity = true;
907 }
908 }
909 if (non_unity) {
910 if ((LO)dlap_weights.size() == dim) {
911 distanceLaplacianMetric = "weighted";
912 } else if ((LO)dlap_weights.size() == interleaved_blocksize * dim)
913 distanceLaplacianMetric = "block weighted";
914 else {
915 TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError,
916 "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
917 }
918 if (GetVerbLevel() & Statistics1)
919 GetOStream(Statistics1) << "Using distance laplacian weights: " << dlap_weights << std::endl;
920 }
921 }
922
923 if (socUsesMeasure == "unscaled") {
924 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
925 } else if (socUsesMeasure == "smoothed aggregation") {
926 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
927 } else if (socUsesMeasure == "signed ruge-stueben") {
928 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
929 } else if (socUsesMeasure == "signed smoothed aggregation") {
930 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *this);
931 }
932 }
933 } else {
934 Kokkos::deep_copy(results, KEEP);
935
936 auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
937 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, no_op);
938 }
939
940 if (symmetrizeDroppedGraph) {
941 auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
942 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, symmetrize);
943 }
944 }
945 LocalOrdinal nnz_filtered = nnz.first;
946 LocalOrdinal nnz_graph = nnz.second;
947 GO numTotal = lclA.nnz();
948 GO numDropped = numTotal - nnz_filtered;
949 GO numGlobalDropped;
950 Teuchos::reduceAll(*A->getRowMap()->getComm(), Teuchos::REDUCE_SUM, 1, &numDropped, &numGlobalDropped);
951 // We now know the number of entries of filtered A and have the final rowptr.
952
954 // Pass 4: Create local matrix for filtered A
955 //
956 // Dropped entries are optionally lumped to the diagonal.
957
958 RCP<Matrix> filteredA;
959 RCP<LWGraph_kokkos> graph;
960 if (numGlobalDropped > 0) {
961 SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
962
963 local_matrix_type lclFilteredA;
964 if (reuseGraph) {
965 lclFilteredA = local_matrix_type("filteredA", lclA.graph, lclA.numCols());
966 } else {
967 auto colidx = entries_type("entries", nnz_filtered);
968 auto values = values_type("values", nnz_filtered);
969 lclFilteredA = local_matrix_type("filteredA",
970 lclA.numRows(), lclA.numCols(),
971 nnz_filtered,
972 values, filtered_rowptr, colidx);
973 }
974
975 local_graph_type lclGraph;
976 {
977 auto colidx = entries_type("entries", nnz_graph);
978 lclGraph = local_graph_type(colidx, graph_rowptr);
979 }
980
981 if (lumpingChoice != MueLu::MatrixConstruction::no_lumping) {
982 if (reuseGraph) {
983 auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, true>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
984 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
985 } else {
986 auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, false>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
987 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
988 }
989 } else {
990 if (reuseGraph) {
991 auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, true>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
992 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
993 } else {
994 auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, false>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
995 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
996 }
997 }
998
999 filteredA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
1000 filteredA->SetFixedBlockSize(blkSize);
1001
1002 if (reuseEigenvalue) {
1003 // Reuse max eigenvalue from A
1004 // It is unclear what eigenvalue is the best for the smoothing, but we already may have
1005 // the D^{-1}A estimate in A, may as well use it.
1006 // NOTE: ML does that too
1007 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
1008 } else {
1009 filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
1010 }
1011
1012 graph = rcp(new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1013 graph->SetBoundaryNodeMap(boundaryNodes);
1014 } else {
1015 filteredA = A;
1016 graph = rcp(new LWGraph_kokkos(mergedA->getCrsGraph()->getLocalGraphDevice(), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1017 graph->SetBoundaryNodeMap(boundaryNodes);
1018 }
1019
1020 // Construct a second graph for coloring
1021 if (generateColoringGraph) {
1022 SubFactoryMonitor mColoringGraph(*this, "Construct coloring graph", currentLevel);
1023
1024 filtered_rowptr = rowptr_type("rowptr_coloring_graph", lclA.numRows() + 1);
1025 graph_rowptr = rowptr_type("rowptr", numNodes + 1);
1026 if (localizeColoringGraph) {
1027 auto drop_offrank = Misc::DropOffRankFunctor(lclA, results);
1028 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, drop_offrank);
1029 }
1030 if (symmetrizeColoringGraph) {
1031 auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
1032 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *this, symmetrize);
1033 }
1034 auto colidx = entries_type("entries_coloring_graph", nnz_filtered);
1035 auto lclGraph = local_graph_type(colidx, filtered_rowptr);
1036 auto graphConstruction = MatrixConstruction::GraphConstruction<local_matrix_type, local_graph_type>(lclA, results, lclGraph);
1037 Kokkos::parallel_for("MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
1038
1039 auto colorGraph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "coloring graph"));
1040 Set(currentLevel, "Coloring Graph", colorGraph);
1041 }
1042
1043 LO dofsPerNode = blkSize;
1044
1045 Set(currentLevel, "DofsPerNode", dofsPerNode);
1046 Set(currentLevel, "Graph", graph);
1047 Set(currentLevel, "A", filteredA);
1048
1049 return std::make_tuple(numDropped, boundaryNodes);
1050}
1051
1052} // namespace MueLu
1053#endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Functor that serially applies sub-functors to rows.
Functor for marking nodes as Dirichlet based on rowsum.
Functor for marking nodes as Dirichlet in a block operator.
void DeclareInput(Level &currentLevel) const
Input.
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
void Build(Level &currentLevel) const
Build an object with this factory.
typename MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node >::boundary_nodes_type boundary_nodes_type
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildVector(Level &currentLevel) const
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildScalar(Level &currentLevel) const
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Lightweight MueLu representation of a compressed row storage graph.
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
Functor does not reuse the graph of the matrix for a problem with blockSize == 1.
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping,...
Functor that drops off-rank entries.
Functor that drops boundary nodes for a blockSize == 1 problem.
Functor that symmetrizes the dropping decisions.
static const NoFactory * get()
static void runDroppingFunctors_on_A(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
static void runDroppingFunctors_on_dlap(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Level &level, const Factory &factory)
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps() *100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool doLumped=false)
Extract Matrix Diagonal.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > SPAI(const RCP< Matrix > &original)
Creates a sparse approximate inverse of a matrix with the same nonzero pattern as the input matrix.
static GlobalOrdinal CountNegativeDiagonalEntries(const Matrix &A)
Counts the number of negative diagonal entries.
static void runDroppingFunctors_on_A(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
static void runDroppingFunctors_on_dlap(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Teuchos::Array< double > &dlap_weights, LocalOrdinal interleaved_blocksize, Level &level, const Factory &factory)
Namespace for MueLu classes and methods.
@ Statistics2
Print even more statistics.
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
void runBoundaryFunctors(local_matrix_type &lclA, boundary_nodes_view &boundaryNodes, Functors &... functors)
void translateOldAlgoParam(const Teuchos::ParameterList &pL, std::string &droppingMethod, bool &useBlocking, std::string &socUsesMatrix, std::string &socUsesMeasure, bool &symmetrizeDroppedGraph, bool &generateColoringGraph, magnitudeType &threshold, MueLu::MatrixConstruction::lumpingType &lumpingChoice)