MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_CoalesceDropFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP
11#define MUELU_COALESCEDROPFACTORY_DEF_HPP
12
13#include <Xpetra_CrsGraphFactory.hpp>
14#include <Xpetra_CrsGraph.hpp>
15#include <Xpetra_ImportFactory.hpp>
16#include <Xpetra_ExportFactory.hpp>
17#include <Xpetra_MapFactory.hpp>
18#include <Xpetra_Map.hpp>
19#include <Xpetra_Matrix.hpp>
20#include <Xpetra_MultiVectorFactory.hpp>
21#include <Xpetra_MultiVector.hpp>
22#include <Xpetra_StridedMap.hpp>
23#include <Xpetra_VectorFactory.hpp>
24#include <Xpetra_Vector.hpp>
25
26#include <Xpetra_IO.hpp>
27
28#include <Kokkos_NestedSort.hpp>
29#include <Kokkos_StdAlgorithms.hpp>
31
32#include "MueLu_AmalgamationFactory.hpp"
33#include "MueLu_AmalgamationInfo.hpp"
34#include "MueLu_Exceptions.hpp"
35#include "MueLu_LWGraph.hpp"
36
37#include "MueLu_Level.hpp"
38#include "MueLu_LWGraph.hpp"
39#include "MueLu_MasterList.hpp"
40#include "MueLu_Monitor.hpp"
41#include "MueLu_PreDropFunctionConstVal.hpp"
42#include "MueLu_Utilities.hpp"
43
44#ifdef HAVE_XPETRA_TPETRA
45#include "Tpetra_CrsGraphTransposer.hpp"
46#endif
47
48#include <algorithm>
49#include <cstdlib>
50#include <string>
51
52// If defined, read environment variables.
53// Should be removed once we are confident that this works.
54//#define DJS_READ_ENV_VARIABLES
55
56namespace MueLu {
57
58namespace Details {
59template <class real_type, class LO>
60struct DropTol {
61 DropTol() = default;
62 DropTol(DropTol const&) = default;
63 DropTol(DropTol&&) = default;
64
65 DropTol& operator=(DropTol const&) = default;
66 DropTol& operator=(DropTol&&) = default;
67
68 DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
69 : val{val_}
70 , diag{diag_}
71 , col{col_}
72 , drop{drop_} {}
73
74 real_type val{Teuchos::ScalarTraits<real_type>::zero()};
75 real_type diag{Teuchos::ScalarTraits<real_type>::zero()};
76 LO col{Teuchos::OrdinalTraits<LO>::invalid()};
77 bool drop{true};
78
79 // CMS: Auxillary information for debugging info
80 // real_type aux_val {Teuchos::ScalarTraits<real_type>::nan()};
81};
82} // namespace Details
83
88
89template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
91 RCP<ParameterList> validParamList = rcp(new ParameterList());
92
93#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
94 SET_VALID_ENTRY("aggregation: drop tol");
95 SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
96 SET_VALID_ENTRY("aggregation: Dirichlet threshold");
97 SET_VALID_ENTRY("aggregation: greedy Dirichlet");
98 SET_VALID_ENTRY("aggregation: row sum drop tol");
99 SET_VALID_ENTRY("aggregation: drop scheme");
100 SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
101 SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
102 SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
103
104 {
105 // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values)
106 validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical"))));
107 }
108 SET_VALID_ENTRY("aggregation: distance laplacian algo");
109 SET_VALID_ENTRY("aggregation: classical algo");
110 SET_VALID_ENTRY("aggregation: coloring: localize color graph");
111#undef SET_VALID_ENTRY
112 validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
113
114 validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
115 validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
116 validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
117 validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNUmber");
118
119 return validParamList;
120}
121
122template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
125
126template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
128 Input(currentLevel, "A");
129 Input(currentLevel, "UnAmalgamationInfo");
130
131 const ParameterList& pL = GetParameterList();
132 if (pL.get<bool>("lightweight wrap") == true) {
133 std::string algo = pL.get<std::string>("aggregation: drop scheme");
134 if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
135 Input(currentLevel, "Coordinates");
136 }
137 if (algo == "signed classical sa")
138 ;
139 else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) {
140 Input(currentLevel, "BlockNumber");
141 }
142 }
143}
144
145template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
147 FactoryMonitor m(*this, "Build", currentLevel);
148
149 typedef Teuchos::ScalarTraits<SC> STS;
150 typedef typename STS::magnitudeType real_type;
151 typedef Xpetra::MultiVector<real_type, LO, GO, NO> RealValuedMultiVector;
152 typedef Xpetra::MultiVectorFactory<real_type, LO, GO, NO> RealValuedMultiVectorFactory;
153
154 if (predrop_ != Teuchos::null)
155 GetOStream(Parameters0) << predrop_->description();
156
157 RCP<Matrix> realA = Get<RCP<Matrix>>(currentLevel, "A");
158 RCP<AmalgamationInfo> amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
159 const ParameterList& pL = GetParameterList();
160 bool doExperimentalWrap = pL.get<bool>("lightweight wrap");
161
162 GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl;
163 std::string algo = pL.get<std::string>("aggregation: drop scheme");
164 const bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
165
166 RCP<RealValuedMultiVector> Coords;
167 RCP<Matrix> A;
168
169 bool use_block_algorithm = false;
170 LO interleaved_blocksize = as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
171 bool useSignedClassicalRS = false;
172 bool useSignedClassicalSA = false;
173 bool generateColoringGraph = false;
174
175 // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice (we'll do it
176 // in the block diagonalization). So we'll clobber the rowSumTol with -1.0 in this case
177 typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
178
179 RCP<LocalOrdinalVector> ghostedBlockNumber;
180
181 if (algo == "distance laplacian") {
182 // Grab the coordinates for distance laplacian
183 Coords = Get<RCP<RealValuedMultiVector>>(currentLevel, "Coordinates");
184 A = realA;
185 } else if (algo == "signed classical sa") {
186 useSignedClassicalSA = true;
187 algo = "classical";
188 A = realA;
189 } else if (algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") {
190 useSignedClassicalRS = true;
191 // if(realA->GetFixedBlockSize() > 1) {
192 RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber");
193 // Ghost the column block numbers if we need to
194 RCP<const Import> importer = realA->getCrsGraph()->getImporter();
195 if (!importer.is_null()) {
196 SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
197 ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
198 ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
199 } else {
200 ghostedBlockNumber = BlockNumber;
201 }
202 // }
203 if (algo == "block diagonal colored signed classical")
204 generateColoringGraph = true;
205 algo = "classical";
206 A = realA;
207
208 } else if (algo == "block diagonal") {
209 // Handle the "block diagonal" filtering and then leave
210 BlockDiagonalize(currentLevel, realA, false);
211 return;
212 } else if (algo == "block diagonal classical" || algo == "block diagonal distance laplacian") {
213 // Handle the "block diagonal" filtering, and then continue onward
214 use_block_algorithm = true;
215 RCP<Matrix> filteredMatrix = BlockDiagonalize(currentLevel, realA, true);
216 if (algo == "block diagonal distance laplacian") {
217 // We now need to expand the coordinates by the interleaved blocksize
218 RCP<RealValuedMultiVector> OldCoords = Get<RCP<RealValuedMultiVector>>(currentLevel, "Coordinates");
219 if (OldCoords->getLocalLength() != realA->getLocalNumRows()) {
220 LO dim = (LO)OldCoords->getNumVectors();
221 Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(), dim);
222 for (LO k = 0; k < dim; k++) {
223 ArrayRCP<const real_type> old_vec = OldCoords->getData(k);
224 ArrayRCP<real_type> new_vec = Coords->getDataNonConst(k);
225 for (LO i = 0; i < (LO)OldCoords->getLocalLength(); i++) {
226 LO new_base = i * dim;
227 for (LO j = 0; j < interleaved_blocksize; j++)
228 new_vec[new_base + j] = old_vec[i];
229 }
230 }
231 } else {
232 Coords = OldCoords;
233 }
234 algo = "distance laplacian";
235 } else if (algo == "block diagonal classical") {
236 algo = "classical";
237 }
238 // All cases
239 A = filteredMatrix;
240 rowSumTol = -1.0;
241 } else {
242 A = realA;
243 }
244
245 // Distance Laplacian weights
246 Array<double> dlap_weights = pL.get<Array<double>>("aggregation: distance laplacian directional weights");
247 enum { NO_WEIGHTS = 0,
248 SINGLE_WEIGHTS,
249 BLOCK_WEIGHTS };
250 int use_dlap_weights = NO_WEIGHTS;
251 if (algo == "distance laplacian") {
252 LO dim = (LO)Coords->getNumVectors();
253 // If anything isn't 1.0 we need to turn on the weighting
254 bool non_unity = false;
255 for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) {
256 if (dlap_weights[i] != 1.0) {
257 non_unity = true;
258 }
259 }
260 if (non_unity) {
261 LO blocksize = use_block_algorithm ? as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize")) : 1;
262 if ((LO)dlap_weights.size() == dim)
263 use_dlap_weights = SINGLE_WEIGHTS;
264 else if ((LO)dlap_weights.size() == blocksize * dim)
265 use_dlap_weights = BLOCK_WEIGHTS;
266 else {
267 TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError,
268 "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
269 }
270 if (GetVerbLevel() & Statistics1)
271 GetOStream(Statistics1) << "Using distance laplacian weights: " << dlap_weights << std::endl;
272 }
273 }
274
275 // decide wether to use the fast-track code path for standard maps or the somewhat slower
276 // code path for non-standard maps
277 /*bool bNonStandardMaps = false;
278 if (A->IsView("stridedMaps") == true) {
279 Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
280 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
281 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
282 if (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0)
283 bNonStandardMaps = true;
284 }*/
285
286 if (doExperimentalWrap) {
287 TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm");
288 TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian" && algo != "signed classical", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian|signed classical)");
289
290 SC threshold;
291 // If we're doing the ML-style halving of the drop tol at each level, we do that here.
292 if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
293 threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
294 else
295 threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
296
297 std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
298 std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
299 real_type realThreshold = STS::magnitude(threshold); // CMS: Rename this to "magnitude threshold" sometime
300
302 // Remove this bit once we are confident that cut-based dropping works.
303#ifdef HAVE_MUELU_DEBUG
304 int distanceLaplacianCutVerbose = 0;
305#endif
306#ifdef DJS_READ_ENV_VARIABLES
307 if (getenv("MUELU_DROP_TOLERANCE_MODE")) {
308 distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE"));
309 }
310
311 if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) {
312 auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD"));
313 realThreshold = 1e-4 * tmp;
314 }
315
316#ifdef HAVE_MUELU_DEBUG
317 if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) {
318 distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE"));
319 }
320#endif
321#endif
323
324 decisionAlgoType distanceLaplacianAlgo = defaultAlgo;
325 decisionAlgoType classicalAlgo = defaultAlgo;
326 if (algo == "distance laplacian") {
327 if (distanceLaplacianAlgoStr == "default")
328 distanceLaplacianAlgo = defaultAlgo;
329 else if (distanceLaplacianAlgoStr == "unscaled cut")
330 distanceLaplacianAlgo = unscaled_cut;
331 else if (distanceLaplacianAlgoStr == "scaled cut")
332 distanceLaplacianAlgo = scaled_cut;
333 else if (distanceLaplacianAlgoStr == "scaled cut symmetric")
334 distanceLaplacianAlgo = scaled_cut_symmetric;
335 else
336 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\"");
337 GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
338 } else if (algo == "classical") {
339 if (classicalAlgoStr == "default")
340 classicalAlgo = defaultAlgo;
341 else if (classicalAlgoStr == "unscaled cut")
342 classicalAlgo = unscaled_cut;
343 else if (classicalAlgoStr == "scaled cut")
344 classicalAlgo = scaled_cut;
345 else
346 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\"");
347 GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
348
349 } else
350 GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
351
352 if (((algo == "classical") && (classicalAlgoStr.find("scaled") != std::string::npos)) || ((algo == "distance laplacian") && (distanceLaplacianAlgoStr.find("scaled") != std::string::npos)))
353 TEUCHOS_TEST_FOR_EXCEPTION(realThreshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
354
355 Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
356
357 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
358
359 // NOTE: We don't support signed classical RS or SA with cut drop at present
360 TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalRS && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical aggregation");
361 TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalSA && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical sa aggregation");
362
363 GO numDropped = 0, numTotal = 0;
364 std::string graphType = "unamalgamated"; // for description purposes only
365
366 /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
367 BlockSize is the number of storage blocks that must kept together during the amalgamation process.
368
369 Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
370
371 numPDEs = BlockSize * storageblocksize.
372
373 If numPDEs==1
374 Matrix is point storage (classical CRS storage). storageblocksize=1 and BlockSize=1
375 No other values makes sense.
376
377 If numPDEs>1
378 If matrix uses point storage, then storageblocksize=1 and BlockSize=numPDEs.
379 If matrix uses block storage, with block size of n, then storageblocksize=n, and BlockSize=numPDEs/n.
380 Thus far, only storageblocksize=numPDEs and BlockSize=1 has been tested.
381 */
382 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
383 const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
384
385 /************************** RS or SA-style Classical Dropping (and variants) **************************/
386 if (algo == "classical") {
387 if (predrop_ == null) {
388 // ap: this is a hack: had to declare predrop_ as mutable
389 predrop_ = rcp(new PreDropFunctionConstVal(threshold));
390 }
391
392 if (predrop_ != null) {
393 RCP<PreDropFunctionConstVal> predropConstVal = rcp_dynamic_cast<PreDropFunctionConstVal>(predrop_);
394 TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast,
395 "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
396 // If a user provided a predrop function, it overwrites the XML threshold parameter
397 SC newt = predropConstVal->GetThreshold();
398 if (newt != threshold) {
399 GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl;
400 threshold = newt;
401 }
402 }
403 // At this points we either have
404 // (predrop_ != null)
405 // Therefore, it is sufficient to check only threshold
406 if (BlockSize == 1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) {
407 // Case 1: scalar problem, no dropping => just use matrix graph
408 RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
409 // Detect and record rows that correspond to Dirichlet boundary conditions
410 auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
411 if (rowSumTol > 0.)
412 Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
413
414 graph->SetBoundaryNodeMap(boundaryNodes);
415 numTotal = A->getLocalNumEntries();
416
417 if (GetVerbLevel() & Statistics1) {
418 GO numLocalBoundaryNodes = 0;
419 GO numGlobalBoundaryNodes = 0;
420 for (size_t i = 0; i < boundaryNodes.size(); ++i)
421 if (boundaryNodes[i])
422 numLocalBoundaryNodes++;
423 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
424 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
425 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
426 }
427
428 Set(currentLevel, "DofsPerNode", 1);
429 Set(currentLevel, "Graph", graph);
430
431 } else if ((BlockSize == 1 && threshold != STS::zero()) ||
432 (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) ||
433 (BlockSize == 1 && useSignedClassicalRS) ||
434 (BlockSize == 1 && useSignedClassicalSA)) {
435 // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original
436 // graph's map information, e.g., whether index is local
437 // OR a matrix without a CrsGraph
438
439 // allocate space for the local graph
440 typename LWGraph::row_type::non_const_type rows("rows", A->getLocalNumRows() + 1);
441 typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
442
443 using MT = typename STS::magnitudeType;
444 RCP<Vector> ghostedDiag;
445 ArrayRCP<const SC> ghostedDiagVals;
446 ArrayRCP<const SC> negMaxOffDiagonal;
447 // RS style needs the max negative off-diagonal, SA style needs the diagonal
448 if (useSignedClassicalRS) {
449 if (ghostedBlockNumber.is_null()) {
451 negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
452 if (GetVerbLevel() & Statistics1)
453 GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl;
454 } else {
455 auto negMaxOffDiagonalVec = MueLu::Utilities<SC, LO, GO, NO>::GetMatrixMaxMinusOffDiagonal(*A, *ghostedBlockNumber);
456 negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
457 if (GetVerbLevel() & Statistics1)
458 GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl;
459 }
460 } else {
462 if (classicalAlgo == defaultAlgo) {
463 ghostedDiagVals = ghostedDiag->getData(0);
464 }
465 }
466 auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
467 if (rowSumTol > 0.) {
468 if (ghostedBlockNumber.is_null()) {
469 if (GetVerbLevel() & Statistics1)
470 GetOStream(Statistics1) << "Applying point row sum criterion." << std::endl;
471 Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
472 } else {
473 if (GetVerbLevel() & Statistics1)
474 GetOStream(Statistics1) << "Applying block row sum criterion." << std::endl;
475 Utilities::ApplyRowSumCriterionHost(*A, *ghostedBlockNumber, rowSumTol, boundaryNodes);
476 }
477 }
478
479 ArrayRCP<const LO> g_block_id;
480 if (!ghostedBlockNumber.is_null())
481 g_block_id = ghostedBlockNumber->getData(0);
482
483 LO realnnz = 0;
484 rows(0) = 0;
485 if (classicalAlgo == defaultAlgo) {
486 SubFactoryMonitor m1(*this, "Classical RS/SA", currentLevel);
487 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
488 size_t nnz = A->getNumEntriesInLocalRow(row);
489 bool rowIsDirichlet = boundaryNodes[row];
490 ArrayView<const LO> indices;
491 ArrayView<const SC> vals;
492 A->getLocalRowView(row, indices, vals);
493
494 // FIXME the current predrop function uses the following
495 // FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid )
496 // FIXME but the threshold doesn't take into account the rows' diagonal entries
497 // FIXME For now, hardwiring the dropping in here
498
499 LO rownnz = 0;
500 if (useSignedClassicalRS) {
501 // Signed classical RS style
502 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
503 LO col = indices[colID];
504 MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]);
505 MT neg_aij = -STS::real(vals[colID]);
506 /* if(row==1326) printf("A(%d,%d) = %6.4e, block = (%d,%d) neg_aij = %6.4e max_neg_aik = %6.4e\n",row,col,vals[colID],
507 g_block_id.is_null() ? -1 : g_block_id[row],
508 g_block_id.is_null() ? -1 : g_block_id[col],
509 neg_aij, max_neg_aik);*/
510 if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) {
511 columns[realnnz++] = col;
512 rownnz++;
513 } else
514 numDropped++;
515 }
516 rows(row + 1) = realnnz;
517 } else if (useSignedClassicalSA) {
518 // Signed classical SA style
519 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
520 LO col = indices[colID];
521
522 bool is_nonpositive = STS::real(vals[colID]) <= 0;
523 MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
524 MT aij = is_nonpositive ? STS::magnitude(vals[colID] * vals[colID]) : (-STS::magnitude(vals[colID] * vals[colID])); // + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0
525 /*
526 if(row==1326) printf("A(%d,%d) = %6.4e, raw_aij = %6.4e aij = %6.4e aiiajj = %6.4e\n",row,col,vals[colID],
527 vals[colID],aij, aiiajj);
528 */
529
530 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
531 columns(realnnz++) = col;
532 rownnz++;
533 } else
534 numDropped++;
535 }
536 rows[row + 1] = realnnz;
537 } else {
538 // Standard abs classical
539 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
540 LO col = indices[colID];
541 MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
542 MT aij = STS::magnitude(vals[colID] * vals[colID]); // |a_ij|^2
543
544 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
545 columns(realnnz++) = col;
546 rownnz++;
547 } else
548 numDropped++;
549 }
550 rows(row + 1) = realnnz;
551 }
552 } // end for row
553 } else {
554 /* Cut Algorithm */
555 SubFactoryMonitor m1(*this, "Cut Drop", currentLevel);
556 using ExecSpace = typename Node::execution_space;
557 using TeamPol = Kokkos::TeamPolicy<ExecSpace>;
558 using TeamMem = typename TeamPol::member_type;
559#if KOKKOS_VERSION >= 40799
560 using ATS = KokkosKernels::ArithTraits<Scalar>;
561#else
562 using ATS = Kokkos::ArithTraits<Scalar>;
563#endif
564 using impl_scalar_type = typename ATS::val_type;
565#if KOKKOS_VERSION >= 40799
566 using implATS = KokkosKernels::ArithTraits<impl_scalar_type>;
567#else
568 using implATS = Kokkos::ArithTraits<impl_scalar_type>;
569#endif
570
571 // move from host to device
572 auto ghostedDiagValsView = Kokkos::subview(ghostedDiag->getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
573 auto thresholdKokkos = static_cast<impl_scalar_type>(threshold);
574 auto realThresholdKokkos = implATS::magnitude(thresholdKokkos);
575 auto columnsDevice = Kokkos::create_mirror_view(ExecSpace(), columns);
576
577 auto A_device = A->getLocalMatrixDevice();
578 RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
579 RCP<const Import> importer = A->getCrsGraph()->getImporter();
580 RCP<LocalOrdinalVector> boundaryNodesVector = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(graph->GetDomainMap());
581 RCP<LocalOrdinalVector> boundaryColumnVector;
582 for (size_t i = 0; i < graph->GetNodeNumVertices(); i++) {
583 boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
584 }
585 if (!importer.is_null()) {
586 boundaryColumnVector = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(graph->GetImportMap());
587 boundaryColumnVector->doImport(*boundaryNodesVector, *importer, Xpetra::INSERT);
588 } else {
589 boundaryColumnVector = boundaryNodesVector;
590 }
591 auto boundaryColumn = boundaryColumnVector->getLocalViewDevice(Tpetra::Access::ReadOnly);
592 auto boundary = Kokkos::subview(boundaryColumn, Kokkos::ALL(), 0);
593
594 Kokkos::View<LO*, ExecSpace> rownnzView("rownnzView", A_device.numRows());
595 auto drop_views = Kokkos::View<bool*, ExecSpace>("drop_views", A_device.nnz());
596 auto index_views = Kokkos::View<size_t*, ExecSpace>("index_views", A_device.nnz());
597
598 Kokkos::parallel_reduce(
599 "classical_cut", TeamPol(A_device.numRows(), Kokkos::AUTO), KOKKOS_LAMBDA(const TeamMem& teamMember, LO& globalnnz, GO& totalDropped) {
600 LO row = teamMember.league_rank();
601 auto rowView = A_device.rowConst(row);
602 size_t nnz = rowView.length;
603
604 auto drop_view = Kokkos::subview(drop_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
605 auto index_view = Kokkos::subview(index_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
606
607 // find magnitudes
608 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, (LO)nnz), [&](const LO colID) {
609 index_view(colID) = colID;
610 LO col = rowView.colidx(colID);
611 // ignore diagonals for now, they are checked again later
612 // Don't aggregate boundaries
613 if (row == col || boundary(col)) {
614 drop_view(colID) = true;
615 } else {
616 drop_view(colID) = false;
617 }
618 });
619
620 size_t dropStart = nnz;
621 if (classicalAlgo == unscaled_cut) {
622 // push diagonals and boundaries to the right, sort everything else by aij on the left
623 Kokkos::Experimental::sort_team(teamMember, index_view, [=](size_t& x, size_t& y) -> bool {
624 if (drop_view(x) || drop_view(y)) {
625 return drop_view(x) < drop_view(y);
626 } else {
627 auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
628 auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
629 return x_aij > y_aij;
630 }
631 });
632
633 // find index where dropping starts
634 Kokkos::parallel_reduce(
635 Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](size_t i, size_t& min) {
636 auto const& x = index_view(i - 1);
637 auto const& y = index_view(i);
638 typename implATS::magnitudeType x_aij = 0;
639 typename implATS::magnitudeType y_aij = 0;
640 if (!drop_view(x)) {
641 x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
642 }
643 if (!drop_view(y)) {
644 y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
645 }
646
647 if (realThresholdKokkos * realThresholdKokkos * x_aij > y_aij) {
648 if (i < min) {
649 min = i;
650 }
651 }
652 },
653 Kokkos::Min<size_t>(dropStart));
654 } else if (classicalAlgo == scaled_cut) {
655 // push diagonals and boundaries to the right, sort everything else by aij/aiiajj on the left
656 Kokkos::Experimental::sort_team(teamMember, index_view, [=](size_t& x, size_t& y) -> bool {
657 if (drop_view(x) || drop_view(y)) {
658 return drop_view(x) < drop_view(y);
659 } else {
660 auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
661 auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
662 auto x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
663 auto y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
664 return (x_aij / x_aiiajj) > (y_aij / y_aiiajj);
665 }
666 });
667
668 // find index where dropping starts
669 Kokkos::parallel_reduce(
670 Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](size_t i, size_t& min) {
671 auto const& x = index_view(i - 1);
672 auto const& y = index_view(i);
673 typename implATS::magnitudeType x_val = 0;
674 typename implATS::magnitudeType y_val = 0;
675 if (!drop_view(x)) {
676 typename implATS::magnitudeType x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
677 typename implATS::magnitudeType x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
678 x_val = x_aij / x_aiiajj;
679 }
680 if (!drop_view(y)) {
681 typename implATS::magnitudeType y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
682 typename implATS::magnitudeType y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
683 y_val = y_aij / y_aiiajj;
684 }
685
686 if (realThresholdKokkos * realThresholdKokkos * x_val > y_val) {
687 if (i < min) {
688 min = i;
689 }
690 }
691 },
692 Kokkos::Min<size_t>(dropStart));
693 }
694
695 // drop everything to the right of where values stop passing threshold
696 if (dropStart < nnz) {
697 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, dropStart, nnz), [=](size_t i) {
698 drop_view(index_view(i)) = true;
699 });
700 }
701
702 LO rownnz = 0;
703 GO rowDropped = 0;
704 Kokkos::parallel_reduce(
705 Kokkos::TeamThreadRange(teamMember, nnz), [=](const size_t idxID, LO& keep, GO& drop) {
706 LO col = rowView.colidx(idxID);
707 // don't drop diagonal
708 if (row == col || !drop_view(idxID)) {
709 columnsDevice(A_device.graph.row_map(row) + idxID) = col;
710 keep++;
711 } else {
712 columnsDevice(A_device.graph.row_map(row) + idxID) = -1;
713 drop++;
714 }
715 },
716 rownnz, rowDropped);
717
718 globalnnz += rownnz;
719 totalDropped += rowDropped;
720 rownnzView(row) = rownnz;
721 },
722 realnnz, numDropped);
723
724 // update column indices so that kept indices are aligned to the left for subview that happens later on
725 Kokkos::Experimental::remove(ExecSpace(), columnsDevice, -1);
726 Kokkos::deep_copy(columns, columnsDevice);
727
728 // update row indices by adding up new # of nnz in each row
729 auto rowsDevice = Kokkos::create_mirror_view(ExecSpace(), rows);
730 Kokkos::parallel_scan(
731 Kokkos::RangePolicy<ExecSpace>(0, A_device.numRows()), KOKKOS_LAMBDA(const int i, LO& partial_sum, bool is_final) {
732 partial_sum += rownnzView(i);
733 if (is_final) rowsDevice(i + 1) = partial_sum;
734 });
735 Kokkos::deep_copy(rows, rowsDevice);
736 }
737
738 numTotal = A->getLocalNumEntries();
739
740 if (aggregationMayCreateDirichlet) {
741 // If the only element remaining after filtering is diagonal, mark node as boundary
742 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
743 if (rows[row + 1] - rows[row] <= 1)
744 boundaryNodes[row] = true;
745 }
746 }
747
748 RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(), "thresholded graph of A"));
749 graph->SetBoundaryNodeMap(boundaryNodes);
750 if (GetVerbLevel() & Statistics1) {
751 GO numLocalBoundaryNodes = 0;
752 GO numGlobalBoundaryNodes = 0;
753 for (size_t i = 0; i < boundaryNodes.size(); ++i)
754 if (boundaryNodes(i))
755 numLocalBoundaryNodes++;
756 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
757 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
758 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
759 }
760 Set(currentLevel, "Graph", graph);
761 Set(currentLevel, "DofsPerNode", 1);
762
763 // If we're doing signed classical, we might want to block-diagonalize *after* the dropping
764 if (generateColoringGraph) {
765 RCP<LWGraph> colorGraph;
766 RCP<const Import> importer = A->getCrsGraph()->getImporter();
767 BlockDiagonalizeGraph(graph, ghostedBlockNumber, colorGraph, importer);
768 Set(currentLevel, "Coloring Graph", colorGraph);
769 // #define CMS_DUMP
770#ifdef CMS_DUMP
771 {
772 Xpetra::IO<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("m_regular_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(graph)->GetCrsGraph());
773 Xpetra::IO<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("m_color_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(colorGraph)->GetCrsGraph());
774 // int rank = graph->GetDomainMap()->getComm()->getRank();
775 // {
776 // std::ofstream ofs(std::string("m_color_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
777 // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
778 // colorGraph->print(*fancy,Debug);
779 // }
780 // {
781 // std::ofstream ofs(std::string("m_regular_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
782 // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
783 // graph->print(*fancy,Debug);
784 // }
785 }
786#endif
787 } // end generateColoringGraph
788 } else if (BlockSize > 1 && threshold == STS::zero()) {
789 // Case 3: Multiple DOF/node problem without dropping
790 const RCP<const Map> rowMap = A->getRowMap();
791 const RCP<const Map> colMap = A->getColMap();
792
793 graphType = "amalgamated";
794
795 // build node row map (uniqueMap) and node column map (nonUniqueMap)
796 // the arrays rowTranslation and colTranslation contain the local node id
797 // given a local dof id. The data is calculated by the AmalgamationFactory and
798 // stored in the variable container "UnAmalgamationInfo"
799 RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
800 RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
801 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
802 Array<LO> colTranslation = *(amalInfo->getColTranslation());
803
804 // get number of local nodes
805 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
806
807 // Allocate space for the local graph
808 typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
809 typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
810
811 typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
812 Kokkos::deep_copy(amalgBoundaryNodes, false);
813
814 // Detect and record rows that correspond to Dirichlet boundary conditions
815 // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
816 // TODO the array one bigger than the number of local rows, and the last entry can
817 // TODO hold the actual number of boundary nodes. Clever, huh?
818 ArrayRCP<bool> pointBoundaryNodes;
819 pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows(*A, dirichletThreshold));
820 if (rowSumTol > 0.)
821 Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
822
823 // extract striding information
824 LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
825 LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
826 LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
827 if (A->IsView("stridedMaps") == true) {
828 Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
829 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
830 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
831 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
832 blkId = strMap->getStridedBlockId();
833 if (blkId > -1)
834 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
835 }
836
837 // loop over all local nodes
838 LO realnnz = 0;
839 rows(0) = 0;
840 Array<LO> indicesExtra;
841 for (LO row = 0; row < numRows; row++) {
842 ArrayView<const LO> indices;
843 indicesExtra.resize(0);
844
845 // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
846 // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
847 // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
848 // with local ids.
849 // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
850 // node.
851 bool isBoundary = false;
852 if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
853 for (LO j = 0; j < blkPartSize; j++) {
854 if (pointBoundaryNodes[row * blkPartSize + j]) {
855 isBoundary = true;
856 break;
857 }
858 }
859 } else {
860 isBoundary = true;
861 for (LO j = 0; j < blkPartSize; j++) {
862 if (!pointBoundaryNodes[row * blkPartSize + j]) {
863 isBoundary = false;
864 break;
865 }
866 }
867 }
868
869 // Merge rows of A
870 // The array indicesExtra contains local column node ids for the current local node "row"
871 if (!isBoundary)
872 MergeRows(*A, row, indicesExtra, colTranslation);
873 else
874 indicesExtra.push_back(row);
875 indices = indicesExtra;
876 numTotal += indices.size();
877
878 // add the local column node ids to the full columns array which
879 // contains the local column node ids for all local node rows
880 LO nnz = indices.size(), rownnz = 0;
881 for (LO colID = 0; colID < nnz; colID++) {
882 LO col = indices[colID];
883 columns(realnnz++) = col;
884 rownnz++;
885 }
886
887 if (rownnz == 1) {
888 // If the only element remaining after filtering is diagonal, mark node as boundary
889 // FIXME: this should really be replaced by the following
890 // if (indices.size() == 1 && indices[0] == row)
891 // boundaryNodes[row] = true;
892 // We do not do it this way now because there is no framework for distinguishing isolated
893 // and boundary nodes in the aggregation algorithms
894 amalgBoundaryNodes[row] = true;
895 }
896 rows(row + 1) = realnnz;
897 } // for (LO row = 0; row < numRows; row++)
898
899 RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
900 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
901
902 if (GetVerbLevel() & Statistics1) {
903 GO numLocalBoundaryNodes = 0;
904 GO numGlobalBoundaryNodes = 0;
905
906 for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
907 if (amalgBoundaryNodes(i))
908 numLocalBoundaryNodes++;
909
910 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
911 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
912 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
913 << " agglomerated Dirichlet nodes" << std::endl;
914 }
915
916 Set(currentLevel, "Graph", graph);
917 Set(currentLevel, "DofsPerNode", blkSize); // full block size
918
919 } else if (BlockSize > 1 && threshold != STS::zero()) {
920 // Case 4: Multiple DOF/node problem with dropping
921 const RCP<const Map> rowMap = A->getRowMap();
922 const RCP<const Map> colMap = A->getColMap();
923 graphType = "amalgamated";
924
925 // build node row map (uniqueMap) and node column map (nonUniqueMap)
926 // the arrays rowTranslation and colTranslation contain the local node id
927 // given a local dof id. The data is calculated by the AmalgamationFactory and
928 // stored in the variable container "UnAmalgamationInfo"
929 RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
930 RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
931 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
932 Array<LO> colTranslation = *(amalInfo->getColTranslation());
933
934 // get number of local nodes
935 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
936
937 // Allocate space for the local graph
938 typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
939 typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
940
941 typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
942 Kokkos::deep_copy(amalgBoundaryNodes, false);
943
944 // Detect and record rows that correspond to Dirichlet boundary conditions
945 // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
946 // TODO the array one bigger than the number of local rows, and the last entry can
947 // TODO hold the actual number of boundary nodes. Clever, huh?
948 auto pointBoundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
949 if (rowSumTol > 0.)
950 Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, pointBoundaryNodes);
951
952 // extract striding information
953 LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
954 LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
955 LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
956 if (A->IsView("stridedMaps") == true) {
957 Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
958 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
959 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
960 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
961 blkId = strMap->getStridedBlockId();
962 if (blkId > -1)
963 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
964 }
965
966 // extract diagonal data for dropping strategy
968 const ArrayRCP<const SC> ghostedDiagVals = ghostedDiag->getData(0);
969
970 // loop over all local nodes
971 LO realnnz = 0;
972 rows[0] = 0;
973 Array<LO> indicesExtra;
974 for (LO row = 0; row < numRows; row++) {
975 ArrayView<const LO> indices;
976 indicesExtra.resize(0);
977
978 // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
979 // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
980 // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
981 // with local ids.
982 // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
983 // node.
984 bool isBoundary = false;
985 if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
986 for (LO j = 0; j < blkPartSize; j++) {
987 if (pointBoundaryNodes[row * blkPartSize + j]) {
988 isBoundary = true;
989 break;
990 }
991 }
992 } else {
993 isBoundary = true;
994 for (LO j = 0; j < blkPartSize; j++) {
995 if (!pointBoundaryNodes[row * blkPartSize + j]) {
996 isBoundary = false;
997 break;
998 }
999 }
1000 }
1001
1002 // Merge rows of A
1003 // The array indicesExtra contains local column node ids for the current local node "row"
1004 if (!isBoundary)
1005 MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation);
1006 else
1007 indicesExtra.push_back(row);
1008 indices = indicesExtra;
1009 numTotal += indices.size();
1010
1011 // add the local column node ids to the full columns array which
1012 // contains the local column node ids for all local node rows
1013 LO nnz = indices.size(), rownnz = 0;
1014 for (LO colID = 0; colID < nnz; colID++) {
1015 LO col = indices[colID];
1016 columns[realnnz++] = col;
1017 rownnz++;
1018 }
1019
1020 if (rownnz == 1) {
1021 // If the only element remaining after filtering is diagonal, mark node as boundary
1022 // FIXME: this should really be replaced by the following
1023 // if (indices.size() == 1 && indices[0] == row)
1024 // boundaryNodes[row] = true;
1025 // We do not do it this way now because there is no framework for distinguishing isolated
1026 // and boundary nodes in the aggregation algorithms
1027 amalgBoundaryNodes[row] = true;
1028 }
1029 rows[row + 1] = realnnz;
1030 } // for (LO row = 0; row < numRows; row++)
1031 // columns.resize(realnnz);
1032
1033 RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1034 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1035
1036 if (GetVerbLevel() & Statistics1) {
1037 GO numLocalBoundaryNodes = 0;
1038 GO numGlobalBoundaryNodes = 0;
1039
1040 for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1041 if (amalgBoundaryNodes(i))
1042 numLocalBoundaryNodes++;
1043
1044 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1045 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1046 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
1047 << " agglomerated Dirichlet nodes" << std::endl;
1048 }
1049
1050 Set(currentLevel, "Graph", graph);
1051 Set(currentLevel, "DofsPerNode", blkSize); // full block size
1052 }
1053
1054 } else if (algo == "distance laplacian") {
1055 LO blkSize = A->GetFixedBlockSize();
1056 GO indexBase = A->getRowMap()->getIndexBase();
1057 // [*0*] : FIXME
1058 // ap: somehow, if I move this line to [*1*], Belos throws an error
1059 // I'm not sure what's going on. Do we always have to Get data, if we did
1060 // DeclareInput for it?
1061 // RCP<RealValuedMultiVector> Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
1062
1063 // Detect and record rows that correspond to Dirichlet boundary conditions
1064 // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
1065 // TODO the array one bigger than the number of local rows, and the last entry can
1066 // TODO hold the actual number of boundary nodes. Clever, huh?
1067 auto pointBoundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
1068 if (rowSumTol > 0.)
1069 Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, pointBoundaryNodes);
1070
1071 if ((blkSize == 1) && (threshold == STS::zero())) {
1072 // Trivial case: scalar problem, no dropping. Can return original graph
1073 RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
1074 graph->SetBoundaryNodeMap(pointBoundaryNodes);
1075 graphType = "unamalgamated";
1076 numTotal = A->getLocalNumEntries();
1077
1078 if (GetVerbLevel() & Statistics1) {
1079 GO numLocalBoundaryNodes = 0;
1080 GO numGlobalBoundaryNodes = 0;
1081 for (size_t i = 0; i < pointBoundaryNodes.size(); ++i)
1082 if (pointBoundaryNodes(i))
1083 numLocalBoundaryNodes++;
1084 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1085 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1086 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1087 }
1088
1089 Set(currentLevel, "DofsPerNode", blkSize);
1090 Set(currentLevel, "Graph", graph);
1091
1092 } else {
1093 // ap: We make quite a few assumptions here; general case may be a lot different,
1094 // but much much harder to implement. We assume that:
1095 // 1) all maps are standard maps, not strided maps
1096 // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic
1097 // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i
1098 //
1099 // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo,
1100 // but as I totally don't understand that code, here is my solution
1101
1102 // [*1*]: see [*0*]
1103
1104 // Check that the number of local coordinates is consistent with the #rows in A
1105 TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() / blkSize != Coords->getLocalLength(), Exceptions::Incompatible,
1106 "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() << ") by modulo block size (" << blkSize << ").");
1107
1108 const RCP<const Map> colMap = A->getColMap();
1109 RCP<const Map> uniqueMap, nonUniqueMap;
1110 Array<LO> colTranslation;
1111 if (blkSize == 1) {
1112 uniqueMap = A->getRowMap();
1113 nonUniqueMap = A->getColMap();
1114 graphType = "unamalgamated";
1115
1116 } else {
1117 uniqueMap = Coords->getMap();
1118 TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible,
1119 "Different index bases for matrix and coordinates");
1120
1121 AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation);
1122
1123 graphType = "amalgamated";
1124 }
1125 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
1126
1127 RCP<RealValuedMultiVector> ghostedCoords;
1128 RCP<Vector> ghostedLaplDiag;
1129 Teuchos::ArrayRCP<SC> ghostedLaplDiagData;
1130 if (threshold != STS::zero()) {
1131 // Get ghost coordinates
1132 RCP<const Import> importer;
1133 {
1134 SubFactoryMonitor m1(*this, "Import construction", currentLevel);
1135 if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) {
1136 GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl;
1137 importer = realA->getCrsGraph()->getImporter();
1138 } else {
1139 GetOStream(Warnings0) << "Constructing new importer instance" << std::endl;
1140 importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
1141 }
1142 } // subtimer
1143 ghostedCoords = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(nonUniqueMap, Coords->getNumVectors());
1144 {
1145 SubFactoryMonitor m1(*this, "Coordinate import", currentLevel);
1146 ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT);
1147 } // subtimer
1148
1149 // Construct Distance Laplacian diagonal
1150 RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
1151 Array<LO> indicesExtra;
1152 Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1153 if (threshold != STS::zero()) {
1154 const size_t numVectors = ghostedCoords->getNumVectors();
1155 coordData.reserve(numVectors);
1156 for (size_t j = 0; j < numVectors; j++) {
1157 Teuchos::ArrayRCP<const real_type> tmpData = ghostedCoords->getData(j);
1158 coordData.push_back(tmpData);
1159 }
1160 }
1161 {
1162 SubFactoryMonitor m1(*this, "Laplacian local diagonal", currentLevel);
1163 ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
1164 for (LO row = 0; row < numRows; row++) {
1165 ArrayView<const LO> indices;
1166
1167 if (blkSize == 1) {
1168 ArrayView<const SC> vals;
1169 A->getLocalRowView(row, indices, vals);
1170
1171 } else {
1172 // Merge rows of A
1173 indicesExtra.resize(0);
1174 MergeRows(*A, row, indicesExtra, colTranslation);
1175 indices = indicesExtra;
1176 }
1177
1178 LO nnz = indices.size();
1179 bool haveAddedToDiag = false;
1180 for (LO colID = 0; colID < nnz; colID++) {
1181 const LO col = indices[colID];
1182
1183 if (row != col) {
1184 if (use_dlap_weights == SINGLE_WEIGHTS) {
1185 /*printf("[%d,%d] Unweighted Distance = %6.4e Weighted Distance = %6.4e\n",row,col,
1186 MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col),
1187 MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col));*/
1188 localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1189 } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1190 int block_id = row % interleaved_blocksize;
1191 int block_start = block_id * interleaved_blocksize;
1192 localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1193 } else {
1194 // printf("[%d,%d] Unweighted Distance = %6.4e\n",row,col,MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col));
1195 localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1196 }
1197 haveAddedToDiag = true;
1198 }
1199 }
1200 // Deal with the situation where boundary conditions have only been enforced on rows, but not on columns.
1201 // We enforce dropping of these entries by assigning a very large number to the diagonal entries corresponding to BCs.
1202 if (!haveAddedToDiag)
1203 localLaplDiagData[row] = STS::squareroot(STS::rmax());
1204 }
1205 } // subtimer
1206 {
1207 SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", currentLevel);
1208 ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
1209 ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT);
1210 ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
1211 } // subtimer
1212
1213 } else {
1214 GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl;
1215 }
1216
1217 // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian
1218
1219 // allocate space for the local graph
1220 typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
1221 typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
1222
1223#ifdef HAVE_MUELU_DEBUG
1224 // DEBUGGING
1225 for (LO i = 0; i < (LO)columns.size(); i++) columns[i] = -666;
1226#endif
1227
1228 // Extra array for if we're allowing symmetrization with cutting
1229 ArrayRCP<LO> rows_stop;
1230 bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo == scaled_cut_symmetric;
1231 if (use_stop_array)
1232 // rows_stop = typename LWGraph::row_type::non_const_type("rows_stop", numRows);
1233 rows_stop.resize(numRows);
1234
1235 typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
1236 Kokkos::deep_copy(amalgBoundaryNodes, false);
1237
1238 LO realnnz = 0;
1239 rows(0) = 0;
1240
1241 Array<LO> indicesExtra;
1242 {
1243 SubFactoryMonitor m1(*this, "Laplacian dropping", currentLevel);
1244 Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1245 if (threshold != STS::zero()) {
1246 const size_t numVectors = ghostedCoords->getNumVectors();
1247 coordData.reserve(numVectors);
1248 for (size_t j = 0; j < numVectors; j++) {
1249 Teuchos::ArrayRCP<const real_type> tmpData = ghostedCoords->getData(j);
1250 coordData.push_back(tmpData);
1251 }
1252 }
1253
1254 ArrayView<const SC> vals; // CMS hackery
1255 for (LO row = 0; row < numRows; row++) {
1256 ArrayView<const LO> indices;
1257 indicesExtra.resize(0);
1258 bool isBoundary = false;
1259
1260 if (blkSize == 1) {
1261 // ArrayView<const SC> vals;//CMS uncomment
1262 A->getLocalRowView(row, indices, vals);
1263 isBoundary = pointBoundaryNodes[row];
1264 } else {
1265 // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
1266 isBoundary = true;
1267 for (LO j = 0; j < blkSize; j++) {
1268 if (!pointBoundaryNodes[row * blkSize + j]) {
1269 isBoundary = false;
1270 break;
1271 }
1272 }
1273
1274 // Merge rows of A
1275 if (!isBoundary)
1276 MergeRows(*A, row, indicesExtra, colTranslation);
1277 else
1278 indicesExtra.push_back(row);
1279 indices = indicesExtra;
1280 }
1281 numTotal += indices.size();
1282
1283 LO nnz = indices.size(), rownnz = 0;
1284
1285 if (use_stop_array) {
1286 rows(row + 1) = rows(row) + nnz;
1287 realnnz = rows(row);
1288 }
1289
1290 if (threshold != STS::zero()) {
1291 // default
1292 if (distanceLaplacianAlgo == defaultAlgo) {
1293 /* Standard Distance Laplacian */
1294 for (LO colID = 0; colID < nnz; colID++) {
1295 LO col = indices[colID];
1296
1297 if (row == col) {
1298 columns(realnnz++) = col;
1299 rownnz++;
1300 continue;
1301 }
1302
1303 // We do not want the distance Laplacian aggregating boundary nodes
1304 if (isBoundary) continue;
1305
1306 SC laplVal;
1307 if (use_dlap_weights == SINGLE_WEIGHTS) {
1308 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1309 } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1310 int block_id = row % interleaved_blocksize;
1311 int block_start = block_id * interleaved_blocksize;
1312 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1313 } else {
1314 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1315 }
1316 real_type aiiajj = STS::magnitude(realThreshold * realThreshold * ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1317 real_type aij = STS::magnitude(laplVal * laplVal);
1318
1319 if (aij > aiiajj) {
1320 columns(realnnz++) = col;
1321 rownnz++;
1322 } else {
1323 numDropped++;
1324 }
1325 }
1326 } else {
1327 /* Cut Algorithm */
1328 using DropTol = Details::DropTol<real_type, LO>;
1329 std::vector<DropTol> drop_vec;
1330 drop_vec.reserve(nnz);
1331 const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
1332 const real_type one = Teuchos::ScalarTraits<real_type>::one();
1333
1334 // find magnitudes
1335 for (LO colID = 0; colID < nnz; colID++) {
1336 LO col = indices[colID];
1337
1338 if (row == col) {
1339 drop_vec.emplace_back(zero, one, colID, false);
1340 continue;
1341 }
1342 // We do not want the distance Laplacian aggregating boundary nodes
1343 if (isBoundary) continue;
1344
1345 SC laplVal;
1346 if (use_dlap_weights == SINGLE_WEIGHTS) {
1347 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1348 } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1349 int block_id = row % interleaved_blocksize;
1350 int block_start = block_id * interleaved_blocksize;
1351 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1352 } else {
1353 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1354 }
1355
1356 real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1357 real_type aij = STS::magnitude(laplVal * laplVal);
1358
1359 drop_vec.emplace_back(aij, aiiajj, colID, false);
1360 }
1361
1362 const size_t n = drop_vec.size();
1363
1364 if (distanceLaplacianAlgo == unscaled_cut) {
1365 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1366 return a.val > b.val;
1367 });
1368
1369 bool drop = false;
1370 for (size_t i = 1; i < n; ++i) {
1371 if (!drop) {
1372 auto const& x = drop_vec[i - 1];
1373 auto const& y = drop_vec[i];
1374 auto a = x.val;
1375 auto b = y.val;
1376 if (realThreshold * realThreshold * a > b) {
1377 drop = true;
1378#ifdef HAVE_MUELU_DEBUG
1379 if (distanceLaplacianCutVerbose) {
1380 std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
1381 }
1382#endif
1383 }
1384 }
1385 drop_vec[i].drop = drop;
1386 }
1387 } else if (distanceLaplacianAlgo == scaled_cut || distanceLaplacianAlgo == scaled_cut_symmetric) {
1388 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1389 return a.val / a.diag > b.val / b.diag;
1390 });
1391
1392 bool drop = false;
1393 for (size_t i = 1; i < n; ++i) {
1394 if (!drop) {
1395 auto const& x = drop_vec[i - 1];
1396 auto const& y = drop_vec[i];
1397 auto a = x.val / x.diag;
1398 auto b = y.val / y.diag;
1399 if (realThreshold * realThreshold * a > b) {
1400 drop = true;
1401#ifdef HAVE_MUELU_DEBUG
1402 if (distanceLaplacianCutVerbose) {
1403 std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
1404 }
1405#endif
1406 }
1407 }
1408 drop_vec[i].drop = drop;
1409 }
1410 }
1411
1412 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1413 return a.col < b.col;
1414 });
1415
1416 for (LO idxID = 0; idxID < (LO)drop_vec.size(); idxID++) {
1417 LO col = indices[drop_vec[idxID].col];
1418
1419 // don't drop diagonal
1420 if (row == col) {
1421 columns(realnnz++) = col;
1422 rownnz++;
1423 // printf("(%d,%d) KEEP %13s matrix = %6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val);
1424 continue;
1425 }
1426
1427 if (!drop_vec[idxID].drop) {
1428 columns(realnnz++) = col;
1429 // printf("(%d,%d) KEEP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1430 rownnz++;
1431 } else {
1432 // printf("(%d,%d) DROP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1433 numDropped++;
1434 }
1435 }
1436 }
1437 } else {
1438 // Skip laplace calculation and threshold comparison for zero threshold
1439 for (LO colID = 0; colID < nnz; colID++) {
1440 LO col = indices[colID];
1441 columns(realnnz++) = col;
1442 rownnz++;
1443 }
1444 }
1445
1446 if (rownnz == 1) {
1447 // If the only element remaining after filtering is diagonal, mark node as boundary
1448 // FIXME: this should really be replaced by the following
1449 // if (indices.size() == 1 && indices[0] == row)
1450 // boundaryNodes[row] = true;
1451 // We do not do it this way now because there is no framework for distinguishing isolated
1452 // and boundary nodes in the aggregation algorithms
1453 amalgBoundaryNodes[row] = true;
1454 }
1455
1456 if (use_stop_array)
1457 rows_stop[row] = rownnz + rows[row];
1458 else
1459 rows[row + 1] = realnnz;
1460 } // for (LO row = 0; row < numRows; row++)
1461
1462 } // subtimer
1463
1464 if (use_stop_array) {
1465 // Do symmetrization of the cut matrix
1466 // NOTE: We assume nested row/column maps here
1467 for (LO row = 0; row < numRows; row++) {
1468 for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) {
1469 LO col = columns[colidx];
1470 if (col >= numRows) continue;
1471
1472 bool found = false;
1473 for (LO t_col = rows(col); !found && t_col < rows_stop[col]; t_col++) {
1474 if (columns[t_col] == row)
1475 found = true;
1476 }
1477 // We didn't find the transpose buddy, so let's symmetrize, unless we'd be symmetrizing
1478 // into a Dirichlet unknown. In that case don't.
1479 if (!found && !pointBoundaryNodes[col] && Teuchos::as<typename LWGraph::row_type::value_type>(rows_stop[col]) < rows[col + 1]) {
1480 LO new_idx = rows_stop[col];
1481 // printf("(%d,%d) SYMADD entry\n",col,row);
1482 columns[new_idx] = row;
1483 rows_stop[col]++;
1484 numDropped--;
1485 }
1486 }
1487 }
1488
1489 // Condense everything down
1490 LO current_start = 0;
1491 for (LO row = 0; row < numRows; row++) {
1492 LO old_start = current_start;
1493 for (LO col = rows(row); col < rows_stop[row]; col++) {
1494 if (current_start != col) {
1495 columns(current_start) = columns(col);
1496 }
1497 current_start++;
1498 }
1499 rows[row] = old_start;
1500 }
1501 rows(numRows) = realnnz = current_start;
1502 }
1503
1504 RCP<LWGraph> graph;
1505 {
1506 SubFactoryMonitor m1(*this, "Build amalgamated graph", currentLevel);
1507 graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1508 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1509 } // subtimer
1510
1511 if (GetVerbLevel() & Statistics1) {
1512 GO numLocalBoundaryNodes = 0;
1513 GO numGlobalBoundaryNodes = 0;
1514
1515 for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1516 if (amalgBoundaryNodes(i))
1517 numLocalBoundaryNodes++;
1518
1519 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1520 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1521 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes"
1522 << " using threshold " << dirichletThreshold << std::endl;
1523 }
1524
1525 Set(currentLevel, "Graph", graph);
1526 Set(currentLevel, "DofsPerNode", blkSize);
1527 }
1528 }
1529
1530 if (GetVerbLevel() & Statistics1) {
1531 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1532 GO numGlobalTotal, numGlobalDropped;
1533 MueLu_sumAll(comm, numTotal, numGlobalTotal);
1534 MueLu_sumAll(comm, numDropped, numGlobalDropped);
1535 GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1536 if (numGlobalTotal != 0)
1537 GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1538 GetOStream(Statistics1) << std::endl;
1539 }
1540
1541 } else {
1542 // what Tobias has implemented
1543
1544 SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
1545 // GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1546 GetOStream(Runtime0) << "algorithm = \""
1547 << "failsafe"
1548 << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1549 Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
1550
1551 RCP<const Map> rowMap = A->getRowMap();
1552 RCP<const Map> colMap = A->getColMap();
1553
1554 LO blockdim = 1; // block dim for fixed size blocks
1555 GO indexBase = rowMap->getIndexBase(); // index base of maps
1556 GO offset = 0;
1557
1558 // 1) check for blocking/striding information
1559 if (A->IsView("stridedMaps") &&
1560 Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap("stridedMaps")) != Teuchos::null) {
1561 Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!)
1562 RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap());
1563 TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null, Exceptions::BadCast, "MueLu::CoalesceFactory::Build: cast to strided row map failed.");
1564 blockdim = strMap->getFixedBlockSize();
1565 offset = strMap->getOffset();
1566 oldView = A->SwitchToView(oldView);
1567 GetOStream(Statistics1) << "CoalesceDropFactory::Build():"
1568 << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl;
1569 } else
1570 GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1571
1572 // 2) get row map for amalgamated matrix (graph of A)
1573 // with same distribution over all procs as row map of A
1574 RCP<const Map> nodeMap = amalInfo->getNodeRowMap();
1575 GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl;
1576
1577 // 3) create graph of amalgamated matrix
1578 RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries() * blockdim);
1579
1580 LO numRows = A->getRowMap()->getLocalNumElements();
1581 LO numNodes = nodeMap->getLocalNumElements();
1582 typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numNodes);
1583 Kokkos::deep_copy(amalgBoundaryNodes, false);
1584 const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node
1585 bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid
1586
1587 // 4) do amalgamation. generate graph of amalgamated matrix
1588 // Note, this code is much more inefficient than the leightwight implementation
1589 // Most of the work has already been done in the AmalgamationFactory
1590 for (LO row = 0; row < numRows; row++) {
1591 // get global DOF id
1592 GO grid = rowMap->getGlobalElement(row);
1593
1594 // reinitialize boolean helper variable
1595 bIsDiagonalEntry = false;
1596
1597 // translate grid to nodeid
1598 GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase);
1599
1600 size_t nnz = A->getNumEntriesInLocalRow(row);
1601 Teuchos::ArrayView<const LO> indices;
1602 Teuchos::ArrayView<const SC> vals;
1603 A->getLocalRowView(row, indices, vals);
1604
1605 RCP<std::vector<GO>> cnodeIds = Teuchos::rcp(new std::vector<GO>); // global column block ids
1606 LO realnnz = 0;
1607 for (LO col = 0; col < Teuchos::as<LO>(nnz); col++) {
1608 GO gcid = colMap->getGlobalElement(indices[col]); // global column id
1609
1610 if (vals[col] != STS::zero()) {
1611 GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase);
1612 cnodeIds->push_back(cnodeId);
1613 realnnz++; // increment number of nnz in matrix row
1614 if (grid == gcid) bIsDiagonalEntry = true;
1615 }
1616 }
1617
1618 if (realnnz == 1 && bIsDiagonalEntry == true) {
1619 LO lNodeId = nodeMap->getLocalElement(nodeId);
1620 numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId
1621 if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes
1622 amalgBoundaryNodes[lNodeId] = true;
1623 }
1624
1625 Teuchos::ArrayRCP<GO> arr_cnodeIds = Teuchos::arcp(cnodeIds);
1626
1627 if (arr_cnodeIds.size() > 0)
1628 crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1629 }
1630 // fill matrix graph
1631 crsGraph->fillComplete(nodeMap, nodeMap);
1632
1633 // 5) create MueLu Graph object
1634 RCP<LWGraph> graph = rcp(new LWGraph(crsGraph, "amalgamated graph of A"));
1635
1636 // Detect and record rows that correspond to Dirichlet boundary conditions
1637 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1638
1639 if (GetVerbLevel() & Statistics1) {
1640 GO numLocalBoundaryNodes = 0;
1641 GO numGlobalBoundaryNodes = 0;
1642 for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1643 if (amalgBoundaryNodes(i))
1644 numLocalBoundaryNodes++;
1645 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1646 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1647 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1648 }
1649
1650 // 6) store results in Level
1651 // graph->SetBoundaryNodeMap(gBoundaryNodeMap);
1652 Set(currentLevel, "DofsPerNode", blockdim);
1653 Set(currentLevel, "Graph", graph);
1654
1655 } // if (doExperimentalWrap) ... else ...
1656
1657} // Build
1658
1659template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1660void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRows(const Matrix& A, const LO row, Array<LO>& cols, const Array<LO>& translation) const {
1661 typedef typename ArrayView<const LO>::size_type size_type;
1662
1663 // extract striding information
1664 LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1665 if (A.IsView("stridedMaps") == true) {
1666 Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1667 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1668 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1669 if (strMap->getStridedBlockId() > -1)
1670 blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1671 }
1672
1673 // count nonzero entries in all dof rows associated with node row
1674 size_t nnz = 0, pos = 0;
1675 for (LO j = 0; j < blkSize; j++)
1676 nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1677
1678 if (nnz == 0) {
1679 cols.resize(0);
1680 return;
1681 }
1682
1683 cols.resize(nnz);
1684
1685 // loop over all local dof rows associated with local node "row"
1686 ArrayView<const LO> inds;
1687 ArrayView<const SC> vals;
1688 for (LO j = 0; j < blkSize; j++) {
1689 A.getLocalRowView(row * blkSize + j, inds, vals);
1690 size_type numIndices = inds.size();
1691
1692 if (numIndices == 0) // skip empty dof rows
1693 continue;
1694
1695 // cols: stores all local node ids for current local node id "row"
1696 cols[pos++] = translation[inds[0]];
1697 for (size_type k = 1; k < numIndices; k++) {
1698 LO nodeID = translation[inds[k]];
1699 // Here we try to speed up the process by reducing the size of an array
1700 // to sort. This works if the column nonzeros belonging to the same
1701 // node are stored consequently.
1702 if (nodeID != cols[pos - 1])
1703 cols[pos++] = nodeID;
1704 }
1705 }
1706 cols.resize(pos);
1707 nnz = pos;
1708
1709 // Sort and remove duplicates
1710 std::sort(cols.begin(), cols.end());
1711 pos = 0;
1712 for (size_t j = 1; j < nnz; j++)
1713 if (cols[j] != cols[pos])
1714 cols[++pos] = cols[j];
1715 cols.resize(pos + 1);
1716}
1717
1718template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1719void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP<const SC>& ghostedDiagVals, SC threshold, Array<LO>& cols, const Array<LO>& translation) const {
1720 typedef typename ArrayView<const LO>::size_type size_type;
1721 typedef Teuchos::ScalarTraits<SC> STS;
1722
1723 // extract striding information
1724 LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1725 if (A.IsView("stridedMaps") == true) {
1726 Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1727 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1728 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1729 if (strMap->getStridedBlockId() > -1)
1730 blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1731 }
1732
1733 // count nonzero entries in all dof rows associated with node row
1734 size_t nnz = 0, pos = 0;
1735 for (LO j = 0; j < blkSize; j++)
1736 nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1737
1738 if (nnz == 0) {
1739 cols.resize(0);
1740 return;
1741 }
1742
1743 cols.resize(nnz);
1744
1745 // loop over all local dof rows associated with local node "row"
1746 ArrayView<const LO> inds;
1747 ArrayView<const SC> vals;
1748 for (LO j = 0; j < blkSize; j++) {
1749 A.getLocalRowView(row * blkSize + j, inds, vals);
1750 size_type numIndices = inds.size();
1751
1752 if (numIndices == 0) // skip empty dof rows
1753 continue;
1754
1755 // cols: stores all local node ids for current local node id "row"
1756 LO prevNodeID = -1;
1757 for (size_type k = 0; k < numIndices; k++) {
1758 LO dofID = inds[k];
1759 LO nodeID = translation[inds[k]];
1760
1761 // we avoid a square root by using squared values
1762 typename STS::magnitudeType aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[dofID] * ghostedDiagVals[row * blkSize + j]); // eps^2 * |a_ii| * |a_jj|
1763 typename STS::magnitudeType aij = STS::magnitude(vals[k] * vals[k]);
1764
1765 // check dropping criterion
1766 if (aij > aiiajj || (row * blkSize + j == dofID)) {
1767 // accept entry in graph
1768
1769 // Here we try to speed up the process by reducing the size of an array
1770 // to sort. This works if the column nonzeros belonging to the same
1771 // node are stored consequently.
1772 if (nodeID != prevNodeID) {
1773 cols[pos++] = nodeID;
1774 prevNodeID = nodeID;
1775 }
1776 }
1777 }
1778 }
1779 cols.resize(pos);
1780 nnz = pos;
1781
1782 // Sort and remove duplicates
1783 std::sort(cols.begin(), cols.end());
1784 pos = 0;
1785 for (size_t j = 1; j < nnz; j++)
1786 if (cols[j] != cols[pos])
1787 cols[++pos] = cols[j];
1788 cols.resize(pos + 1);
1789
1790 return;
1791}
1792
1793template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1794Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BlockDiagonalize(Level& currentLevel, const RCP<Matrix>& A, bool generate_matrix) const {
1795 typedef Teuchos::ScalarTraits<SC> STS;
1796
1797 const ParameterList& pL = GetParameterList();
1798 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
1799 const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
1800
1801 RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber");
1802 RCP<LocalOrdinalVector> ghostedBlockNumber;
1803 GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)" << std::endl;
1804
1805 // Ghost the column block numbers if we need to
1806 RCP<const Import> importer = A->getCrsGraph()->getImporter();
1807 if (!importer.is_null()) {
1808 SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
1809 ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
1810 ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
1811 } else {
1812 ghostedBlockNumber = BlockNumber;
1813 }
1814
1815 // Accessors for block numbers
1816 Teuchos::ArrayRCP<const LO> row_block_number = BlockNumber->getData(0);
1817 Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1818
1819 // allocate space for the local graph
1820 typename CrsMatrix::local_matrix_device_type::row_map_type::host_mirror_type::non_const_type rows_mat;
1821 typename LWGraph::row_type::non_const_type rows_graph;
1822 typename LWGraph::entries_type::non_const_type columns;
1823 typename CrsMatrix::local_matrix_device_type::values_type::host_mirror_type::non_const_type values;
1824 RCP<CrsMatrixWrap> crs_matrix_wrap;
1825
1826 if (generate_matrix) {
1827 crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0));
1828 rows_mat = typename CrsMatrix::local_matrix_device_type::row_map_type::host_mirror_type::non_const_type("rows_mat", A->getLocalNumRows() + 1);
1829 } else {
1830 rows_graph = typename LWGraph::row_type::non_const_type("rows_graph", A->getLocalNumRows() + 1);
1831 }
1832 columns = typename LWGraph::entries_type::non_const_type("columns", A->getLocalNumEntries());
1833 values = typename CrsMatrix::local_matrix_device_type::values_type::host_mirror_type::non_const_type("values", A->getLocalNumEntries());
1834
1835 LO realnnz = 0;
1836 GO numDropped = 0, numTotal = 0;
1837 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
1838 LO row_block = row_block_number[row];
1839 size_t nnz = A->getNumEntriesInLocalRow(row);
1840 ArrayView<const LO> indices;
1841 ArrayView<const SC> vals;
1842 A->getLocalRowView(row, indices, vals);
1843
1844 LO rownnz = 0;
1845 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
1846 LO col = indices[colID];
1847 LO col_block = col_block_number[col];
1848
1849 if (row_block == col_block) {
1850 if (generate_matrix) values[realnnz] = vals[colID];
1851 columns[realnnz++] = col;
1852 rownnz++;
1853 } else
1854 numDropped++;
1855 }
1856 if (generate_matrix)
1857 rows_mat[row + 1] = realnnz;
1858 else
1859 rows_graph[row + 1] = realnnz;
1860 }
1861
1862 auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
1863 if (rowSumTol > 0.)
1864 Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
1865
1866 numTotal = A->getLocalNumEntries();
1867
1868 if (GetVerbLevel() & Statistics1) {
1869 GO numLocalBoundaryNodes = 0;
1870 GO numGlobalBoundaryNodes = 0;
1871 for (size_t i = 0; i < boundaryNodes.size(); ++i)
1872 if (boundaryNodes(i))
1873 numLocalBoundaryNodes++;
1874 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1875 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1876 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1877
1878 GO numGlobalTotal, numGlobalDropped;
1879 MueLu_sumAll(comm, numTotal, numGlobalTotal);
1880 MueLu_sumAll(comm, numDropped, numGlobalDropped);
1881 GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1882 if (numGlobalTotal != 0)
1883 GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1884 GetOStream(Statistics1) << std::endl;
1885 }
1886
1887 Set(currentLevel, "Filtering", true);
1888
1889 if (generate_matrix) {
1890 // NOTE: Trying to use A's Import/Export objects will cause the code to segfault back in Build() with errors on the Import
1891 // if you're using Epetra. I'm not really sure why. By using the Col==Domain and Row==Range maps, we get null Import/Export objects
1892 // here, which is legit, because we never use them anyway.
1893 if constexpr (std::is_same<typename LWGraph::row_type,
1894 typename CrsMatrix::local_matrix_device_type::row_map_type>::value) {
1895 crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat, columns, values);
1896 } else {
1897 auto rows_mat2 = typename CrsMatrix::local_matrix_device_type::row_map_type::non_const_type("rows_mat2", rows_mat.extent(0));
1898 Kokkos::deep_copy(rows_mat2, rows_mat);
1899 auto columns2 = typename CrsMatrix::local_graph_device_type::entries_type::non_const_type("columns2", columns.extent(0));
1900 Kokkos::deep_copy(columns2, columns);
1901 auto values2 = typename CrsMatrix::local_matrix_device_type::values_type::non_const_type("values2", values.extent(0));
1902 Kokkos::deep_copy(values2, values);
1903 crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat2, columns2, values2);
1904 }
1905 crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap());
1906 } else {
1907 RCP<LWGraph> graph = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(), "block-diagonalized graph of A"));
1908 graph->SetBoundaryNodeMap(boundaryNodes);
1909 Set(currentLevel, "Graph", graph);
1910 }
1911
1912 Set(currentLevel, "DofsPerNode", 1);
1913 return crs_matrix_wrap;
1914}
1915
1916template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1917void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BlockDiagonalizeGraph(const RCP<LWGraph>& inputGraph, const RCP<LocalOrdinalVector>& ghostedBlockNumber, RCP<LWGraph>& outputGraph, RCP<const Import>& importer) const {
1918 TEUCHOS_TEST_FOR_EXCEPTION(ghostedBlockNumber.is_null(), Exceptions::RuntimeError, "BlockDiagonalizeGraph(): ghostedBlockNumber is null.");
1919 const ParameterList& pL = GetParameterList();
1920
1921 const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
1922
1923 GetOStream(Statistics1) << "Using BlockDiagonal Graph after Dropping (with provided blocking)";
1924 if (localizeColoringGraph)
1925 GetOStream(Statistics1) << ", with localization" << std::endl;
1926 else
1927 GetOStream(Statistics1) << ", without localization" << std::endl;
1928
1929 // Accessors for block numbers
1930 Teuchos::ArrayRCP<const LO> row_block_number = ghostedBlockNumber->getData(0);
1931 Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1932
1933 // allocate space for the local graph
1934 ArrayRCP<size_t> rows_mat;
1935 typename LWGraph::row_type::non_const_type rows_graph("rows_graph", inputGraph->GetNodeNumVertices() + 1);
1936 typename LWGraph::entries_type::non_const_type columns("columns", inputGraph->GetNodeNumEdges());
1937
1938 LO realnnz = 0;
1939 GO numDropped = 0, numTotal = 0;
1940 const LO numRows = Teuchos::as<LO>(inputGraph->GetDomainMap()->getLocalNumElements());
1941 if (localizeColoringGraph) {
1942 for (LO row = 0; row < numRows; ++row) {
1943 LO row_block = row_block_number[row];
1944 auto indices = inputGraph->getNeighborVertices(row);
1945
1946 LO rownnz = 0;
1947 for (LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1948 LO col = indices(colID);
1949 LO col_block = col_block_number[col];
1950
1951 if ((row_block == col_block) && (col < numRows)) {
1952 columns(realnnz++) = col;
1953 rownnz++;
1954 } else
1955 numDropped++;
1956 }
1957 rows_graph(row + 1) = realnnz;
1958 }
1959 } else {
1960 // ghosting of boundary node map
1961 auto boundaryNodes = inputGraph->GetBoundaryNodeMap();
1962 auto boundaryNodesVector = Xpetra::VectorFactory<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(inputGraph->GetDomainMap());
1963 for (size_t i = 0; i < inputGraph->GetNodeNumVertices(); i++)
1964 boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
1965 // Xpetra::IO<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Write("boundary",*boundaryNodesVector);
1966 auto boundaryColumnVector = Xpetra::VectorFactory<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(inputGraph->GetImportMap());
1967 boundaryColumnVector->doImport(*boundaryNodesVector, *importer, Xpetra::INSERT);
1968 auto boundaryColumn = boundaryColumnVector->getData(0);
1969
1970 for (LO row = 0; row < numRows; ++row) {
1971 LO row_block = row_block_number[row];
1972 auto indices = inputGraph->getNeighborVertices(row);
1973
1974 LO rownnz = 0;
1975 for (LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1976 LO col = indices(colID);
1977 LO col_block = col_block_number[col];
1978
1979 if ((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) {
1980 columns(realnnz++) = col;
1981 rownnz++;
1982 } else
1983 numDropped++;
1984 }
1985 rows_graph(row + 1) = realnnz;
1986 }
1987 }
1988
1989 numTotal = inputGraph->GetNodeNumEdges();
1990
1991 if (GetVerbLevel() & Statistics1) {
1992 RCP<const Teuchos::Comm<int>> comm = inputGraph->GetDomainMap()->getComm();
1993 GO numGlobalTotal, numGlobalDropped;
1994 MueLu_sumAll(comm, numTotal, numGlobalTotal);
1995 MueLu_sumAll(comm, numDropped, numGlobalDropped);
1996 GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1997 if (numGlobalTotal != 0)
1998 GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1999 GetOStream(Statistics1) << std::endl;
2000 }
2001
2002 if (localizeColoringGraph) {
2003 outputGraph = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
2004 outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
2005 } else {
2006 TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra);
2007#ifdef HAVE_XPETRA_TPETRA
2008 auto outputGraph2 = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
2009
2010 auto tpGraph = Xpetra::toTpetra(rcp_const_cast<const CrsGraph>(outputGraph2->GetCrsGraph()));
2011 auto sym = rcp(new Tpetra::CrsGraphTransposer<LocalOrdinal, GlobalOrdinal, Node>(tpGraph));
2012 auto tpGraphSym = sym->symmetrize();
2013 auto lclGraphSym = tpGraphSym->getLocalGraphHost();
2014 auto colIndsSym = lclGraphSym.entries;
2015
2016 auto rowsSym = tpGraphSym->getLocalRowPtrsHost();
2017 typename LWGraph::row_type::non_const_type rows_graphSym("rows_graphSym", rowsSym.size());
2018 for (size_t row = 0; row < rowsSym.size(); row++)
2019 rows_graphSym(row) = rowsSym(row);
2020 outputGraph = rcp(new LWGraph(rows_graphSym, colIndsSym, inputGraph->GetDomainMap(), Xpetra::toXpetra(tpGraphSym->getColMap()), "block-diagonalized graph of A"));
2021 outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
2022#endif
2023 }
2024}
2025
2026} // namespace MueLu
2027
2028#endif // MUELU_COALESCEDROPFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
#define MueLu_sumAll(rcpComm, in, out)
static void AmalgamateMap(const Map &sourceMap, const Matrix &A, RCP< const Map > &amalgamatedMap, Array< LO > &translation)
Method to create merged map for systems of PDEs.
static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, const GlobalOrdinal indexBase)
Translate global (row/column) id to global amalgamation block id.
void MergeRows(const Matrix &A, const LO row, Array< LO > &cols, const Array< LO > &translation) const
Method to merge rows of matrix for systems of PDEs.
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > BlockDiagonalize(Level &currentLevel, const RCP< Matrix > &A, bool generate_matrix) const
void DeclareInput(Level &currentLevel) const
Input.
void BlockDiagonalizeGraph(const RCP< LWGraph > &inputGraph, const RCP< LocalOrdinalVector > &ghostedBlockNumber, RCP< LWGraph > &outputGraph, RCP< const Import > &importer) const
void MergeRowsWithDropping(const Matrix &A, const LO row, const ArrayRCP< const SC > &ghostedDiagVals, SC threshold, Array< LO > &cols, const Array< LO > &translation) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &currentLevel) const
Build an object with this factory.
Exception indicating invalid cast attempted.
Exception throws to report incompatible objects (like maps).
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Kokkos::View< bool *, memory_space > boundary_nodes_type
typename local_graph_type::row_map_type row_type
Lightweight MueLu representation of a compressed row storage graph.
Class that holds all level-specific information.
int GetLevelID() const
Return level number.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static Kokkos::View< bool *, typename Kokkos::HostSpace > DetectDirichletRows_kokkos_host(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
static Teuchos::RCP< Vector > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Return vector containing: max_{i\not=k}(-a_ik), for each for i in the matrix.
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
Apply Rowsum Criterion.
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar > > &v, LocalOrdinal i0, LocalOrdinal i1)
Squared distance between two rows in a multivector.
static void ApplyRowSumCriterionHost(const Matrix &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, Kokkos::HostSpace > &dirichletRows)
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
MueLu utility class.
Namespace for MueLu classes and methods.
@ Warnings0
Important warning messages (one line)
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
@ Warnings1
Additional warnings.
@ Parameters0
Print class parameters.
DropTol & operator=(DropTol const &)=default
DropTol(DropTol &&)=default
DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
DropTol & operator=(DropTol &&)=default
DropTol(DropTol const &)=default