MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_CoalesceDropFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP
11#define MUELU_COALESCEDROPFACTORY_DEF_HPP
12
13#include <Xpetra_CrsGraphFactory.hpp>
14#include <Xpetra_CrsGraph.hpp>
15#include <Xpetra_ImportFactory.hpp>
16#include <Xpetra_ExportFactory.hpp>
17#include <Xpetra_MapFactory.hpp>
18#include <Xpetra_Map.hpp>
19#include <Xpetra_Matrix.hpp>
20#include <Xpetra_MultiVectorFactory.hpp>
21#include <Xpetra_MultiVector.hpp>
22#include <Xpetra_StridedMap.hpp>
23#include <Xpetra_VectorFactory.hpp>
24#include <Xpetra_Vector.hpp>
25
26#include <Xpetra_IO.hpp>
27
28#include <Kokkos_NestedSort.hpp>
29#include <Kokkos_StdAlgorithms.hpp>
31
32#include "MueLu_AmalgamationFactory.hpp"
33#include "MueLu_AmalgamationInfo.hpp"
34#include "MueLu_Exceptions.hpp"
35#include "MueLu_LWGraph.hpp"
36
37#include "MueLu_Level.hpp"
38#include "MueLu_LWGraph.hpp"
39#include "MueLu_MasterList.hpp"
40#include "MueLu_Monitor.hpp"
41#include "MueLu_PreDropFunctionConstVal.hpp"
42#include "MueLu_Utilities.hpp"
43
44#include "Tpetra_CrsGraphTransposer.hpp"
45
46#include <algorithm>
47#include <cstdlib>
48#include <string>
49
50// If defined, read environment variables.
51// Should be removed once we are confident that this works.
52//#define DJS_READ_ENV_VARIABLES
53
54namespace MueLu {
55
56namespace Details {
57template <class real_type, class LO>
58struct DropTol {
59 DropTol() = default;
60 DropTol(DropTol const&) = default;
61 DropTol(DropTol&&) = default;
62
63 DropTol& operator=(DropTol const&) = default;
64 DropTol& operator=(DropTol&&) = default;
65
66 DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
67 : val{val_}
68 , diag{diag_}
69 , col{col_}
70 , drop{drop_} {}
71
72 real_type val{Teuchos::ScalarTraits<real_type>::zero()};
73 real_type diag{Teuchos::ScalarTraits<real_type>::zero()};
74 LO col{Teuchos::OrdinalTraits<LO>::invalid()};
75 bool drop{true};
76
77 // CMS: Auxillary information for debugging info
78 // real_type aux_val {Teuchos::ScalarTraits<real_type>::nan()};
79};
80} // namespace Details
81
86
87template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
89 RCP<ParameterList> validParamList = rcp(new ParameterList());
90
91#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
92 SET_VALID_ENTRY("aggregation: drop tol");
93 SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
94 SET_VALID_ENTRY("aggregation: Dirichlet threshold");
95 SET_VALID_ENTRY("aggregation: greedy Dirichlet");
96 SET_VALID_ENTRY("aggregation: row sum drop tol");
97 SET_VALID_ENTRY("aggregation: drop scheme");
98 SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
99 SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
100 SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
101
102 {
103 // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values)
104 validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical"))));
105 }
106 SET_VALID_ENTRY("aggregation: distance laplacian algo");
107 SET_VALID_ENTRY("aggregation: classical algo");
108 SET_VALID_ENTRY("aggregation: coloring: localize color graph");
109#undef SET_VALID_ENTRY
110 validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
111
112 validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
113 validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
114 validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
115 validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNUmber");
116
117 return validParamList;
118}
119
120template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
123
124template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
126 Input(currentLevel, "A");
127 Input(currentLevel, "UnAmalgamationInfo");
128
129 const ParameterList& pL = GetParameterList();
130 if (pL.get<bool>("lightweight wrap") == true) {
131 std::string algo = pL.get<std::string>("aggregation: drop scheme");
132 if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
133 Input(currentLevel, "Coordinates");
134 }
135 if (algo == "signed classical sa")
136 ;
137 else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) {
138 Input(currentLevel, "BlockNumber");
139 }
140 }
141}
142
143template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
145 FactoryMonitor m(*this, "Build", currentLevel);
146
147 typedef Teuchos::ScalarTraits<SC> STS;
148 typedef typename STS::magnitudeType real_type;
149 typedef Xpetra::MultiVector<real_type, LO, GO, NO> RealValuedMultiVector;
150 typedef Xpetra::MultiVectorFactory<real_type, LO, GO, NO> RealValuedMultiVectorFactory;
151
152 if (predrop_ != Teuchos::null)
153 GetOStream(Parameters0) << predrop_->description();
154
155 RCP<Matrix> realA = Get<RCP<Matrix>>(currentLevel, "A");
156 RCP<AmalgamationInfo> amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
157 const ParameterList& pL = GetParameterList();
158 bool doExperimentalWrap = pL.get<bool>("lightweight wrap");
159
160 GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl;
161 std::string algo = pL.get<std::string>("aggregation: drop scheme");
162 const bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
163
164 RCP<RealValuedMultiVector> Coords;
165 RCP<Matrix> A;
166
167 bool use_block_algorithm = false;
168 LO interleaved_blocksize = as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
169 bool useSignedClassicalRS = false;
170 bool useSignedClassicalSA = false;
171 bool generateColoringGraph = false;
172
173 // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice (we'll do it
174 // in the block diagonalization). So we'll clobber the rowSumTol with -1.0 in this case
175 typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
176
177 RCP<LocalOrdinalVector> ghostedBlockNumber;
178
179 if (algo == "distance laplacian") {
180 // Grab the coordinates for distance laplacian
181 Coords = Get<RCP<RealValuedMultiVector>>(currentLevel, "Coordinates");
182 A = realA;
183 } else if (algo == "signed classical sa") {
184 useSignedClassicalSA = true;
185 algo = "classical";
186 A = realA;
187 } else if (algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") {
188 useSignedClassicalRS = true;
189 // if(realA->GetFixedBlockSize() > 1) {
190 RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber");
191 // Ghost the column block numbers if we need to
192 RCP<const Import> importer = realA->getCrsGraph()->getImporter();
193 if (!importer.is_null()) {
194 SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
195 ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
196 ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
197 } else {
198 ghostedBlockNumber = BlockNumber;
199 }
200 // }
201 if (algo == "block diagonal colored signed classical")
202 generateColoringGraph = true;
203 algo = "classical";
204 A = realA;
205
206 } else if (algo == "block diagonal") {
207 // Handle the "block diagonal" filtering and then leave
208 BlockDiagonalize(currentLevel, realA, false);
209 return;
210 } else if (algo == "block diagonal classical" || algo == "block diagonal distance laplacian") {
211 // Handle the "block diagonal" filtering, and then continue onward
212 use_block_algorithm = true;
213 RCP<Matrix> filteredMatrix = BlockDiagonalize(currentLevel, realA, true);
214 if (algo == "block diagonal distance laplacian") {
215 // We now need to expand the coordinates by the interleaved blocksize
216 RCP<RealValuedMultiVector> OldCoords = Get<RCP<RealValuedMultiVector>>(currentLevel, "Coordinates");
217 if (OldCoords->getLocalLength() != realA->getLocalNumRows()) {
218 LO dim = (LO)OldCoords->getNumVectors();
219 Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(), dim);
220 for (LO k = 0; k < dim; k++) {
221 ArrayRCP<const real_type> old_vec = OldCoords->getData(k);
222 ArrayRCP<real_type> new_vec = Coords->getDataNonConst(k);
223 for (LO i = 0; i < (LO)OldCoords->getLocalLength(); i++) {
224 LO new_base = i * dim;
225 for (LO j = 0; j < interleaved_blocksize; j++)
226 new_vec[new_base + j] = old_vec[i];
227 }
228 }
229 } else {
230 Coords = OldCoords;
231 }
232 algo = "distance laplacian";
233 } else if (algo == "block diagonal classical") {
234 algo = "classical";
235 }
236 // All cases
237 A = filteredMatrix;
238 rowSumTol = -1.0;
239 } else {
240 A = realA;
241 }
242
243 // Distance Laplacian weights
244 Array<double> dlap_weights = pL.get<Array<double>>("aggregation: distance laplacian directional weights");
245 enum { NO_WEIGHTS = 0,
246 SINGLE_WEIGHTS,
247 BLOCK_WEIGHTS };
248 int use_dlap_weights = NO_WEIGHTS;
249 if (algo == "distance laplacian") {
250 LO dim = (LO)Coords->getNumVectors();
251 // If anything isn't 1.0 we need to turn on the weighting
252 bool non_unity = false;
253 for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) {
254 if (dlap_weights[i] != 1.0) {
255 non_unity = true;
256 }
257 }
258 if (non_unity) {
259 LO blocksize = use_block_algorithm ? as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize")) : 1;
260 if ((LO)dlap_weights.size() == dim)
261 use_dlap_weights = SINGLE_WEIGHTS;
262 else if ((LO)dlap_weights.size() == blocksize * dim)
263 use_dlap_weights = BLOCK_WEIGHTS;
264 else {
265 TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError,
266 "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
267 }
268 if (GetVerbLevel() & Statistics1)
269 GetOStream(Statistics1) << "Using distance laplacian weights: " << dlap_weights << std::endl;
270 }
271 }
272
273 // decide wether to use the fast-track code path for standard maps or the somewhat slower
274 // code path for non-standard maps
275 /*bool bNonStandardMaps = false;
276 if (A->IsView("stridedMaps") == true) {
277 Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
278 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
279 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
280 if (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0)
281 bNonStandardMaps = true;
282 }*/
283
284 if (doExperimentalWrap) {
285 TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm");
286 TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian" && algo != "signed classical", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian|signed classical)");
287
288 SC threshold;
289 // If we're doing the ML-style halving of the drop tol at each level, we do that here.
290 if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
291 threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
292 else
293 threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
294
295 std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
296 std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
297 real_type realThreshold = STS::magnitude(threshold); // CMS: Rename this to "magnitude threshold" sometime
298
300 // Remove this bit once we are confident that cut-based dropping works.
301#ifdef HAVE_MUELU_DEBUG
302 int distanceLaplacianCutVerbose = 0;
303#endif
304#ifdef DJS_READ_ENV_VARIABLES
305 if (getenv("MUELU_DROP_TOLERANCE_MODE")) {
306 distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE"));
307 }
308
309 if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) {
310 auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD"));
311 realThreshold = 1e-4 * tmp;
312 }
313
314#ifdef HAVE_MUELU_DEBUG
315 if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) {
316 distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE"));
317 }
318#endif
319#endif
321
322 decisionAlgoType distanceLaplacianAlgo = defaultAlgo;
323 decisionAlgoType classicalAlgo = defaultAlgo;
324 if (algo == "distance laplacian") {
325 if (distanceLaplacianAlgoStr == "default")
326 distanceLaplacianAlgo = defaultAlgo;
327 else if (distanceLaplacianAlgoStr == "unscaled cut")
328 distanceLaplacianAlgo = unscaled_cut;
329 else if (distanceLaplacianAlgoStr == "scaled cut")
330 distanceLaplacianAlgo = scaled_cut;
331 else if (distanceLaplacianAlgoStr == "scaled cut symmetric")
332 distanceLaplacianAlgo = scaled_cut_symmetric;
333 else
334 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\"");
335 GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
336 } else if (algo == "classical") {
337 if (classicalAlgoStr == "default")
338 classicalAlgo = defaultAlgo;
339 else if (classicalAlgoStr == "unscaled cut")
340 classicalAlgo = unscaled_cut;
341 else if (classicalAlgoStr == "scaled cut")
342 classicalAlgo = scaled_cut;
343 else
344 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\"");
345 GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
346
347 } else
348 GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
349
350 if (((algo == "classical") && (classicalAlgoStr.find("scaled") != std::string::npos)) || ((algo == "distance laplacian") && (distanceLaplacianAlgoStr.find("scaled") != std::string::npos)))
351 TEUCHOS_TEST_FOR_EXCEPTION(realThreshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
352
353 Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
354
355 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
356
357 // NOTE: We don't support signed classical RS or SA with cut drop at present
358 TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalRS && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical aggregation");
359 TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalSA && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical sa aggregation");
360
361 GO numDropped = 0, numTotal = 0;
362 std::string graphType = "unamalgamated"; // for description purposes only
363
364 /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
365 BlockSize is the number of storage blocks that must kept together during the amalgamation process.
366
367 Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
368
369 numPDEs = BlockSize * storageblocksize.
370
371 If numPDEs==1
372 Matrix is point storage (classical CRS storage). storageblocksize=1 and BlockSize=1
373 No other values makes sense.
374
375 If numPDEs>1
376 If matrix uses point storage, then storageblocksize=1 and BlockSize=numPDEs.
377 If matrix uses block storage, with block size of n, then storageblocksize=n, and BlockSize=numPDEs/n.
378 Thus far, only storageblocksize=numPDEs and BlockSize=1 has been tested.
379 */
380 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
381 const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
382
383 /************************** RS or SA-style Classical Dropping (and variants) **************************/
384 if (algo == "classical") {
385 if (predrop_ == null) {
386 // ap: this is a hack: had to declare predrop_ as mutable
387 predrop_ = rcp(new PreDropFunctionConstVal(threshold));
388 }
389
390 if (predrop_ != null) {
391 RCP<PreDropFunctionConstVal> predropConstVal = rcp_dynamic_cast<PreDropFunctionConstVal>(predrop_);
392 TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast,
393 "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
394 // If a user provided a predrop function, it overwrites the XML threshold parameter
395 SC newt = predropConstVal->GetThreshold();
396 if (newt != threshold) {
397 GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl;
398 threshold = newt;
399 }
400 }
401 // At this points we either have
402 // (predrop_ != null)
403 // Therefore, it is sufficient to check only threshold
404 if (BlockSize == 1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) {
405 // Case 1: scalar problem, no dropping => just use matrix graph
406 RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
407 // Detect and record rows that correspond to Dirichlet boundary conditions
408 auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
409 if (rowSumTol > 0.)
410 Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
411
412 graph->SetBoundaryNodeMap(boundaryNodes);
413 numTotal = A->getLocalNumEntries();
414
415 if (GetVerbLevel() & Statistics1) {
416 GO numLocalBoundaryNodes = 0;
417 GO numGlobalBoundaryNodes = 0;
418 for (size_t i = 0; i < boundaryNodes.size(); ++i)
419 if (boundaryNodes[i])
420 numLocalBoundaryNodes++;
421 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
422 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
423 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
424 }
425
426 Set(currentLevel, "DofsPerNode", 1);
427 Set(currentLevel, "Graph", graph);
428
429 } else if ((BlockSize == 1 && threshold != STS::zero()) ||
430 (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) ||
431 (BlockSize == 1 && useSignedClassicalRS) ||
432 (BlockSize == 1 && useSignedClassicalSA)) {
433 // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original
434 // graph's map information, e.g., whether index is local
435 // OR a matrix without a CrsGraph
436
437 // allocate space for the local graph
438 typename LWGraph::row_type::non_const_type rows("rows", A->getLocalNumRows() + 1);
439 typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
440
441 using MT = typename STS::magnitudeType;
442 RCP<Vector> ghostedDiag;
443 ArrayRCP<const SC> ghostedDiagVals;
444 ArrayRCP<const SC> negMaxOffDiagonal;
445 // RS style needs the max negative off-diagonal, SA style needs the diagonal
446 if (useSignedClassicalRS) {
447 if (ghostedBlockNumber.is_null()) {
449 negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
450 if (GetVerbLevel() & Statistics1)
451 GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl;
452 } else {
453 auto negMaxOffDiagonalVec = MueLu::Utilities<SC, LO, GO, NO>::GetMatrixMaxMinusOffDiagonal(*A, *ghostedBlockNumber);
454 negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
455 if (GetVerbLevel() & Statistics1)
456 GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl;
457 }
458 } else {
460 if (classicalAlgo == defaultAlgo) {
461 ghostedDiagVals = ghostedDiag->getData(0);
462 }
463 }
464 auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
465 if (rowSumTol > 0.) {
466 if (ghostedBlockNumber.is_null()) {
467 if (GetVerbLevel() & Statistics1)
468 GetOStream(Statistics1) << "Applying point row sum criterion." << std::endl;
469 Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
470 } else {
471 if (GetVerbLevel() & Statistics1)
472 GetOStream(Statistics1) << "Applying block row sum criterion." << std::endl;
473 Utilities::ApplyRowSumCriterionHost(*A, *ghostedBlockNumber, rowSumTol, boundaryNodes);
474 }
475 }
476
477 ArrayRCP<const LO> g_block_id;
478 if (!ghostedBlockNumber.is_null())
479 g_block_id = ghostedBlockNumber->getData(0);
480
481 LO realnnz = 0;
482 rows(0) = 0;
483 if (classicalAlgo == defaultAlgo) {
484 SubFactoryMonitor m1(*this, "Classical RS/SA", currentLevel);
485 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
486 size_t nnz = A->getNumEntriesInLocalRow(row);
487 bool rowIsDirichlet = boundaryNodes[row];
488 ArrayView<const LO> indices;
489 ArrayView<const SC> vals;
490 A->getLocalRowView(row, indices, vals);
491
492 // FIXME the current predrop function uses the following
493 // FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid )
494 // FIXME but the threshold doesn't take into account the rows' diagonal entries
495 // FIXME For now, hardwiring the dropping in here
496
497 LO rownnz = 0;
498 if (useSignedClassicalRS) {
499 // Signed classical RS style
500 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
501 LO col = indices[colID];
502 MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]);
503 MT neg_aij = -STS::real(vals[colID]);
504 /* if(row==1326) printf("A(%d,%d) = %6.4e, block = (%d,%d) neg_aij = %6.4e max_neg_aik = %6.4e\n",row,col,vals[colID],
505 g_block_id.is_null() ? -1 : g_block_id[row],
506 g_block_id.is_null() ? -1 : g_block_id[col],
507 neg_aij, max_neg_aik);*/
508 if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) {
509 columns[realnnz++] = col;
510 rownnz++;
511 } else
512 numDropped++;
513 }
514 rows(row + 1) = realnnz;
515 } else if (useSignedClassicalSA) {
516 // Signed classical SA style
517 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
518 LO col = indices[colID];
519
520 bool is_nonpositive = STS::real(vals[colID]) <= 0;
521 MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
522 MT aij = is_nonpositive ? STS::magnitude(vals[colID] * vals[colID]) : (-STS::magnitude(vals[colID] * vals[colID])); // + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0
523 /*
524 if(row==1326) printf("A(%d,%d) = %6.4e, raw_aij = %6.4e aij = %6.4e aiiajj = %6.4e\n",row,col,vals[colID],
525 vals[colID],aij, aiiajj);
526 */
527
528 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
529 columns(realnnz++) = col;
530 rownnz++;
531 } else
532 numDropped++;
533 }
534 rows[row + 1] = realnnz;
535 } else {
536 // Standard abs classical
537 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
538 LO col = indices[colID];
539 MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
540 MT aij = STS::magnitude(vals[colID] * vals[colID]); // |a_ij|^2
541
542 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
543 columns(realnnz++) = col;
544 rownnz++;
545 } else
546 numDropped++;
547 }
548 rows(row + 1) = realnnz;
549 }
550 } // end for row
551 } else {
552 /* Cut Algorithm */
553 SubFactoryMonitor m1(*this, "Cut Drop", currentLevel);
554 using ExecSpace = typename Node::execution_space;
555 using TeamPol = Kokkos::TeamPolicy<ExecSpace>;
556 using TeamMem = typename TeamPol::member_type;
557 using ATS = KokkosKernels::ArithTraits<Scalar>;
558 using impl_scalar_type = typename ATS::val_type;
559 using implATS = KokkosKernels::ArithTraits<impl_scalar_type>;
560
561 // move from host to device
562 auto ghostedDiagValsView = Kokkos::subview(ghostedDiag->getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
563 auto thresholdKokkos = static_cast<impl_scalar_type>(threshold);
564 auto realThresholdKokkos = implATS::magnitude(thresholdKokkos);
565 auto columnsDevice = Kokkos::create_mirror_view(ExecSpace(), columns);
566
567 auto A_device = A->getLocalMatrixDevice();
568 RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
569 RCP<const Import> importer = A->getCrsGraph()->getImporter();
570 RCP<LocalOrdinalVector> boundaryNodesVector = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(graph->GetDomainMap());
571 RCP<LocalOrdinalVector> boundaryColumnVector;
572 for (size_t i = 0; i < graph->GetNodeNumVertices(); i++) {
573 boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
574 }
575 if (!importer.is_null()) {
576 boundaryColumnVector = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(graph->GetImportMap());
577 boundaryColumnVector->doImport(*boundaryNodesVector, *importer, Xpetra::INSERT);
578 } else {
579 boundaryColumnVector = boundaryNodesVector;
580 }
581 auto boundaryColumn = boundaryColumnVector->getLocalViewDevice(Tpetra::Access::ReadOnly);
582 auto boundary = Kokkos::subview(boundaryColumn, Kokkos::ALL(), 0);
583
584 Kokkos::View<LO*, ExecSpace> rownnzView("rownnzView", A_device.numRows());
585 auto drop_views = Kokkos::View<bool*, ExecSpace>("drop_views", A_device.nnz());
586 auto index_views = Kokkos::View<size_t*, ExecSpace>("index_views", A_device.nnz());
587
588 Kokkos::parallel_reduce(
589 "classical_cut", TeamPol(A_device.numRows(), Kokkos::AUTO), KOKKOS_LAMBDA(const TeamMem& teamMember, LO& globalnnz, GO& totalDropped) {
590 LO row = teamMember.league_rank();
591 auto rowView = A_device.rowConst(row);
592 size_t nnz = rowView.length;
593
594 auto drop_view = Kokkos::subview(drop_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
595 auto index_view = Kokkos::subview(index_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
596
597 // find magnitudes
598 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, (LO)nnz), [&](const LO colID) {
599 index_view(colID) = colID;
600 LO col = rowView.colidx(colID);
601 // ignore diagonals for now, they are checked again later
602 // Don't aggregate boundaries
603 if (row == col || boundary(col)) {
604 drop_view(colID) = true;
605 } else {
606 drop_view(colID) = false;
607 }
608 });
609
610 size_t dropStart = nnz;
611 if (classicalAlgo == unscaled_cut) {
612 // push diagonals and boundaries to the right, sort everything else by aij on the left
613 Kokkos::Experimental::sort_team(teamMember, index_view, [=](size_t& x, size_t& y) -> bool {
614 if (drop_view(x) || drop_view(y)) {
615 return drop_view(x) < drop_view(y);
616 } else {
617 auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
618 auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
619 return x_aij > y_aij;
620 }
621 });
622
623 // find index where dropping starts
624 Kokkos::parallel_reduce(
625 Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](size_t i, size_t& min) {
626 auto const& x = index_view(i - 1);
627 auto const& y = index_view(i);
628 typename implATS::magnitudeType x_aij = 0;
629 typename implATS::magnitudeType y_aij = 0;
630 if (!drop_view(x)) {
631 x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
632 }
633 if (!drop_view(y)) {
634 y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
635 }
636
637 if (realThresholdKokkos * realThresholdKokkos * x_aij > y_aij) {
638 if (i < min) {
639 min = i;
640 }
641 }
642 },
643 Kokkos::Min<size_t>(dropStart));
644 } else if (classicalAlgo == scaled_cut) {
645 // push diagonals and boundaries to the right, sort everything else by aij/aiiajj on the left
646 Kokkos::Experimental::sort_team(teamMember, index_view, [=](size_t& x, size_t& y) -> bool {
647 if (drop_view(x) || drop_view(y)) {
648 return drop_view(x) < drop_view(y);
649 } else {
650 auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
651 auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
652 auto x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
653 auto y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
654 return (x_aij / x_aiiajj) > (y_aij / y_aiiajj);
655 }
656 });
657
658 // find index where dropping starts
659 Kokkos::parallel_reduce(
660 Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](size_t i, size_t& min) {
661 auto const& x = index_view(i - 1);
662 auto const& y = index_view(i);
663 typename implATS::magnitudeType x_val = 0;
664 typename implATS::magnitudeType y_val = 0;
665 if (!drop_view(x)) {
666 typename implATS::magnitudeType x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
667 typename implATS::magnitudeType x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
668 x_val = x_aij / x_aiiajj;
669 }
670 if (!drop_view(y)) {
671 typename implATS::magnitudeType y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
672 typename implATS::magnitudeType y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
673 y_val = y_aij / y_aiiajj;
674 }
675
676 if (realThresholdKokkos * realThresholdKokkos * x_val > y_val) {
677 if (i < min) {
678 min = i;
679 }
680 }
681 },
682 Kokkos::Min<size_t>(dropStart));
683 }
684
685 // drop everything to the right of where values stop passing threshold
686 if (dropStart < nnz) {
687 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, dropStart, nnz), [=](size_t i) {
688 drop_view(index_view(i)) = true;
689 });
690 }
691
692 LO rownnz = 0;
693 GO rowDropped = 0;
694 Kokkos::parallel_reduce(
695 Kokkos::TeamThreadRange(teamMember, nnz), [=](const size_t idxID, LO& keep, GO& drop) {
696 LO col = rowView.colidx(idxID);
697 // don't drop diagonal
698 if (row == col || !drop_view(idxID)) {
699 columnsDevice(A_device.graph.row_map(row) + idxID) = col;
700 keep++;
701 } else {
702 columnsDevice(A_device.graph.row_map(row) + idxID) = -1;
703 drop++;
704 }
705 },
706 rownnz, rowDropped);
707
708 Kokkos::single(Kokkos::PerTeam(teamMember), [&]() {
709 globalnnz += rownnz;
710 totalDropped += rowDropped;
711 rownnzView(row) = rownnz;
712 });
713 },
714 realnnz, numDropped);
715
716 // update column indices so that kept indices are aligned to the left for subview that happens later on
717 Kokkos::Experimental::remove(ExecSpace(), columnsDevice, -1);
718 Kokkos::deep_copy(columns, columnsDevice);
719
720 // update row indices by adding up new # of nnz in each row
721 auto rowsDevice = Kokkos::create_mirror_view(ExecSpace(), rows);
722 Kokkos::parallel_scan(
723 Kokkos::RangePolicy<ExecSpace>(0, A_device.numRows()), KOKKOS_LAMBDA(const int i, LO& partial_sum, bool is_final) {
724 partial_sum += rownnzView(i);
725 if (is_final) rowsDevice(i + 1) = partial_sum;
726 });
727 Kokkos::deep_copy(rows, rowsDevice);
728 }
729
730 numTotal = A->getLocalNumEntries();
731
732 if (aggregationMayCreateDirichlet) {
733 // If the only element remaining after filtering is diagonal, mark node as boundary
734 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
735 if (rows[row + 1] - rows[row] <= 1)
736 boundaryNodes[row] = true;
737 }
738 }
739
740 RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(), "thresholded graph of A"));
741 graph->SetBoundaryNodeMap(boundaryNodes);
742 if (GetVerbLevel() & Statistics1) {
743 GO numLocalBoundaryNodes = 0;
744 GO numGlobalBoundaryNodes = 0;
745 for (size_t i = 0; i < boundaryNodes.size(); ++i)
746 if (boundaryNodes(i))
747 numLocalBoundaryNodes++;
748 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
749 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
750 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
751 }
752 Set(currentLevel, "Graph", graph);
753 Set(currentLevel, "DofsPerNode", 1);
754
755 // If we're doing signed classical, we might want to block-diagonalize *after* the dropping
756 if (generateColoringGraph) {
757 RCP<LWGraph> colorGraph;
758 RCP<const Import> importer = A->getCrsGraph()->getImporter();
759 BlockDiagonalizeGraph(graph, ghostedBlockNumber, colorGraph, importer);
760 Set(currentLevel, "Coloring Graph", colorGraph);
761 // #define CMS_DUMP
762#ifdef CMS_DUMP
763 {
764 Xpetra::IO<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("m_regular_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(graph)->GetCrsGraph());
765 Xpetra::IO<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("m_color_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(colorGraph)->GetCrsGraph());
766 // int rank = graph->GetDomainMap()->getComm()->getRank();
767 // {
768 // std::ofstream ofs(std::string("m_color_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
769 // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
770 // colorGraph->print(*fancy,Debug);
771 // }
772 // {
773 // std::ofstream ofs(std::string("m_regular_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
774 // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
775 // graph->print(*fancy,Debug);
776 // }
777 }
778#endif
779 } // end generateColoringGraph
780 } else if (BlockSize > 1 && threshold == STS::zero()) {
781 // Case 3: Multiple DOF/node problem without dropping
782 const RCP<const Map> rowMap = A->getRowMap();
783 const RCP<const Map> colMap = A->getColMap();
784
785 graphType = "amalgamated";
786
787 // build node row map (uniqueMap) and node column map (nonUniqueMap)
788 // the arrays rowTranslation and colTranslation contain the local node id
789 // given a local dof id. The data is calculated by the AmalgamationFactory and
790 // stored in the variable container "UnAmalgamationInfo"
791 RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
792 RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
793 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
794 Array<LO> colTranslation = *(amalInfo->getColTranslation());
795
796 // get number of local nodes
797 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
798
799 // Allocate space for the local graph
800 typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
801 typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
802
803 typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
804 Kokkos::deep_copy(amalgBoundaryNodes, false);
805
806 // Detect and record rows that correspond to Dirichlet boundary conditions
807 // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
808 // TODO the array one bigger than the number of local rows, and the last entry can
809 // TODO hold the actual number of boundary nodes. Clever, huh?
810 ArrayRCP<bool> pointBoundaryNodes;
811 pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows(*A, dirichletThreshold));
812 if (rowSumTol > 0.)
813 Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
814
815 // extract striding information
816 LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
817 LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
818 LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
819 if (A->IsView("stridedMaps") == true) {
820 Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
821 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
822 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
823 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
824 blkId = strMap->getStridedBlockId();
825 if (blkId > -1)
826 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
827 }
828
829 // loop over all local nodes
830 LO realnnz = 0;
831 rows(0) = 0;
832 Array<LO> indicesExtra;
833 for (LO row = 0; row < numRows; row++) {
834 ArrayView<const LO> indices;
835 indicesExtra.resize(0);
836
837 // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
838 // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
839 // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
840 // with local ids.
841 // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
842 // node.
843 bool isBoundary = false;
844 if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
845 for (LO j = 0; j < blkPartSize; j++) {
846 if (pointBoundaryNodes[row * blkPartSize + j]) {
847 isBoundary = true;
848 break;
849 }
850 }
851 } else {
852 isBoundary = true;
853 for (LO j = 0; j < blkPartSize; j++) {
854 if (!pointBoundaryNodes[row * blkPartSize + j]) {
855 isBoundary = false;
856 break;
857 }
858 }
859 }
860
861 // Merge rows of A
862 // The array indicesExtra contains local column node ids for the current local node "row"
863 if (!isBoundary)
864 MergeRows(*A, row, indicesExtra, colTranslation);
865 else
866 indicesExtra.push_back(row);
867 indices = indicesExtra;
868 numTotal += indices.size();
869
870 // add the local column node ids to the full columns array which
871 // contains the local column node ids for all local node rows
872 LO nnz = indices.size(), rownnz = 0;
873 for (LO colID = 0; colID < nnz; colID++) {
874 LO col = indices[colID];
875 columns(realnnz++) = col;
876 rownnz++;
877 }
878
879 if (rownnz == 1) {
880 // If the only element remaining after filtering is diagonal, mark node as boundary
881 // FIXME: this should really be replaced by the following
882 // if (indices.size() == 1 && indices[0] == row)
883 // boundaryNodes[row] = true;
884 // We do not do it this way now because there is no framework for distinguishing isolated
885 // and boundary nodes in the aggregation algorithms
886 amalgBoundaryNodes[row] = true;
887 }
888 rows(row + 1) = realnnz;
889 } // for (LO row = 0; row < numRows; row++)
890
891 RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
892 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
893
894 if (GetVerbLevel() & Statistics1) {
895 GO numLocalBoundaryNodes = 0;
896 GO numGlobalBoundaryNodes = 0;
897
898 for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
899 if (amalgBoundaryNodes(i))
900 numLocalBoundaryNodes++;
901
902 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
903 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
904 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
905 << " agglomerated Dirichlet nodes" << std::endl;
906 }
907
908 Set(currentLevel, "Graph", graph);
909 Set(currentLevel, "DofsPerNode", blkSize); // full block size
910
911 } else if (BlockSize > 1 && threshold != STS::zero()) {
912 // Case 4: Multiple DOF/node problem with dropping
913 const RCP<const Map> rowMap = A->getRowMap();
914 const RCP<const Map> colMap = A->getColMap();
915 graphType = "amalgamated";
916
917 // build node row map (uniqueMap) and node column map (nonUniqueMap)
918 // the arrays rowTranslation and colTranslation contain the local node id
919 // given a local dof id. The data is calculated by the AmalgamationFactory and
920 // stored in the variable container "UnAmalgamationInfo"
921 RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
922 RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
923 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
924 Array<LO> colTranslation = *(amalInfo->getColTranslation());
925
926 // get number of local nodes
927 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
928
929 // Allocate space for the local graph
930 typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
931 typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
932
933 typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
934 Kokkos::deep_copy(amalgBoundaryNodes, false);
935
936 // Detect and record rows that correspond to Dirichlet boundary conditions
937 // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
938 // TODO the array one bigger than the number of local rows, and the last entry can
939 // TODO hold the actual number of boundary nodes. Clever, huh?
940 auto pointBoundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
941 if (rowSumTol > 0.)
942 Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, pointBoundaryNodes);
943
944 // extract striding information
945 LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
946 LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
947 LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
948 if (A->IsView("stridedMaps") == true) {
949 Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
950 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
951 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
952 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
953 blkId = strMap->getStridedBlockId();
954 if (blkId > -1)
955 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
956 }
957
958 // extract diagonal data for dropping strategy
960 const ArrayRCP<const SC> ghostedDiagVals = ghostedDiag->getData(0);
961
962 // loop over all local nodes
963 LO realnnz = 0;
964 rows[0] = 0;
965 Array<LO> indicesExtra;
966 for (LO row = 0; row < numRows; row++) {
967 ArrayView<const LO> indices;
968 indicesExtra.resize(0);
969
970 // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
971 // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
972 // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
973 // with local ids.
974 // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
975 // node.
976 bool isBoundary = false;
977 if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
978 for (LO j = 0; j < blkPartSize; j++) {
979 if (pointBoundaryNodes[row * blkPartSize + j]) {
980 isBoundary = true;
981 break;
982 }
983 }
984 } else {
985 isBoundary = true;
986 for (LO j = 0; j < blkPartSize; j++) {
987 if (!pointBoundaryNodes[row * blkPartSize + j]) {
988 isBoundary = false;
989 break;
990 }
991 }
992 }
993
994 // Merge rows of A
995 // The array indicesExtra contains local column node ids for the current local node "row"
996 if (!isBoundary)
997 MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation);
998 else
999 indicesExtra.push_back(row);
1000 indices = indicesExtra;
1001 numTotal += indices.size();
1002
1003 // add the local column node ids to the full columns array which
1004 // contains the local column node ids for all local node rows
1005 LO nnz = indices.size(), rownnz = 0;
1006 for (LO colID = 0; colID < nnz; colID++) {
1007 LO col = indices[colID];
1008 columns[realnnz++] = col;
1009 rownnz++;
1010 }
1011
1012 if (rownnz == 1) {
1013 // If the only element remaining after filtering is diagonal, mark node as boundary
1014 // FIXME: this should really be replaced by the following
1015 // if (indices.size() == 1 && indices[0] == row)
1016 // boundaryNodes[row] = true;
1017 // We do not do it this way now because there is no framework for distinguishing isolated
1018 // and boundary nodes in the aggregation algorithms
1019 amalgBoundaryNodes[row] = true;
1020 }
1021 rows[row + 1] = realnnz;
1022 } // for (LO row = 0; row < numRows; row++)
1023 // columns.resize(realnnz);
1024
1025 RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1026 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1027
1028 if (GetVerbLevel() & Statistics1) {
1029 GO numLocalBoundaryNodes = 0;
1030 GO numGlobalBoundaryNodes = 0;
1031
1032 for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1033 if (amalgBoundaryNodes(i))
1034 numLocalBoundaryNodes++;
1035
1036 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1037 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1038 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
1039 << " agglomerated Dirichlet nodes" << std::endl;
1040 }
1041
1042 Set(currentLevel, "Graph", graph);
1043 Set(currentLevel, "DofsPerNode", blkSize); // full block size
1044 }
1045
1046 } else if (algo == "distance laplacian") {
1047 LO blkSize = A->GetFixedBlockSize();
1048 GO indexBase = A->getRowMap()->getIndexBase();
1049 // [*0*] : FIXME
1050 // ap: somehow, if I move this line to [*1*], Belos throws an error
1051 // I'm not sure what's going on. Do we always have to Get data, if we did
1052 // DeclareInput for it?
1053 // RCP<RealValuedMultiVector> Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
1054
1055 // Detect and record rows that correspond to Dirichlet boundary conditions
1056 // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
1057 // TODO the array one bigger than the number of local rows, and the last entry can
1058 // TODO hold the actual number of boundary nodes. Clever, huh?
1059 auto pointBoundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
1060 if (rowSumTol > 0.)
1061 Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, pointBoundaryNodes);
1062
1063 if ((blkSize == 1) && (threshold == STS::zero())) {
1064 // Trivial case: scalar problem, no dropping. Can return original graph
1065 RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
1066 graph->SetBoundaryNodeMap(pointBoundaryNodes);
1067 graphType = "unamalgamated";
1068 numTotal = A->getLocalNumEntries();
1069
1070 if (GetVerbLevel() & Statistics1) {
1071 GO numLocalBoundaryNodes = 0;
1072 GO numGlobalBoundaryNodes = 0;
1073 for (size_t i = 0; i < pointBoundaryNodes.size(); ++i)
1074 if (pointBoundaryNodes(i))
1075 numLocalBoundaryNodes++;
1076 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1077 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1078 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1079 }
1080
1081 Set(currentLevel, "DofsPerNode", blkSize);
1082 Set(currentLevel, "Graph", graph);
1083
1084 } else {
1085 // ap: We make quite a few assumptions here; general case may be a lot different,
1086 // but much much harder to implement. We assume that:
1087 // 1) all maps are standard maps, not strided maps
1088 // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic
1089 // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i
1090 //
1091 // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo,
1092 // but as I totally don't understand that code, here is my solution
1093
1094 // [*1*]: see [*0*]
1095
1096 // Check that the number of local coordinates is consistent with the #rows in A
1097 TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() / blkSize != Coords->getLocalLength(), Exceptions::Incompatible,
1098 "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() << ") by modulo block size (" << blkSize << ").");
1099
1100 const RCP<const Map> colMap = A->getColMap();
1101 RCP<const Map> uniqueMap, nonUniqueMap;
1102 Array<LO> colTranslation;
1103 if (blkSize == 1) {
1104 uniqueMap = A->getRowMap();
1105 nonUniqueMap = A->getColMap();
1106 graphType = "unamalgamated";
1107
1108 } else {
1109 uniqueMap = Coords->getMap();
1110 TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible,
1111 "Different index bases for matrix and coordinates");
1112
1113 AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation);
1114
1115 graphType = "amalgamated";
1116 }
1117 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
1118
1119 RCP<RealValuedMultiVector> ghostedCoords;
1120 RCP<Vector> ghostedLaplDiag;
1121 Teuchos::ArrayRCP<SC> ghostedLaplDiagData;
1122 if (threshold != STS::zero()) {
1123 // Get ghost coordinates
1124 RCP<const Import> importer;
1125 {
1126 SubFactoryMonitor m1(*this, "Import construction", currentLevel);
1127 if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) {
1128 GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl;
1129 importer = realA->getCrsGraph()->getImporter();
1130 } else {
1131 GetOStream(Warnings0) << "Constructing new importer instance" << std::endl;
1132 importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
1133 }
1134 } // subtimer
1135 ghostedCoords = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(nonUniqueMap, Coords->getNumVectors());
1136 {
1137 SubFactoryMonitor m1(*this, "Coordinate import", currentLevel);
1138 ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT);
1139 } // subtimer
1140
1141 // Construct Distance Laplacian diagonal
1142 RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
1143 Array<LO> indicesExtra;
1144 Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1145 if (threshold != STS::zero()) {
1146 const size_t numVectors = ghostedCoords->getNumVectors();
1147 coordData.reserve(numVectors);
1148 for (size_t j = 0; j < numVectors; j++) {
1149 Teuchos::ArrayRCP<const real_type> tmpData = ghostedCoords->getData(j);
1150 coordData.push_back(tmpData);
1151 }
1152 }
1153 {
1154 SubFactoryMonitor m1(*this, "Laplacian local diagonal", currentLevel);
1155 ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
1156 for (LO row = 0; row < numRows; row++) {
1157 ArrayView<const LO> indices;
1158
1159 if (blkSize == 1) {
1160 ArrayView<const SC> vals;
1161 A->getLocalRowView(row, indices, vals);
1162
1163 } else {
1164 // Merge rows of A
1165 indicesExtra.resize(0);
1166 MergeRows(*A, row, indicesExtra, colTranslation);
1167 indices = indicesExtra;
1168 }
1169
1170 LO nnz = indices.size();
1171 bool haveAddedToDiag = false;
1172 for (LO colID = 0; colID < nnz; colID++) {
1173 const LO col = indices[colID];
1174
1175 if (row != col) {
1176 if (use_dlap_weights == SINGLE_WEIGHTS) {
1177 /*printf("[%d,%d] Unweighted Distance = %6.4e Weighted Distance = %6.4e\n",row,col,
1178 MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col),
1179 MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col));*/
1180 localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1181 } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1182 int block_id = row % interleaved_blocksize;
1183 int block_start = block_id * interleaved_blocksize;
1184 localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1185 } else {
1186 // printf("[%d,%d] Unweighted Distance = %6.4e\n",row,col,MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col));
1187 localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1188 }
1189 haveAddedToDiag = true;
1190 }
1191 }
1192 // Deal with the situation where boundary conditions have only been enforced on rows, but not on columns.
1193 // We enforce dropping of these entries by assigning a very large number to the diagonal entries corresponding to BCs.
1194 if (!haveAddedToDiag)
1195 localLaplDiagData[row] = STS::squareroot(STS::rmax());
1196 }
1197 } // subtimer
1198 {
1199 SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", currentLevel);
1200 ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
1201 ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT);
1202 ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
1203 } // subtimer
1204
1205 } else {
1206 GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl;
1207 }
1208
1209 // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian
1210
1211 // allocate space for the local graph
1212 typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
1213 typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
1214
1215#ifdef HAVE_MUELU_DEBUG
1216 // DEBUGGING
1217 for (LO i = 0; i < (LO)columns.size(); i++) columns[i] = -666;
1218#endif
1219
1220 // Extra array for if we're allowing symmetrization with cutting
1221 ArrayRCP<LO> rows_stop;
1222 bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo == scaled_cut_symmetric;
1223 if (use_stop_array)
1224 // rows_stop = typename LWGraph::row_type::non_const_type("rows_stop", numRows);
1225 rows_stop.resize(numRows);
1226
1227 typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
1228 Kokkos::deep_copy(amalgBoundaryNodes, false);
1229
1230 LO realnnz = 0;
1231 rows(0) = 0;
1232
1233 Array<LO> indicesExtra;
1234 {
1235 SubFactoryMonitor m1(*this, "Laplacian dropping", currentLevel);
1236 Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1237 if (threshold != STS::zero()) {
1238 const size_t numVectors = ghostedCoords->getNumVectors();
1239 coordData.reserve(numVectors);
1240 for (size_t j = 0; j < numVectors; j++) {
1241 Teuchos::ArrayRCP<const real_type> tmpData = ghostedCoords->getData(j);
1242 coordData.push_back(tmpData);
1243 }
1244 }
1245
1246 ArrayView<const SC> vals; // CMS hackery
1247 for (LO row = 0; row < numRows; row++) {
1248 ArrayView<const LO> indices;
1249 indicesExtra.resize(0);
1250 bool isBoundary = false;
1251
1252 if (blkSize == 1) {
1253 // ArrayView<const SC> vals;//CMS uncomment
1254 A->getLocalRowView(row, indices, vals);
1255 isBoundary = pointBoundaryNodes[row];
1256 } else {
1257 // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
1258 isBoundary = true;
1259 for (LO j = 0; j < blkSize; j++) {
1260 if (!pointBoundaryNodes[row * blkSize + j]) {
1261 isBoundary = false;
1262 break;
1263 }
1264 }
1265
1266 // Merge rows of A
1267 if (!isBoundary)
1268 MergeRows(*A, row, indicesExtra, colTranslation);
1269 else
1270 indicesExtra.push_back(row);
1271 indices = indicesExtra;
1272 }
1273 numTotal += indices.size();
1274
1275 LO nnz = indices.size(), rownnz = 0;
1276
1277 if (use_stop_array) {
1278 rows(row + 1) = rows(row) + nnz;
1279 realnnz = rows(row);
1280 }
1281
1282 if (threshold != STS::zero()) {
1283 // default
1284 if (distanceLaplacianAlgo == defaultAlgo) {
1285 /* Standard Distance Laplacian */
1286 for (LO colID = 0; colID < nnz; colID++) {
1287 LO col = indices[colID];
1288
1289 if (row == col) {
1290 columns(realnnz++) = col;
1291 rownnz++;
1292 continue;
1293 }
1294
1295 // We do not want the distance Laplacian aggregating boundary nodes
1296 if (isBoundary) continue;
1297
1298 SC laplVal;
1299 if (use_dlap_weights == SINGLE_WEIGHTS) {
1300 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1301 } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1302 int block_id = row % interleaved_blocksize;
1303 int block_start = block_id * interleaved_blocksize;
1304 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1305 } else {
1306 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1307 }
1308 real_type aiiajj = STS::magnitude(realThreshold * realThreshold * ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1309 real_type aij = STS::magnitude(laplVal * laplVal);
1310
1311 if (aij > aiiajj) {
1312 columns(realnnz++) = col;
1313 rownnz++;
1314 } else {
1315 numDropped++;
1316 }
1317 }
1318 } else {
1319 /* Cut Algorithm */
1320 using DropTol = Details::DropTol<real_type, LO>;
1321 std::vector<DropTol> drop_vec;
1322 drop_vec.reserve(nnz);
1323 const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
1324 const real_type one = Teuchos::ScalarTraits<real_type>::one();
1325
1326 // find magnitudes
1327 for (LO colID = 0; colID < nnz; colID++) {
1328 LO col = indices[colID];
1329
1330 if (row == col) {
1331 drop_vec.emplace_back(zero, one, colID, false);
1332 continue;
1333 }
1334 // We do not want the distance Laplacian aggregating boundary nodes
1335 if (isBoundary) continue;
1336
1337 SC laplVal;
1338 if (use_dlap_weights == SINGLE_WEIGHTS) {
1339 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1340 } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1341 int block_id = row % interleaved_blocksize;
1342 int block_start = block_id * interleaved_blocksize;
1343 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1344 } else {
1345 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1346 }
1347
1348 real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1349 real_type aij = STS::magnitude(laplVal * laplVal);
1350
1351 drop_vec.emplace_back(aij, aiiajj, colID, false);
1352 }
1353
1354 const size_t n = drop_vec.size();
1355
1356 if (distanceLaplacianAlgo == unscaled_cut) {
1357 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1358 return a.val > b.val;
1359 });
1360
1361 bool drop = false;
1362 for (size_t i = 1; i < n; ++i) {
1363 if (!drop) {
1364 auto const& x = drop_vec[i - 1];
1365 auto const& y = drop_vec[i];
1366 auto a = x.val;
1367 auto b = y.val;
1368 if (realThreshold * realThreshold * a > b) {
1369 drop = true;
1370#ifdef HAVE_MUELU_DEBUG
1371 if (distanceLaplacianCutVerbose) {
1372 std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
1373 }
1374#endif
1375 }
1376 }
1377 drop_vec[i].drop = drop;
1378 }
1379 } else if (distanceLaplacianAlgo == scaled_cut || distanceLaplacianAlgo == scaled_cut_symmetric) {
1380 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1381 return a.val / a.diag > b.val / b.diag;
1382 });
1383
1384 bool drop = false;
1385 for (size_t i = 1; i < n; ++i) {
1386 if (!drop) {
1387 auto const& x = drop_vec[i - 1];
1388 auto const& y = drop_vec[i];
1389 auto a = x.val / x.diag;
1390 auto b = y.val / y.diag;
1391 if (realThreshold * realThreshold * a > b) {
1392 drop = true;
1393#ifdef HAVE_MUELU_DEBUG
1394 if (distanceLaplacianCutVerbose) {
1395 std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
1396 }
1397#endif
1398 }
1399 }
1400 drop_vec[i].drop = drop;
1401 }
1402 }
1403
1404 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1405 return a.col < b.col;
1406 });
1407
1408 for (LO idxID = 0; idxID < (LO)drop_vec.size(); idxID++) {
1409 LO col = indices[drop_vec[idxID].col];
1410
1411 // don't drop diagonal
1412 if (row == col) {
1413 columns(realnnz++) = col;
1414 rownnz++;
1415 // printf("(%d,%d) KEEP %13s matrix = %6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val);
1416 continue;
1417 }
1418
1419 if (!drop_vec[idxID].drop) {
1420 columns(realnnz++) = col;
1421 // printf("(%d,%d) KEEP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1422 rownnz++;
1423 } else {
1424 // printf("(%d,%d) DROP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1425 numDropped++;
1426 }
1427 }
1428 }
1429 } else {
1430 // Skip laplace calculation and threshold comparison for zero threshold
1431 for (LO colID = 0; colID < nnz; colID++) {
1432 LO col = indices[colID];
1433 columns(realnnz++) = col;
1434 rownnz++;
1435 }
1436 }
1437
1438 if (rownnz == 1) {
1439 // If the only element remaining after filtering is diagonal, mark node as boundary
1440 // FIXME: this should really be replaced by the following
1441 // if (indices.size() == 1 && indices[0] == row)
1442 // boundaryNodes[row] = true;
1443 // We do not do it this way now because there is no framework for distinguishing isolated
1444 // and boundary nodes in the aggregation algorithms
1445 amalgBoundaryNodes[row] = true;
1446 }
1447
1448 if (use_stop_array)
1449 rows_stop[row] = rownnz + rows[row];
1450 else
1451 rows[row + 1] = realnnz;
1452 } // for (LO row = 0; row < numRows; row++)
1453
1454 } // subtimer
1455
1456 if (use_stop_array) {
1457 // Do symmetrization of the cut matrix
1458 // NOTE: We assume nested row/column maps here
1459 for (LO row = 0; row < numRows; row++) {
1460 for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) {
1461 LO col = columns[colidx];
1462 if (col >= numRows) continue;
1463
1464 bool found = false;
1465 for (LO t_col = rows(col); !found && t_col < rows_stop[col]; t_col++) {
1466 if (columns[t_col] == row)
1467 found = true;
1468 }
1469 // We didn't find the transpose buddy, so let's symmetrize, unless we'd be symmetrizing
1470 // into a Dirichlet unknown. In that case don't.
1471 if (!found && !pointBoundaryNodes[col] && Teuchos::as<typename LWGraph::row_type::value_type>(rows_stop[col]) < rows[col + 1]) {
1472 LO new_idx = rows_stop[col];
1473 // printf("(%d,%d) SYMADD entry\n",col,row);
1474 columns[new_idx] = row;
1475 rows_stop[col]++;
1476 numDropped--;
1477 }
1478 }
1479 }
1480
1481 // Condense everything down
1482 LO current_start = 0;
1483 for (LO row = 0; row < numRows; row++) {
1484 LO old_start = current_start;
1485 for (LO col = rows(row); col < rows_stop[row]; col++) {
1486 if (current_start != col) {
1487 columns(current_start) = columns(col);
1488 }
1489 current_start++;
1490 }
1491 rows[row] = old_start;
1492 }
1493 rows(numRows) = realnnz = current_start;
1494 }
1495
1496 RCP<LWGraph> graph;
1497 {
1498 SubFactoryMonitor m1(*this, "Build amalgamated graph", currentLevel);
1499 graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1500 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1501 } // subtimer
1502
1503 if (GetVerbLevel() & Statistics1) {
1504 GO numLocalBoundaryNodes = 0;
1505 GO numGlobalBoundaryNodes = 0;
1506
1507 for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1508 if (amalgBoundaryNodes(i))
1509 numLocalBoundaryNodes++;
1510
1511 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1512 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1513 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes"
1514 << " using threshold " << dirichletThreshold << std::endl;
1515 }
1516
1517 Set(currentLevel, "Graph", graph);
1518 Set(currentLevel, "DofsPerNode", blkSize);
1519 }
1520 }
1521
1522 if (GetVerbLevel() & Statistics1) {
1523 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1524 GO numGlobalTotal, numGlobalDropped;
1525 MueLu_sumAll(comm, numTotal, numGlobalTotal);
1526 MueLu_sumAll(comm, numDropped, numGlobalDropped);
1527 GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1528 if (numGlobalTotal != 0)
1529 GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1530 GetOStream(Statistics1) << std::endl;
1531 }
1532
1533 } else {
1534 // what Tobias has implemented
1535
1536 SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
1537 // GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1538 GetOStream(Runtime0) << "algorithm = \""
1539 << "failsafe"
1540 << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1541 Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
1542
1543 RCP<const Map> rowMap = A->getRowMap();
1544 RCP<const Map> colMap = A->getColMap();
1545
1546 LO blockdim = 1; // block dim for fixed size blocks
1547 GO indexBase = rowMap->getIndexBase(); // index base of maps
1548 GO offset = 0;
1549
1550 // 1) check for blocking/striding information
1551 if (A->IsView("stridedMaps") &&
1552 Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap("stridedMaps")) != Teuchos::null) {
1553 Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!)
1554 RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap());
1555 TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null, Exceptions::BadCast, "MueLu::CoalesceFactory::Build: cast to strided row map failed.");
1556 blockdim = strMap->getFixedBlockSize();
1557 offset = strMap->getOffset();
1558 oldView = A->SwitchToView(oldView);
1559 GetOStream(Statistics1) << "CoalesceDropFactory::Build():"
1560 << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl;
1561 } else
1562 GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1563
1564 // 2) get row map for amalgamated matrix (graph of A)
1565 // with same distribution over all procs as row map of A
1566 RCP<const Map> nodeMap = amalInfo->getNodeRowMap();
1567 GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl;
1568
1569 // 3) create graph of amalgamated matrix
1570 RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries() * blockdim);
1571
1572 LO numRows = A->getRowMap()->getLocalNumElements();
1573 LO numNodes = nodeMap->getLocalNumElements();
1574 typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numNodes);
1575 Kokkos::deep_copy(amalgBoundaryNodes, false);
1576 const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node
1577 bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid
1578
1579 // 4) do amalgamation. generate graph of amalgamated matrix
1580 // Note, this code is much more inefficient than the leightwight implementation
1581 // Most of the work has already been done in the AmalgamationFactory
1582 for (LO row = 0; row < numRows; row++) {
1583 // get global DOF id
1584 GO grid = rowMap->getGlobalElement(row);
1585
1586 // reinitialize boolean helper variable
1587 bIsDiagonalEntry = false;
1588
1589 // translate grid to nodeid
1590 GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase);
1591
1592 size_t nnz = A->getNumEntriesInLocalRow(row);
1593 Teuchos::ArrayView<const LO> indices;
1594 Teuchos::ArrayView<const SC> vals;
1595 A->getLocalRowView(row, indices, vals);
1596
1597 RCP<std::vector<GO>> cnodeIds = Teuchos::rcp(new std::vector<GO>); // global column block ids
1598 LO realnnz = 0;
1599 for (LO col = 0; col < Teuchos::as<LO>(nnz); col++) {
1600 GO gcid = colMap->getGlobalElement(indices[col]); // global column id
1601
1602 if (vals[col] != STS::zero()) {
1603 GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase);
1604 cnodeIds->push_back(cnodeId);
1605 realnnz++; // increment number of nnz in matrix row
1606 if (grid == gcid) bIsDiagonalEntry = true;
1607 }
1608 }
1609
1610 if (realnnz == 1 && bIsDiagonalEntry == true) {
1611 LO lNodeId = nodeMap->getLocalElement(nodeId);
1612 numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId
1613 if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes
1614 amalgBoundaryNodes[lNodeId] = true;
1615 }
1616
1617 Teuchos::ArrayRCP<GO> arr_cnodeIds = Teuchos::arcp(cnodeIds);
1618
1619 if (arr_cnodeIds.size() > 0)
1620 crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1621 }
1622 // fill matrix graph
1623 crsGraph->fillComplete(nodeMap, nodeMap);
1624
1625 // 5) create MueLu Graph object
1626 RCP<LWGraph> graph = rcp(new LWGraph(crsGraph, "amalgamated graph of A"));
1627
1628 // Detect and record rows that correspond to Dirichlet boundary conditions
1629 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1630
1631 if (GetVerbLevel() & Statistics1) {
1632 GO numLocalBoundaryNodes = 0;
1633 GO numGlobalBoundaryNodes = 0;
1634 for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1635 if (amalgBoundaryNodes(i))
1636 numLocalBoundaryNodes++;
1637 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1638 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1639 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1640 }
1641
1642 // 6) store results in Level
1643 // graph->SetBoundaryNodeMap(gBoundaryNodeMap);
1644 Set(currentLevel, "DofsPerNode", blockdim);
1645 Set(currentLevel, "Graph", graph);
1646
1647 } // if (doExperimentalWrap) ... else ...
1648
1649} // Build
1650
1651template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1652void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRows(const Matrix& A, const LO row, Array<LO>& cols, const Array<LO>& translation) const {
1653 typedef typename ArrayView<const LO>::size_type size_type;
1654
1655 // extract striding information
1656 LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1657 if (A.IsView("stridedMaps") == true) {
1658 Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1659 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1660 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1661 if (strMap->getStridedBlockId() > -1)
1662 blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1663 }
1664
1665 // count nonzero entries in all dof rows associated with node row
1666 size_t nnz = 0, pos = 0;
1667 for (LO j = 0; j < blkSize; j++)
1668 nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1669
1670 if (nnz == 0) {
1671 cols.resize(0);
1672 return;
1673 }
1674
1675 cols.resize(nnz);
1676
1677 // loop over all local dof rows associated with local node "row"
1678 ArrayView<const LO> inds;
1679 ArrayView<const SC> vals;
1680 for (LO j = 0; j < blkSize; j++) {
1681 A.getLocalRowView(row * blkSize + j, inds, vals);
1682 size_type numIndices = inds.size();
1683
1684 if (numIndices == 0) // skip empty dof rows
1685 continue;
1686
1687 // cols: stores all local node ids for current local node id "row"
1688 cols[pos++] = translation[inds[0]];
1689 for (size_type k = 1; k < numIndices; k++) {
1690 LO nodeID = translation[inds[k]];
1691 // Here we try to speed up the process by reducing the size of an array
1692 // to sort. This works if the column nonzeros belonging to the same
1693 // node are stored consequently.
1694 if (nodeID != cols[pos - 1])
1695 cols[pos++] = nodeID;
1696 }
1697 }
1698 cols.resize(pos);
1699 nnz = pos;
1700
1701 // Sort and remove duplicates
1702 std::sort(cols.begin(), cols.end());
1703 pos = 0;
1704 for (size_t j = 1; j < nnz; j++)
1705 if (cols[j] != cols[pos])
1706 cols[++pos] = cols[j];
1707 cols.resize(pos + 1);
1708}
1709
1710template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1711void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP<const SC>& ghostedDiagVals, SC threshold, Array<LO>& cols, const Array<LO>& translation) const {
1712 typedef typename ArrayView<const LO>::size_type size_type;
1713 typedef Teuchos::ScalarTraits<SC> STS;
1714
1715 // extract striding information
1716 LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1717 if (A.IsView("stridedMaps") == true) {
1718 Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1719 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1720 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1721 if (strMap->getStridedBlockId() > -1)
1722 blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1723 }
1724
1725 // count nonzero entries in all dof rows associated with node row
1726 size_t nnz = 0, pos = 0;
1727 for (LO j = 0; j < blkSize; j++)
1728 nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1729
1730 if (nnz == 0) {
1731 cols.resize(0);
1732 return;
1733 }
1734
1735 cols.resize(nnz);
1736
1737 // loop over all local dof rows associated with local node "row"
1738 ArrayView<const LO> inds;
1739 ArrayView<const SC> vals;
1740 for (LO j = 0; j < blkSize; j++) {
1741 A.getLocalRowView(row * blkSize + j, inds, vals);
1742 size_type numIndices = inds.size();
1743
1744 if (numIndices == 0) // skip empty dof rows
1745 continue;
1746
1747 // cols: stores all local node ids for current local node id "row"
1748 LO prevNodeID = -1;
1749 for (size_type k = 0; k < numIndices; k++) {
1750 LO dofID = inds[k];
1751 LO nodeID = translation[inds[k]];
1752
1753 // we avoid a square root by using squared values
1754 typename STS::magnitudeType aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[dofID] * ghostedDiagVals[row * blkSize + j]); // eps^2 * |a_ii| * |a_jj|
1755 typename STS::magnitudeType aij = STS::magnitude(vals[k] * vals[k]);
1756
1757 // check dropping criterion
1758 if (aij > aiiajj || (row * blkSize + j == dofID)) {
1759 // accept entry in graph
1760
1761 // Here we try to speed up the process by reducing the size of an array
1762 // to sort. This works if the column nonzeros belonging to the same
1763 // node are stored consequently.
1764 if (nodeID != prevNodeID) {
1765 cols[pos++] = nodeID;
1766 prevNodeID = nodeID;
1767 }
1768 }
1769 }
1770 }
1771 cols.resize(pos);
1772 nnz = pos;
1773
1774 // Sort and remove duplicates
1775 std::sort(cols.begin(), cols.end());
1776 pos = 0;
1777 for (size_t j = 1; j < nnz; j++)
1778 if (cols[j] != cols[pos])
1779 cols[++pos] = cols[j];
1780 cols.resize(pos + 1);
1781
1782 return;
1783}
1784
1785template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1786Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BlockDiagonalize(Level& currentLevel, const RCP<Matrix>& A, bool generate_matrix) const {
1787 typedef Teuchos::ScalarTraits<SC> STS;
1788
1789 const ParameterList& pL = GetParameterList();
1790 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
1791 const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
1792
1793 RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber");
1794 RCP<LocalOrdinalVector> ghostedBlockNumber;
1795 GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)" << std::endl;
1796
1797 // Ghost the column block numbers if we need to
1798 RCP<const Import> importer = A->getCrsGraph()->getImporter();
1799 if (!importer.is_null()) {
1800 SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
1801 ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
1802 ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
1803 } else {
1804 ghostedBlockNumber = BlockNumber;
1805 }
1806
1807 // Accessors for block numbers
1808 Teuchos::ArrayRCP<const LO> row_block_number = BlockNumber->getData(0);
1809 Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1810
1811 // allocate space for the local graph
1812 typename CrsMatrix::local_matrix_device_type::row_map_type::host_mirror_type::non_const_type rows_mat;
1813 typename LWGraph::row_type::non_const_type rows_graph;
1814 typename LWGraph::entries_type::non_const_type columns;
1815 typename CrsMatrix::local_matrix_device_type::values_type::host_mirror_type::non_const_type values;
1816 RCP<CrsMatrixWrap> crs_matrix_wrap;
1817
1818 if (generate_matrix) {
1819 crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0));
1820 rows_mat = typename CrsMatrix::local_matrix_device_type::row_map_type::host_mirror_type::non_const_type("rows_mat", A->getLocalNumRows() + 1);
1821 } else {
1822 rows_graph = typename LWGraph::row_type::non_const_type("rows_graph", A->getLocalNumRows() + 1);
1823 }
1824 columns = typename LWGraph::entries_type::non_const_type("columns", A->getLocalNumEntries());
1825 values = typename CrsMatrix::local_matrix_device_type::values_type::host_mirror_type::non_const_type("values", A->getLocalNumEntries());
1826
1827 LO realnnz = 0;
1828 GO numDropped = 0, numTotal = 0;
1829 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
1830 LO row_block = row_block_number[row];
1831 size_t nnz = A->getNumEntriesInLocalRow(row);
1832 ArrayView<const LO> indices;
1833 ArrayView<const SC> vals;
1834 A->getLocalRowView(row, indices, vals);
1835
1836 LO rownnz = 0;
1837 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
1838 LO col = indices[colID];
1839 LO col_block = col_block_number[col];
1840
1841 if (row_block == col_block) {
1842 if (generate_matrix) values[realnnz] = vals[colID];
1843 columns[realnnz++] = col;
1844 rownnz++;
1845 } else
1846 numDropped++;
1847 }
1848 if (generate_matrix)
1849 rows_mat[row + 1] = realnnz;
1850 else
1851 rows_graph[row + 1] = realnnz;
1852 }
1853
1854 auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
1855 if (rowSumTol > 0.)
1856 Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
1857
1858 numTotal = A->getLocalNumEntries();
1859
1860 if (GetVerbLevel() & Statistics1) {
1861 GO numLocalBoundaryNodes = 0;
1862 GO numGlobalBoundaryNodes = 0;
1863 for (size_t i = 0; i < boundaryNodes.size(); ++i)
1864 if (boundaryNodes(i))
1865 numLocalBoundaryNodes++;
1866 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1867 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1868 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1869
1870 GO numGlobalTotal, numGlobalDropped;
1871 MueLu_sumAll(comm, numTotal, numGlobalTotal);
1872 MueLu_sumAll(comm, numDropped, numGlobalDropped);
1873 GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1874 if (numGlobalTotal != 0)
1875 GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1876 GetOStream(Statistics1) << std::endl;
1877 }
1878
1879 Set(currentLevel, "Filtering", true);
1880
1881 if (generate_matrix) {
1882 // NOTE: Trying to use A's Import/Export objects will cause the code to segfault back in Build() with errors on the Import
1883 // if you're using Epetra. I'm not really sure why. By using the Col==Domain and Row==Range maps, we get null Import/Export objects
1884 // here, which is legit, because we never use them anyway.
1885 if constexpr (std::is_same<typename LWGraph::row_type,
1886 typename CrsMatrix::local_matrix_device_type::row_map_type>::value) {
1887 crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat, columns, values);
1888 } else {
1889 auto rows_mat2 = typename CrsMatrix::local_matrix_device_type::row_map_type::non_const_type("rows_mat2", rows_mat.extent(0));
1890 Kokkos::deep_copy(rows_mat2, rows_mat);
1891 auto columns2 = typename CrsMatrix::local_graph_device_type::entries_type::non_const_type("columns2", columns.extent(0));
1892 Kokkos::deep_copy(columns2, columns);
1893 auto values2 = typename CrsMatrix::local_matrix_device_type::values_type::non_const_type("values2", values.extent(0));
1894 Kokkos::deep_copy(values2, values);
1895 crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat2, columns2, values2);
1896 }
1897 crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap());
1898 } else {
1899 RCP<LWGraph> graph = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(), "block-diagonalized graph of A"));
1900 graph->SetBoundaryNodeMap(boundaryNodes);
1901 Set(currentLevel, "Graph", graph);
1902 }
1903
1904 Set(currentLevel, "DofsPerNode", 1);
1905 return crs_matrix_wrap;
1906}
1907
1908template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1909void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BlockDiagonalizeGraph(const RCP<LWGraph>& inputGraph, const RCP<LocalOrdinalVector>& ghostedBlockNumber, RCP<LWGraph>& outputGraph, RCP<const Import>& importer) const {
1910 TEUCHOS_TEST_FOR_EXCEPTION(ghostedBlockNumber.is_null(), Exceptions::RuntimeError, "BlockDiagonalizeGraph(): ghostedBlockNumber is null.");
1911 const ParameterList& pL = GetParameterList();
1912
1913 const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
1914
1915 GetOStream(Statistics1) << "Using BlockDiagonal Graph after Dropping (with provided blocking)";
1916 if (localizeColoringGraph)
1917 GetOStream(Statistics1) << ", with localization" << std::endl;
1918 else
1919 GetOStream(Statistics1) << ", without localization" << std::endl;
1920
1921 // Accessors for block numbers
1922 Teuchos::ArrayRCP<const LO> row_block_number = ghostedBlockNumber->getData(0);
1923 Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1924
1925 // allocate space for the local graph
1926 ArrayRCP<size_t> rows_mat;
1927 typename LWGraph::row_type::non_const_type rows_graph("rows_graph", inputGraph->GetNodeNumVertices() + 1);
1928 typename LWGraph::entries_type::non_const_type columns("columns", inputGraph->GetNodeNumEdges());
1929
1930 LO realnnz = 0;
1931 GO numDropped = 0, numTotal = 0;
1932 const LO numRows = Teuchos::as<LO>(inputGraph->GetDomainMap()->getLocalNumElements());
1933 if (localizeColoringGraph) {
1934 for (LO row = 0; row < numRows; ++row) {
1935 LO row_block = row_block_number[row];
1936 auto indices = inputGraph->getNeighborVertices(row);
1937
1938 LO rownnz = 0;
1939 for (LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1940 LO col = indices(colID);
1941 LO col_block = col_block_number[col];
1942
1943 if ((row_block == col_block) && (col < numRows)) {
1944 columns(realnnz++) = col;
1945 rownnz++;
1946 } else
1947 numDropped++;
1948 }
1949 rows_graph(row + 1) = realnnz;
1950 }
1951 } else {
1952 // ghosting of boundary node map
1953 auto boundaryNodes = inputGraph->GetBoundaryNodeMap();
1954 auto boundaryNodesVector = Xpetra::VectorFactory<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(inputGraph->GetDomainMap());
1955 for (size_t i = 0; i < inputGraph->GetNodeNumVertices(); i++)
1956 boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
1957 // Xpetra::IO<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Write("boundary",*boundaryNodesVector);
1958 auto boundaryColumnVector = Xpetra::VectorFactory<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(inputGraph->GetImportMap());
1959 boundaryColumnVector->doImport(*boundaryNodesVector, *importer, Xpetra::INSERT);
1960 auto boundaryColumn = boundaryColumnVector->getData(0);
1961
1962 for (LO row = 0; row < numRows; ++row) {
1963 LO row_block = row_block_number[row];
1964 auto indices = inputGraph->getNeighborVertices(row);
1965
1966 LO rownnz = 0;
1967 for (LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1968 LO col = indices(colID);
1969 LO col_block = col_block_number[col];
1970
1971 if ((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) {
1972 columns(realnnz++) = col;
1973 rownnz++;
1974 } else
1975 numDropped++;
1976 }
1977 rows_graph(row + 1) = realnnz;
1978 }
1979 }
1980
1981 numTotal = inputGraph->GetNodeNumEdges();
1982
1983 if (GetVerbLevel() & Statistics1) {
1984 RCP<const Teuchos::Comm<int>> comm = inputGraph->GetDomainMap()->getComm();
1985 GO numGlobalTotal, numGlobalDropped;
1986 MueLu_sumAll(comm, numTotal, numGlobalTotal);
1987 MueLu_sumAll(comm, numDropped, numGlobalDropped);
1988 GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1989 if (numGlobalTotal != 0)
1990 GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1991 GetOStream(Statistics1) << std::endl;
1992 }
1993
1994 if (localizeColoringGraph) {
1995 outputGraph = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1996 outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
1997 } else {
1998 TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra);
1999 auto outputGraph2 = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
2000
2001 auto tpGraph = Xpetra::toTpetra(rcp_const_cast<const CrsGraph>(outputGraph2->GetCrsGraph()));
2002 auto sym = rcp(new Tpetra::CrsGraphTransposer<LocalOrdinal, GlobalOrdinal, Node>(tpGraph));
2003 auto tpGraphSym = sym->symmetrize();
2004 auto lclGraphSym = tpGraphSym->getLocalGraphHost();
2005 auto colIndsSym = lclGraphSym.entries;
2006
2007 auto rowsSym = tpGraphSym->getLocalRowPtrsHost();
2008 typename LWGraph::row_type::non_const_type rows_graphSym("rows_graphSym", rowsSym.size());
2009 for (size_t row = 0; row < rowsSym.size(); row++)
2010 rows_graphSym(row) = rowsSym(row);
2011 outputGraph = rcp(new LWGraph(rows_graphSym, colIndsSym, inputGraph->GetDomainMap(), Xpetra::toXpetra(tpGraphSym->getColMap()), "block-diagonalized graph of A"));
2012 outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
2013 }
2014}
2015
2016} // namespace MueLu
2017
2018#endif // MUELU_COALESCEDROPFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
#define MueLu_sumAll(rcpComm, in, out)
static void AmalgamateMap(const Map &sourceMap, const Matrix &A, RCP< const Map > &amalgamatedMap, Array< LO > &translation)
Method to create merged map for systems of PDEs.
static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, const GlobalOrdinal indexBase)
Translate global (row/column) id to global amalgamation block id.
void MergeRows(const Matrix &A, const LO row, Array< LO > &cols, const Array< LO > &translation) const
Method to merge rows of matrix for systems of PDEs.
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > BlockDiagonalize(Level &currentLevel, const RCP< Matrix > &A, bool generate_matrix) const
void DeclareInput(Level &currentLevel) const
Input.
void BlockDiagonalizeGraph(const RCP< LWGraph > &inputGraph, const RCP< LocalOrdinalVector > &ghostedBlockNumber, RCP< LWGraph > &outputGraph, RCP< const Import > &importer) const
void MergeRowsWithDropping(const Matrix &A, const LO row, const ArrayRCP< const SC > &ghostedDiagVals, SC threshold, Array< LO > &cols, const Array< LO > &translation) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &currentLevel) const
Build an object with this factory.
Exception indicating invalid cast attempted.
Exception throws to report incompatible objects (like maps).
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Kokkos::View< bool *, memory_space > boundary_nodes_type
typename local_graph_type::row_map_type row_type
Lightweight MueLu representation of a compressed row storage graph.
Class that holds all level-specific information.
int GetLevelID() const
Return level number.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static Kokkos::View< bool *, typename Kokkos::HostSpace > DetectDirichletRows_kokkos_host(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
static Teuchos::RCP< Vector > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Return vector containing: max_{i\not=k}(-a_ik), for each for i in the matrix.
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
Apply Rowsum Criterion.
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar > > &v, LocalOrdinal i0, LocalOrdinal i1)
Squared distance between two rows in a multivector.
static void ApplyRowSumCriterionHost(const Matrix &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, Kokkos::HostSpace > &dirichletRows)
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
MueLu utility class.
Namespace for MueLu classes and methods.
@ Warnings0
Important warning messages (one line)
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
@ Warnings1
Additional warnings.
@ Parameters0
Print class parameters.
DropTol & operator=(DropTol const &)=default
DropTol(DropTol &&)=default
DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
DropTol & operator=(DropTol &&)=default
DropTol(DropTol const &)=default