MueLu_FilteredAFactory_def.hpp

// @HEADER
// *****************************************************************************
// MueLu: A package for multigrid based preconditioning
//
// Copyright 2012 NTESS and the MueLu contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#ifndef MUELU_FILTEREDAFACTORY_DEF_HPP
#define MUELU_FILTEREDAFACTORY_DEF_HPP

#include <Xpetra_Matrix.hpp>
#include <Xpetra_MatrixFactory.hpp>
#include <Xpetra_IO.hpp>

#include "MueLu_FilteredAFactory_decl.hpp"

#include "MueLu_Level.hpp"
#include "MueLu_MasterList.hpp"
#include "MueLu_Monitor.hpp"
#include "MueLu_Aggregates.hpp"
#include "MueLu_AmalgamationInfo.hpp"
#include "MueLu_Utilities.hpp"

// Variable to enable lots of debug output
#define MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING 0

namespace MueLu {

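// Helper: sort an array and erase duplicate entries. Used below to
// canonicalize lists of neighboring aggregate ids.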
template <class T>
void sort_and_unique(T& array) {
  std::sort(array.begin(), array.end());
  auto last = std::unique(array.begin(), array.end());
  array.erase(last, array.end());
}

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
RCP<const ParameterList> FilteredAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetValidParameterList() const {
  RCP<ParameterList> validParamList = rcp(new ParameterList());

#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
  SET_VALID_ENTRY("filtered matrix: use lumping");
  SET_VALID_ENTRY("filtered matrix: reuse graph");
  SET_VALID_ENTRY("filtered matrix: reuse eigenvalue");
  SET_VALID_ENTRY("filtered matrix: use root stencil");
  SET_VALID_ENTRY("filtered matrix: use spread lumping");
  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor");
  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap");
  SET_VALID_ENTRY("filtered matrix: Dirichlet threshold");
  SET_VALID_ENTRY("filtered matrix: count negative diagonals");
#undef SET_VALID_ENTRY

  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A used for filtering");
  validParamList->set<RCP<const FactoryBase> >("Graph", Teuchos::null, "Generating factory for coalesced filtered graph");
  validParamList->set<RCP<const FactoryBase> >("Filtering", Teuchos::null, "Generating factory for filtering boolean");

  // Only need these for the "use root stencil" option
  validParamList->set<RCP<const FactoryBase> >("Aggregates", Teuchos::null, "Generating factory of the aggregates");
  validParamList->set<RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
  return validParamList;
}
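
// A minimal usage sketch (hypothetical values, not from this file): the options
// validated above are normally supplied through the factory's parameter list, e.g.
//
//   Teuchos::ParameterList fp;
//   fp.set("filtered matrix: use lumping", true);
//   fp.set("filtered matrix: Dirichlet threshold", 1.0e-5);
//   factory->SetParameterList(fp);
//
// Any entry not set this way falls back to its MasterList default.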

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
void FilteredAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level& currentLevel) const {
  Input(currentLevel, "A");
  Input(currentLevel, "Filtering");
  Input(currentLevel, "Graph");
  const ParameterList& pL = GetParameterList();
  if (pL.isParameter("filtered matrix: use root stencil") && pL.get<bool>("filtered matrix: use root stencil") == true) {
    Input(currentLevel, "Aggregates");
    Input(currentLevel, "UnAmalgamationInfo");
  }
}

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
void FilteredAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const {
  FactoryMonitor m(*this, "Matrix filtering", currentLevel);

  RCP<Matrix> A = Get<RCP<Matrix> >(currentLevel, "A");
  if (Get<bool>(currentLevel, "Filtering") == false) {
    GetOStream(Runtime0) << "Filtered matrix is not being constructed as no filtering is being done" << std::endl;
    Set(currentLevel, "A", A);
    return;
  }

  const ParameterList& pL = GetParameterList();
  bool lumping            = pL.get<bool>("filtered matrix: use lumping");
  if (lumping)
    GetOStream(Runtime0) << "Lumping dropped entries" << std::endl;

  bool use_spread_lumping = pL.get<bool>("filtered matrix: use spread lumping");
  if (use_spread_lumping && (!lumping))
    throw std::runtime_error("Must also request 'filtered matrix: use lumping' in order to use spread lumping");

  if (use_spread_lumping) {
    GetOStream(Runtime0) << "Using spread lumping" << std::endl;
  }

  double DdomAllowGrowthRate = 1.1;
  double DdomCap             = 2.0;
  if (use_spread_lumping) {
    DdomAllowGrowthRate = pL.get<double>("filtered matrix: spread lumping diag dom growth factor");
    DdomCap             = pL.get<double>("filtered matrix: spread lumping diag dom cap");
  }
  bool use_root_stencil = lumping && pL.get<bool>("filtered matrix: use root stencil");
  if (use_root_stencil)
    GetOStream(Runtime0) << "Using root stencil for dropping" << std::endl;
  double dirichlet_threshold = pL.get<double>("filtered matrix: Dirichlet threshold");
  if (dirichlet_threshold >= 0.0)
    GetOStream(Runtime0) << "Filtering Dirichlet threshold of " << dirichlet_threshold << std::endl;

  if (use_root_stencil || pL.get<bool>("filtered matrix: reuse graph"))
    GetOStream(Runtime0) << "Reusing graph" << std::endl;
  else
    GetOStream(Runtime0) << "Generating new graph" << std::endl;

  RCP<LWGraph> G = Get<RCP<LWGraph> >(currentLevel, "Graph");
  if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) {
    // Dump the graph edge-by-edge with unit weights
    FILE* f         = fopen("graph.dat", "w");
    size_t numGRows = G->GetNodeNumVertices();
    for (size_t i = 0; i < numGRows; i++) {
      auto indsG = G->getNeighborVertices(i);
      for (size_t j = 0; j < (size_t)indsG.length; j++) {
        fprintf(f, "%d %d 1.0\n", (int)i, (int)indsG(j));
      }
    }
    fclose(f);
  }

  RCP<ParameterList> fillCompleteParams(new ParameterList);
  fillCompleteParams->set("No Nonlocal Changes", true);

  RCP<Matrix> filteredA;
  if (use_root_stencil) {
    filteredA = MatrixFactory::Build(A->getCrsGraph());
    filteredA->fillComplete(fillCompleteParams);
    filteredA->resumeFill();
    BuildNewUsingRootStencil(*A, *G, dirichlet_threshold, currentLevel, *filteredA, use_spread_lumping, DdomAllowGrowthRate, DdomCap);
    filteredA->fillComplete(fillCompleteParams);

  } else if (pL.get<bool>("filtered matrix: reuse graph")) {
    filteredA = MatrixFactory::Build(A->getCrsGraph());
    filteredA->resumeFill();
    BuildReuse(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold, *filteredA);
    // Only lump inside BuildReuse if lumping is true and use_spread_lumping is false.
    // Note: use_spread_lumping cannot be true if lumping is false.

    if (use_spread_lumping) ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap);
    filteredA->fillComplete(fillCompleteParams);

  } else {
    filteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries());
    BuildNew(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold, *filteredA);
    // Only lump inside BuildNew if lumping is true and use_spread_lumping is false.
    // Note: use_spread_lumping cannot be true if lumping is false.
    if (use_spread_lumping) ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap);
    filteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams);
  }

  if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) {
    Xpetra::IO<SC, LO, GO, NO>::Write("filteredA.dat", *filteredA);

    // original filtered A and actual A
    Xpetra::IO<SC, LO, GO, NO>::Write("A.dat", *A);
    RCP<Matrix> origFilteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries());
    BuildNew(*A, *G, lumping, dirichlet_threshold, *origFilteredA);
    if (use_spread_lumping) ExperimentalLumping(*A, *origFilteredA, DdomAllowGrowthRate, DdomCap);
    origFilteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams);
    Xpetra::IO<SC, LO, GO, NO>::Write("origFilteredA.dat", *origFilteredA);
  }

  filteredA->SetFixedBlockSize(A->GetFixedBlockSize());

  if (pL.get<bool>("filtered matrix: reuse eigenvalue")) {
    // Reuse the max eigenvalue estimate from A.
    // It is unclear which eigenvalue is best for the smoothing, but we may already
    // have the D^{-1}A estimate in A, so we may as well use it.
    // NOTE: ML does that too
    filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
  }

  if (pL.get<bool>("filtered matrix: count negative diagonals")) {
    // Count the negative diagonals (and display that information)
    GO neg_count = MueLu::Utilities<SC, LO, GO, NO>::CountNegativeDiagonalEntries(*filteredA);
    GetOStream(Runtime0) << "FilteredA: Negative diagonals: " << neg_count << std::endl;
  }

  Set(currentLevel, "A", filteredA);
}

// Epetra's API allows direct access to the row array.
// Tpetra's API does not, providing only ArrayView<const Scalar>.
// But in most situations we are currently interested in, it is safe to assume
// that the view is to the actual data. So this macro directs the code to do a
// const_cast and modify the entries directly. This allows us to avoid the
// replaceLocalValues() call, which is quite expensive due to all the searches.
//#define ASSUME_DIRECT_ACCESS_TO_ROW // See github issue 10883#issuecomment-1256676340

// Both the Epetra and Tpetra matrix-matrix multiplies use the following trick:
// if an entry of the left matrix is zero, it does not compute or store the
// zero value.
//
// This trick allows us to bypass constructing a new matrix. Instead, we
// make a deep copy of the original one, and fill it in with zeros, which
// are ignored during the prolongator smoothing.
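//
// For instance (a hypothetical 1D stencil, not taken from this file): if row i
// of A stores columns {i-1, i, i+1} with values {-1, 4, -1} and the (i, i+1)
// entry is dropped, filteredA keeps the same three stored columns but holds
// values {-1, 4, 0} (or {-1, 3, 0} with lumping, preserving the row sum). The
// stored zero is then skipped by the matrix-matrix multiply during prolongator
// smoothing.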
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
void FilteredAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
    BuildReuse(const Matrix& A, const LWGraph& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const {
  using TST = typename Teuchos::ScalarTraits<SC>;
  SC zero   = TST::zero();

  size_t blkSize = A.GetFixedBlockSize();

  ArrayView<const LO> inds;
  ArrayView<const SC> valsA;
#ifdef ASSUME_DIRECT_ACCESS_TO_ROW
  ArrayView<SC> vals;
#else
  Array<SC> vals;
#endif

  Array<char> filter(std::max(blkSize * G.GetImportMap()->getLocalNumElements(),
                              A.getColMap()->getLocalNumElements()),
                     0);
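  // filter is indexed by column DOF: graph node j covers DOFs j*blkSize ..
  // j*blkSize + blkSize - 1, so (for example) with blkSize = 2, marking node 3
  // marks DOFs 6 and 7.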

  size_t numGRows = G.GetNodeNumVertices();
  for (size_t i = 0; i < numGRows; i++) {
    // Set up filtering array
    auto indsG = G.getNeighborVertices(i);
    for (size_t j = 0; j < as<size_t>(indsG.length); j++)
      for (size_t k = 0; k < blkSize; k++)
        filter[indsG(j) * blkSize + k] = 1;

    for (size_t k = 0; k < blkSize; k++) {
      LO row = i * blkSize + k;

      A.getLocalRowView(row, inds, valsA);

      size_t nnz = inds.size();
      if (nnz == 0)
        continue;

#ifdef ASSUME_DIRECT_ACCESS_TO_ROW
      // Transform ArrayView<const SC> into ArrayView<SC>
      ArrayView<const SC> vals1;
      filteredA.getLocalRowView(row, inds, vals1);
      vals = ArrayView<SC>(const_cast<SC*>(vals1.getRawPtr()), nnz);

      memcpy(vals.getRawPtr(), valsA.getRawPtr(), nnz * sizeof(SC));
#else
      vals = Array<SC>(valsA);
#endif

      SC ZERO = Teuchos::ScalarTraits<SC>::zero();
      // SC ONE = Teuchos::ScalarTraits<SC>::one();
      SC A_rowsum = ZERO, F_rowsum = ZERO;
      for (LO l = 0; l < (LO)inds.size(); l++)
        A_rowsum += valsA[l];

      if (lumping == false) {
        for (size_t j = 0; j < nnz; j++)
          if (!filter[inds[j]])
            vals[j] = zero;

      } else {
        LO diagIndex = -1;
        SC diagExtra = zero;

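        // diagExtra accumulates the values of dropped (filtered-out) entries;
        // lumping them onto the diagonal below preserves the row sum.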
        for (size_t j = 0; j < nnz; j++) {
          if (filter[inds[j]]) {
            if (inds[j] == row) {
              // Remember diagonal position
              diagIndex = j;
            }
            continue;
          }

          diagExtra += vals[j];

          vals[j] = zero;
        }

        // Lump dropped entries
        // NOTE
        //  * Does it make sense to lump for elasticity?
        //  * Is it different for diffusion and elasticity?
        // SC diagA = ZERO;
        if (diagIndex != -1) {
          // diagA = vals[diagIndex];
          vals[diagIndex] += diagExtra;
          if (dirichletThresh >= 0.0 && TST::real(vals[diagIndex]) <= dirichletThresh) {
            // printf("WARNING: row %d diag(Afiltered) = %8.2e diag(A)=%8.2e\n",row,vals[diagIndex],diagA);
            for (LO l = 0; l < (LO)nnz; l++)
              F_rowsum += vals[l];
            // printf(" : A rowsum = %8.2e F rowsum = %8.2e\n",A_rowsum,F_rowsum);
            vals[diagIndex] = TST::one();
          }
        }
      }

#ifndef ASSUME_DIRECT_ACCESS_TO_ROW
      // Because we used a column map in the construction of the matrix,
      // we can just use replaceLocalValues here instead of replaceGlobalValues
      filteredA.replaceLocalValues(row, inds, vals);
#endif
    }

    // Reset filtering array
    for (size_t j = 0; j < as<size_t>(indsG.length); j++)
      for (size_t k = 0; k < blkSize; k++)
        filter[indsG(j) * blkSize + k] = 0;
  }
}

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
void FilteredAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
    BuildNew(const Matrix& A, const LWGraph& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const {
  using TST = typename Teuchos::ScalarTraits<SC>;
  SC zero = Teuchos::ScalarTraits<SC>::zero();

  size_t blkSize = A.GetFixedBlockSize();

  ArrayView<const LO> indsA;
  ArrayView<const SC> valsA;
  Array<LO> inds;
  Array<SC> vals;

  Array<char> filter(blkSize * G.GetImportMap()->getLocalNumElements(), 0);

  size_t numGRows = G.GetNodeNumVertices();
  for (size_t i = 0; i < numGRows; i++) {
    // Set up filtering array
    auto indsG = G.getNeighborVertices(i);
    for (size_t j = 0; j < as<size_t>(indsG.length); j++)
      for (size_t k = 0; k < blkSize; k++)
        filter[indsG(j) * blkSize + k] = 1;

    for (size_t k = 0; k < blkSize; k++) {
      LO row = i * blkSize + k;

      A.getLocalRowView(row, indsA, valsA);

      size_t nnz = indsA.size();
      if (nnz == 0)
        continue;

      inds.resize(indsA.size());
      vals.resize(valsA.size());

      size_t numInds = 0;
      if (lumping == false) {
        for (size_t j = 0; j < nnz; j++)
          if (filter[indsA[j]]) {
            inds[numInds] = indsA[j];
            vals[numInds] = valsA[j];
            numInds++;
          }

      } else {
        LO diagIndex = -1;
        SC diagExtra = zero;

        for (size_t j = 0; j < nnz; j++) {
          if (filter[indsA[j]]) {
            inds[numInds] = indsA[j];
            vals[numInds] = valsA[j];

            // Remember diagonal position
            if (inds[numInds] == row)
              diagIndex = numInds;

            numInds++;

          } else {
            diagExtra += valsA[j];
          }
        }

        // Lump dropped entries
        // NOTE
        //  * Does it make sense to lump for elasticity?
        //  * Is it different for diffusion and elasticity?
        if (diagIndex != -1) {
          vals[diagIndex] += diagExtra;
          if (dirichletThresh >= 0.0 && TST::real(vals[diagIndex]) <= dirichletThresh) {
            // SC A_rowsum = ZERO, F_rowsum = ZERO;
            // printf("WARNING: row %d diag(Afiltered) = %8.2e diag(A)=%8.2e\n",row,vals[diagIndex],diagA);
            // for(LO l = 0; l < (LO)nnz; l++)
            //   F_rowsum += vals[l];
            // printf(" : A rowsum = %8.2e F rowsum = %8.2e\n",A_rowsum,F_rowsum);
            vals[diagIndex] = TST::one();
          }
        }
      }
      inds.resize(numInds);
      vals.resize(numInds);

      // Because we used a column map in the construction of the matrix
      // we can just use insertLocalValues here instead of insertGlobalValues
      filteredA.insertLocalValues(row, inds, vals);
    }

    // Reset filtering array
    for (size_t j = 0; j < as<size_t>(indsG.length); j++)
      for (size_t k = 0; k < blkSize; k++)
        filter[indsG(j) * blkSize + k] = 0;
  }
}

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
void FilteredAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
    BuildNewUsingRootStencil(const Matrix& A, const LWGraph& G, double dirichletThresh, Level& currentLevel, Matrix& filteredA, bool use_spread_lumping, double DdomAllowGrowthRate, double DdomCap) const {
  using TST = typename Teuchos::ScalarTraits<SC>;
  using Teuchos::arcp_const_cast;
  SC ZERO    = Teuchos::ScalarTraits<SC>::zero();
  SC ONE     = Teuchos::ScalarTraits<SC>::one();
  LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();

  // In the badAggNeighbors loop, if the entry has any number besides NAN, I add it to the diagExtra and then zero the guy.
  RCP<Aggregates> aggregates      = Get<RCP<Aggregates> >(currentLevel, "Aggregates");
  RCP<AmalgamationInfo> amalgInfo = Get<RCP<AmalgamationInfo> >(currentLevel, "UnAmalgamationInfo");
  LO numAggs                      = aggregates->GetNumAggregates();

  size_t numNodes = G.GetNodeNumVertices();
  size_t blkSize  = A.GetFixedBlockSize();
  size_t numRows  = A.getMap()->getLocalNumElements();
  ArrayView<const LO> indsA;
  ArrayView<const SC> valsA;
  ArrayRCP<const size_t> rowptr;
  ArrayRCP<const LO> inds;
  ArrayRCP<const SC> vals_const;
  ArrayRCP<SC> vals;

  // We're going to grab the vals array from filteredA and then blitz it with NAN as a placeholder for "entries that have
  // not yet been touched." If I see an entry in the primary loop that has a zero, then I assume it has been nuked by
  // its symmetric pair, so I add it to the diagonal. If it has a NAN, process as normal.
  RCP<CrsMatrix> filteredAcrs = dynamic_cast<const CrsMatrixWrap*>(&filteredA)->getCrsMatrix();
  filteredAcrs->getAllValues(rowptr, inds, vals_const);
  vals = arcp_const_cast<SC>(vals_const);
  Array<bool> vals_dropped_indicator(vals.size(), false);

  // Check map nesting
  RCP<const Map> rowMap = A.getRowMap();
  RCP<const Map> colMap = A.getColMap();
  bool goodMap          = MueLu::Utilities<SC, LO, GO, NO>::MapsAreNested(*rowMap, *colMap);
  TEUCHOS_TEST_FOR_EXCEPTION(!goodMap, Exceptions::RuntimeError, "FilteredAFactory: Maps are not nested");

  // Since we're going to symmetrize this
  Array<LO> diagIndex(numRows, INVALID);
  Array<SC> diagExtra(numRows, ZERO);

  // Lists of nodes in each aggregate
  struct {
    // GH: For now, copy everything to host until we properly set this factory to run device code.
    // Instead, we'll copy data into host_mirror_types and run the algorithms on host, saving optimization for later.
    typename Aggregates::LO_view ptr, nodes, unaggregated;
    typename Aggregates::LO_view::host_mirror_type ptr_h, nodes_h, unaggregated_h;
  } nodesInAgg;
  aggregates->ComputeNodesInAggregate(nodesInAgg.ptr, nodesInAgg.nodes, nodesInAgg.unaggregated);
  nodesInAgg.ptr_h          = Kokkos::create_mirror_view(nodesInAgg.ptr);
  nodesInAgg.nodes_h        = Kokkos::create_mirror_view(nodesInAgg.nodes);
  nodesInAgg.unaggregated_h = Kokkos::create_mirror_view(nodesInAgg.unaggregated);
  Kokkos::deep_copy(nodesInAgg.ptr_h, nodesInAgg.ptr);
  Kokkos::deep_copy(nodesInAgg.nodes_h, nodesInAgg.nodes);
  Kokkos::deep_copy(nodesInAgg.unaggregated_h, nodesInAgg.unaggregated);
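  // nodesInAgg is CSR-like: aggregate i owns nodes nodes_h(ptr_h(i)) .. nodes_h(ptr_h(i+1)-1),
  // and unaggregated_h lists the nodes that belong to no aggregate.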
  Teuchos::ArrayRCP<const LO> vertex2AggId = aggregates->GetVertex2AggId()->getData(0);  // GH: this is needed on device, grab the pointer after we call ComputeNodesInAggregate

  LO graphNumCols = G.GetImportMap()->getLocalNumElements();
  Array<bool> filter(graphNumCols, false);

  // Loop over the unaggregated nodes and blitz those rows; we don't want to smooth singletons.
  for (LO i = 0; i < (LO)nodesInAgg.unaggregated_h.extent(0); i++) {
    for (LO m = 0; m < (LO)blkSize; m++) {
      LO row = amalgInfo->ComputeLocalDOF(nodesInAgg.unaggregated_h(i), m);
      if (row >= (LO)numRows) continue;
      size_t index_start = rowptr[row];
      A.getLocalRowView(row, indsA, valsA);
      for (LO k = 0; k < (LO)indsA.size(); k++) {
        if (row == indsA[k]) {
          vals[index_start + k] = ONE;
          diagIndex[row]        = k;
        } else
          vals[index_start + k] = ZERO;
      }
    }
  }  // end nodesInAgg.unaggregated.extent(0)

  std::vector<LO> badCount(numAggs, 0);

  // Find the biggest aggregate size in *nodes*
  LO maxAggSize = 0;
  for (LO i = 0; i < numAggs; i++)
    maxAggSize = std::max(maxAggSize, nodesInAgg.ptr_h(i + 1) - nodesInAgg.ptr_h(i));

  // Loop over all the aggregates
  std::vector<LO> goodAggNeighbors(G.getLocalMaxNumRowEntries());
  std::vector<LO> badAggNeighbors(std::min(G.getLocalMaxNumRowEntries() * maxAggSize, numNodes));

  size_t numNewDrops   = 0;
  size_t numOldDrops   = 0;
  size_t numFixedDiags = 0;
  size_t numSymDrops   = 0;

  for (LO i = 0; i < numAggs; i++) {
    LO numNodesInAggregate = nodesInAgg.ptr_h(i + 1) - nodesInAgg.ptr_h(i);
    if (numNodesInAggregate == 0) continue;

    // Find the root *node*
    LO root_node = INVALID;
    for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) {
      if (aggregates->IsRoot(nodesInAgg.nodes_h(k))) {
        root_node = nodesInAgg.nodes_h(k);
        break;
      }
    }

    TEUCHOS_TEST_FOR_EXCEPTION(root_node == INVALID,
                               Exceptions::RuntimeError, "MueLu::FilteredAFactory::BuildNewUsingRootStencil: Cannot find root node");

    // Find the list of "good" node neighbors (aka nodes which border the root node in the graph G)
    auto goodNodeNeighbors = G.getNeighborVertices(root_node);

    // Now find the list of "good" aggregate neighbors (aka the aggregates neighboring the root node in the graph G)
    goodAggNeighbors.resize(0);
    for (LO k = 0; k < (LO)goodNodeNeighbors.length; k++) {
      goodAggNeighbors.push_back(vertex2AggId[goodNodeNeighbors(k)]);
    }
    sort_and_unique(goodAggNeighbors);

    // Now we get the list of "bad" aggregate neighbors (aka aggregates which border the
    // root node in the original matrix A but are not goodNodeNeighbors). Since we
    // don't have an amalgamated version of the original matrix, we use the matrix directly
    badAggNeighbors.resize(0);
    for (LO j = 0; j < (LO)blkSize; j++) {
      LO row = amalgInfo->ComputeLocalDOF(root_node, j);
      if (row >= (LO)numRows) continue;
      A.getLocalRowView(row, indsA, valsA);
      for (LO k = 0; k < (LO)indsA.size(); k++) {
        if ((indsA[k] < (LO)numRows) && (TST::magnitude(valsA[k]) != TST::magnitude(ZERO))) {
          LO node = amalgInfo->ComputeLocalNode(indsA[k]);
          LO agg  = vertex2AggId[node];
          if (!std::binary_search(goodAggNeighbors.begin(), goodAggNeighbors.end(), agg))
            badAggNeighbors.push_back(agg);
        }
      }
    }
    sort_and_unique(badAggNeighbors);

    // Go through the filtered graph and count the number of connections to the badAggNeighbors;
    // if there are 2 or more of these connections, remove them from the bad list.
    for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) {
      auto nodeNeighbors = G.getNeighborVertices(k);
      for (LO kk = 0; kk < nodeNeighbors.length; kk++) {
        if ((vertex2AggId[nodeNeighbors(kk)] >= 0) && (vertex2AggId[nodeNeighbors(kk)] < numAggs))
          (badCount[vertex2AggId[nodeNeighbors(kk)]])++;
      }
    }
    std::vector<LO> reallyBadAggNeighbors(std::min(G.getLocalMaxNumRowEntries() * maxAggSize, numNodes));
    reallyBadAggNeighbors.resize(0);
    for (LO k = 0; k < (LO)badAggNeighbors.size(); k++) {
      if (badCount[badAggNeighbors[k]] <= 1) reallyBadAggNeighbors.push_back(badAggNeighbors[k]);
    }
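    // Reset the badCount entries touched above so the counts start from zero
    // for the next aggregate.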
    for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) {
      auto nodeNeighbors = G.getNeighborVertices(k);
      for (LO kk = 0; kk < nodeNeighbors.length; kk++) {
        if ((vertex2AggId[nodeNeighbors(kk)] >= 0) && (vertex2AggId[nodeNeighbors(kk)] < numAggs))
          badCount[vertex2AggId[nodeNeighbors(kk)]] = 0;
      }
    }

    // For each of the reallyBadAggNeighbors, we go and blitz their connections to dofs in this aggregate.
    // We remove the INVALID marker when we do this so we don't wind up doubling this up later
    for (LO b = 0; b < (LO)reallyBadAggNeighbors.size(); b++) {
      LO bad_agg = reallyBadAggNeighbors[b];
      for (LO k = nodesInAgg.ptr_h(bad_agg); k < nodesInAgg.ptr_h(bad_agg + 1); k++) {
        LO bad_node = nodesInAgg.nodes_h(k);
        for (LO j = 0; j < (LO)blkSize; j++) {
          LO bad_row = amalgInfo->ComputeLocalDOF(bad_node, j);
          if (bad_row >= (LO)numRows) continue;
          size_t index_start = rowptr[bad_row];
          A.getLocalRowView(bad_row, indsA, valsA);
          for (LO l = 0; l < (LO)indsA.size(); l++) {
            if (indsA[l] < (LO)numRows && vertex2AggId[amalgInfo->ComputeLocalNode(indsA[l])] == i && vals_dropped_indicator[index_start + l] == false) {
              vals_dropped_indicator[index_start + l] = true;
              vals[index_start + l]                   = ZERO;
              diagExtra[bad_row] += valsA[l];
              numSymDrops++;
            }
          }
        }
      }
    }

    // Now let's fill the rows in this aggregate and figure out the diagonal lumping.
    // We loop over each node in the aggregate and then over the neighbors of that node.

    for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) {
      LO row_node = nodesInAgg.nodes_h(k);

      // Set up filtering array
      auto indsG = G.getNeighborVertices(row_node);
      for (size_t j = 0; j < as<size_t>(indsG.length); j++)
        filter[indsG(j)] = true;

      for (LO m = 0; m < (LO)blkSize; m++) {
        LO row = amalgInfo->ComputeLocalDOF(row_node, m);
        if (row >= (LO)numRows) continue;
        size_t index_start = rowptr[row];
        A.getLocalRowView(row, indsA, valsA);

        for (LO l = 0; l < (LO)indsA.size(); l++) {
          int col_node = amalgInfo->ComputeLocalNode(indsA[l]);
          bool is_good = filter[col_node];
          if (indsA[l] == row) {
            diagIndex[row]        = l;
            vals[index_start + l] = valsA[l];
            continue;
          }

          // If we've already dropped this guy (from symmetry above), then continue onward
          if (vals_dropped_indicator[index_start + l] == true) {
            if (is_good)
              numOldDrops++;
            else
              numNewDrops++;
            continue;
          }

          // FIXME: I'm assuming vertex2AggId is only the length of the rowmap, so
          // we won't do secondary dropping on off-processor neighbors
          if (is_good && indsA[l] < (LO)numRows) {
            int agg = vertex2AggId[col_node];
            if (std::binary_search(reallyBadAggNeighbors.begin(), reallyBadAggNeighbors.end(), agg))
              is_good = false;
          }

          if (is_good) {
            vals[index_start + l] = valsA[l];
          } else {
            if (!filter[col_node])
              numOldDrops++;
            else
              numNewDrops++;
            diagExtra[row] += valsA[l];
            vals[index_start + l]                   = ZERO;
            vals_dropped_indicator[index_start + l] = true;
          }
        }  // end for l "indsA.size()" loop

      }  // end m "blkSize" loop

      // Clear filtering array
      for (size_t j = 0; j < as<size_t>(indsG.length); j++)
        filter[indsG(j)] = false;

    }  // end k loop over number of nodes in this agg
  }    // end i loop over numAggs

  if (!use_spread_lumping) {
    // Now do the diagonal modifications in one, final pass
    for (LO row = 0; row < (LO)numRows; row++) {
      if (diagIndex[row] != INVALID) {
        size_t index_start       = rowptr[row];
        size_t diagIndexInMatrix = index_start + diagIndex[row];
        // printf("diag_vals pre update = %8.2e\n", vals[diagIndex] );
        vals[diagIndexInMatrix] += diagExtra[row];
        SC A_rowsum = ZERO, A_absrowsum = ZERO, F_rowsum = ZERO;

        if ((dirichletThresh >= 0.0 && TST::real(vals[diagIndexInMatrix]) <= dirichletThresh) || TST::real(vals[diagIndexInMatrix]) == ZERO) {
          if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) {
            A.getLocalRowView(row, indsA, valsA);
            // SC diagA = valsA[diagIndex[row]];
            // printf("WARNING: row %d (diagIndex=%d) diag(Afiltered) = %8.2e diag(A)=%8.2e numInds = %d\n",row,diagIndex[row],vals[diagIndexInMatrix],diagA,(LO)indsA.size());

            for (LO l = 0; l < (LO)indsA.size(); l++) {
              A_rowsum += valsA[l];
              A_absrowsum += std::abs(valsA[l]);
            }
            for (LO l = 0; l < (LO)indsA.size(); l++)
              F_rowsum += vals[index_start + l];
            // printf(" : A rowsum = %8.2e |A| rowsum = %8.2e rowsum = %8.2e\n",A_rowsum,A_absrowsum,F_rowsum);
            if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) {
              // printf(" Avals =");
              // for(LO l = 0; l < (LO)indsA.size(); l++)
              //   printf("%d(%8.2e)[%d] ",(LO)indsA[l],valsA[l],(LO)l);
              // printf("\n");
              // printf(" Fvals =");
              // for(LO l = 0; l < (LO)indsA.size(); l++)
              //   if(vals[index_start+l] != ZERO)
              //     printf("%d(%8.2e)[%d] ",(LO)indsA[l],vals[index_start+l],(LO)l);
            }
          }
          // Don't know what to do, so blitz the row and dump a one on the diagonal
          for (size_t l = rowptr[row]; l < rowptr[row + 1]; l++) {
            vals[l] = ZERO;
          }
          vals[diagIndexInMatrix] = TST::one();
          numFixedDiags++;
        }
      } else {
        GetOStream(Runtime0) << "WARNING: Row " << row << " has no diagonal " << std::endl;
      }
    } /* end row "numRows" loop */
  }

  // Copy all the goop out
  for (LO row = 0; row < (LO)numRows; row++) {
    filteredA.replaceLocalValues(row, inds(rowptr[row], rowptr[row + 1] - rowptr[row]), vals(rowptr[row], rowptr[row + 1] - rowptr[row]));
  }
  if (use_spread_lumping) ExperimentalLumping(A, filteredA, DdomAllowGrowthRate, DdomCap);

  size_t g_newDrops = 0, g_oldDrops = 0, g_fixedDiags = 0;

  MueLu_sumAll(A.getRowMap()->getComm(), numNewDrops, g_newDrops);
  MueLu_sumAll(A.getRowMap()->getComm(), numOldDrops, g_oldDrops);
  MueLu_sumAll(A.getRowMap()->getComm(), numFixedDiags, g_fixedDiags);
  GetOStream(Runtime0) << "Filtering out " << g_newDrops << " edges, in addition to the " << g_oldDrops << " edges dropped earlier" << std::endl;
  GetOStream(Runtime0) << "Fixing " << g_fixedDiags << " zero diagonal values" << std::endl;
}

// Fancy lumping: instead of just moving everything to the diagonal, we also consider moving
// some of the lumping to the kept off-diagonals. We basically aim to not increase the diagonal
// dominance in a row. In particular, the goal is that row i satisfies
//
//    lumpedDiagDomMeasure_i <= rho2
// or
//    lumpedDiagDomMeasure_i <= rho * unlumpedDiagDomMeasure_i
//
// NOTE: THIS CODE assumes direct access to a row. See comments above concerning
// ASSUME_DIRECT_ACCESS_TO_ROW
//
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
void FilteredAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
    ExperimentalLumping(const Matrix& A, Matrix& filteredA, double irho, double irho2) const {
  using TST = typename Teuchos::ScalarTraits<SC>;
  SC zero = TST::zero();
  SC one  = TST::one();

  ArrayView<const LO> inds;
  ArrayView<const SC> vals;
  ArrayView<const LO> finds;
  ArrayView<SC> fvals;

  SC PosOffSum, NegOffSum, PosOffDropSum, NegOffDropSum;
  SC diag, gamma, alpha;
  LO NumPosKept, NumNegKept;

  SC noLumpDdom;
  SC numer, denom;
  SC PosFilteredSum, NegFilteredSum;
  SC Target;

  SC rho  = as<Scalar>(irho);
  SC rho2 = as<Scalar>(irho2);

  for (LO row = 0; row < (LO)A.getRowMap()->getLocalNumElements(); row++) {
    noLumpDdom = as<Scalar>(10000.0);  // Only used if the diagonal is zero.
                                       // The whole idea sort of breaks down
                                       // when the diagonal is zero. In particular,
                                       // the old diag dominance ratio is infinity
                                       // ... so what do we want for the new ddom
                                       // ratio? Do we want to allow the diagonal
                                       // to go negative, just to have a better ddom
                                       // ratio? This current choice essentially
                                       // changes 'Target' to a large number,
                                       // meaning that we will allow the new
                                       // ddom number to be fairly large (because
                                       // the old one was infinity)

    ArrayView<const SC> tvals;
    A.getLocalRowView(row, inds, vals);
    size_t nnz = inds.size();
    if (nnz == 0) continue;
    filteredA.getLocalRowView(row, finds, tvals);  // assume the 2 getLocalRowView()s
                                                   // have things in the same order
    fvals = ArrayView<SC>(const_cast<SC*>(tvals.getRawPtr()), nnz);

    LO diagIndex = -1, fdiagIndex = -1;

    PosOffSum    = zero;
    NegOffSum    = zero;
    PosOffDropSum = zero;
    NegOffDropSum = zero;
    diag         = zero;
    NumPosKept   = 0;
    NumNegKept   = 0;

    // First record the diagonal, the off-diagonal sums, and the dropped off-diagonal sums
    for (size_t j = 0; j < nnz; j++) {
      if (inds[j] == row) {
        diagIndex = j;
        diag      = vals[j];
      } else {  // off-diagonal
        if (TST::real(vals[j]) > TST::real(zero))
          PosOffSum += vals[j];
        else
          NegOffSum += vals[j];
      }
    }
    PosOffDropSum = PosOffSum;
    NegOffDropSum = NegOffSum;
    NumPosKept    = 0;
    NumNegKept    = 0;
    LO j = 0;
    for (size_t jj = 0; jj < (size_t)finds.size(); jj++) {
      while (inds[j] != finds[jj]) j++;  // assumes that finds is in the same order as
                                         // inds ... but perhaps has some entries missing
      if (finds[jj] == row)
        fdiagIndex = jj;
      else {
        if (TST::real(vals[j]) > TST::real(zero)) {
          PosOffDropSum -= fvals[jj];
          if (TST::real(fvals[jj]) != TST::real(zero)) NumPosKept++;
        } else {
          NegOffDropSum -= fvals[jj];
          if (TST::real(fvals[jj]) != TST::real(zero)) NumNegKept++;
        }
      }
    }

    // Measure of diagonal dominance if no lumping is done
    if (TST::magnitude(diag) != TST::magnitude(zero))
      noLumpDdom = (PosOffSum - NegOffSum) / diag;

    // Target is an acceptable diagonal dominance ratio,
    // which should really be larger than 1

    Target = rho * noLumpDdom;
    if (TST::magnitude(Target) <= TST::magnitude(rho)) Target = rho2;

    PosFilteredSum = PosOffSum - PosOffDropSum;
    NegFilteredSum = NegOffSum - NegOffDropSum;
    // Note: PosFilteredSum is not equal to the sum of the
    // positive entries after lumping. It just reflects the
    // pos offdiag sum of the filtered matrix before lumping
    // and does not account for negative dropped terms lumped
    // to the positive kept terms.

    // Dropped positive offdiags always go to the diagonal, as these
    // always improve diagonal dominance.

    diag += PosOffDropSum;

    // Now let's work on lumping the dropped negative offdiags
    gamma = -NegOffDropSum - PosFilteredSum;

    if (TST::real(gamma) < TST::real(zero)) {
      // The total amount of negative dropping is less than PosFilteredSum,
      // so we can distribute this dropping to pos offdiags. After lumping,
      // the sum of the pos offdiags is just -gamma, so we just assign pos
      // offdiags proportional to vals[j]/PosFilteredSum.
      // Note: in this case the diagonal is not changed, as all lumping
      // occurs to the pos offdiags

      if (fdiagIndex != -1) fvals[fdiagIndex] = diag;
      j = 0;
      for (LO jj = 0; jj < (LO)finds.size(); jj++) {
        while (inds[j] != finds[jj]) j++;  // assumes that finds is in the same order as
                                           // inds ... but perhaps has some entries missing
        if ((j != diagIndex) && (TST::real(vals[j]) > TST::real(zero)) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero)))
          fvals[jj] = -gamma * (vals[j] / PosFilteredSum);
      }
    } else {
      // So there are more negative values that need lumping than kept
      // positive offdiags. Meaning there is enough negative lumping to
      // completely clear out all pos offdiags. If we lump all negs
      // to pos offdiags, we'd actually change them to negative. We
      // only do this if we are desperate. Otherwise, we'll clear out
      // all the positive kept offdiags and try to lump the rest
      // somewhere else. We defer the clearing of pos offdiags
      // to see first if we are going to be desperate.

      bool flipPosOffDiagsToNeg = false;

      // Even if we lumped by zeroing positive offdiags, we are still
      // going to have more lumping to distribute to either
      //  1) the diagonal
      //  2) the kept negative offdiags
      //  3) the kept positive offdiags (desperate)

      // Let's first consider lumping the remaining neg offdiag stuff
      // to the diagonal ... if this does not increase the diagonal
      // dominance ratio too much (given by rho).

      if ((TST::real(diag) > TST::real(gamma)) &&
          (TST::real((-NegFilteredSum) / (diag - gamma)) <= TST::real(Target))) {
        // The 1st term of the if above ensures that the resulting diagonal (= diag - gamma)
        // is positive. The left side of the 2nd term is the diagonal dominance
        // if we lump the remaining stuff (gamma) to the diagonal. Recall
        // that now there are no positive off-diags, so the sum(abs(offdiags))
        // is just the negative of NegFilteredSum

        if (fdiagIndex != -1) fvals[fdiagIndex] = diag - gamma;
      } else if (NumNegKept > 0) {
        // Need to do some lumping to neg offdiags to avoid a large
        // increase in diagonal dominance. We first compute alpha,
        // which measures how much of gamma should go to the
        // negative offdiags. The rest will go to the diagonal.

        numer = -NegFilteredSum - Target * (diag - gamma);
        denom = gamma * (Target - TST::one());

        // Make sure that alpha is between 0 and 1 ... and that it doesn't
        // result in a sign flip.
        // Note: when alpha is set to 1, the diagonal is not modified
        //       and the negative offdiags just get shifted from those
        //       removed to those kept, meaning that the diagonal dominance
        //       should be the same as before.
        //
        // Can alpha be negative? It looks like denom should always
        // be positive. Normally, diag - gamma should also be positive
        // (but if it is negative, then numer is guaranteed to be positive).
        // Looking at the 'if' above,
        //   if ((TST::real(diag) > TST::real(gamma)) &&
        //       (TST::real((-NegFilteredSum)/(diag - gamma)) <= TST::real(Target))) {
        // it should guarantee that numer is positive. This is obvious when
        // the second condition is false. When it is the first condition that
        // is false, it follows that the two individual terms in the numer
        // formula must be positive.

        if (TST::magnitude(denom) < TST::magnitude(numer))
          alpha = TST::one();
        else
          alpha = numer / denom;
        if (TST::real(alpha) < TST::real(zero)) alpha = zero;
        if (TST::real(diag) < TST::real((one - alpha) * gamma)) alpha = TST::one();

        // First change the diagonal

        if (fdiagIndex != -1) fvals[fdiagIndex] = diag - (one - alpha) * gamma;

        // After lumping, the sum of neg offdiags will be NegFilteredSum
        // + alpha*gamma. That is, the remaining negative entries altered
        // by the percent (=alpha) of stuff (=gamma) that needs to be
        // lumped after taking into account lumping to pos offdiags.

        // Do this by assigning a fraction of NegFilteredSum+alpha*gamma
        // proportional to vals[j]/NegFilteredSum

        SC temp = (NegFilteredSum + alpha * gamma) / NegFilteredSum;
        j       = 0;
        for (LO jj = 0; jj < (LO)finds.size(); jj++) {
          while (inds[j] != finds[jj]) j++;  // assumes that finds is in the same order as
                                             // inds ... but perhaps has some entries missing
          if ((jj != fdiagIndex) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero)) &&
              (TST::real(vals[j]) < TST::real(zero)))
            fvals[jj] = temp * vals[j];
        }
      } else {  // desperate case
        // So we don't have any kept negative offdiags ...

        if (NumPosKept > 0) {
          // Luckily we can push this stuff to the pos offdiags,
          // which now makes them negative
          flipPosOffDiagsToNeg = true;

          j = 0;
          for (LO jj = 0; jj < (LO)finds.size(); jj++) {
            while (inds[j] != finds[jj]) j++;  // assumes that finds is in the same order as
                                               // inds ... but perhaps has some entries missing
            if ((j != diagIndex) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero)) &&
                (TST::real(vals[j]) > TST::real(zero)))
              fvals[jj] = -gamma / ((SC)NumPosKept);
          }
        }
        // else abandon rowsum preservation and do nothing
      }
      if (!flipPosOffDiagsToNeg) {  // Not desperate, so we now zero out
                                    // all pos terms, including some
                                    // not originally filtered
                                    // but zeroed due to lumping
        j = 0;
        for (LO jj = 0; jj < (LO)finds.size(); jj++) {
          while (inds[j] != finds[jj]) j++;  // assumes that finds is in the same order as
                                             // inds ... but perhaps has some entries missing
          if ((jj != fdiagIndex) && (TST::real(vals[j]) > TST::real(zero))) fvals[jj] = zero;
        }
      }
    }  // positive gamma else

  }  // loop over all rows
}

}  // namespace MueLu

#endif  // MUELU_FILTEREDAFACTORY_DEF_HPP