MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_NotayAggregationFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
11#define MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
12
13#include <Xpetra_Map.hpp>
14#include <Xpetra_Vector.hpp>
15#include <Xpetra_MultiVectorFactory.hpp>
16#include <Xpetra_MapFactory.hpp>
17#include <Xpetra_VectorFactory.hpp>
18
19#include "KokkosKernels_Handle.hpp"
20#include "KokkosSparse_spgemm.hpp"
21#include "KokkosSparse_spmv.hpp"
22
24
25#include "MueLu_Aggregates.hpp"
26#include "MueLu_LWGraph.hpp"
27#include "MueLu_LWGraph_kokkos.hpp"
28#include "MueLu_Level.hpp"
29#include "MueLu_MasterList.hpp"
30#include "MueLu_Monitor.hpp"
31#include "MueLu_Types.hpp"
32#include "MueLu_Utilities.hpp"
33
34namespace MueLu {
35
36namespace NotayUtils {
37template <class LocalOrdinal>
39 return min + as<LocalOrdinal>((max - min + 1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
40}
41
42template <class LocalOrdinal>
43void RandomReorder(Teuchos::Array<LocalOrdinal>& list) {
44 typedef LocalOrdinal LO;
45 LO n = Teuchos::as<LO>(list.size());
46 for (LO i = 0; i < n - 1; i++)
47 std::swap(list[i], list[RandomOrdinal(i, n - 1)]);
48}
49} // namespace NotayUtils
50
51template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
53 RCP<ParameterList> validParamList = rcp(new ParameterList());
54
55#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
56 SET_VALID_ENTRY("aggregation: pairwise: size");
57 SET_VALID_ENTRY("aggregation: pairwise: tie threshold");
58 SET_VALID_ENTRY("aggregation: Dirichlet threshold");
59 SET_VALID_ENTRY("aggregation: ordering");
60#undef SET_VALID_ENTRY
61
62 // general variables needed in AggregationFactory
63 validParamList->set<RCP<const FactoryBase>>("A", null, "Generating factory of the matrix");
64 validParamList->set<RCP<const FactoryBase>>("Graph", null, "Generating factory of the graph");
65 validParamList->set<RCP<const FactoryBase>>("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
66
67 return validParamList;
68}
69
70template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
72 Input(currentLevel, "A");
73 Input(currentLevel, "Graph");
74 Input(currentLevel, "DofsPerNode");
75}
76
77template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
79 FactoryMonitor m(*this, "Build", currentLevel);
80 using STS = Teuchos::ScalarTraits<Scalar>;
81 using MT = typename STS::magnitudeType;
82
83 const MT MT_TWO = Teuchos::ScalarTraits<MT>::one() + Teuchos::ScalarTraits<MT>::one();
84
85 RCP<Teuchos::FancyOStream> out;
86 if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
87 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
88 out->setShowAllFrontMatter(false).setShowProcRank(true);
89 } else {
90 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
91 }
92
93 const ParameterList& pL = GetParameterList();
94
95 const MT kappa = static_cast<MT>(pL.get<double>("aggregation: Dirichlet threshold"));
96 TEUCHOS_TEST_FOR_EXCEPTION(kappa <= MT_TWO,
98 "Pairwise requires kappa > 2"
99 " otherwise all rows are considered as Dirichlet rows.");
100
101 // Parameters
102 int maxNumIter = 3;
103 if (pL.isParameter("aggregation: pairwise: size"))
104 maxNumIter = pL.get<int>("aggregation: pairwise: size");
105 TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1,
107 "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\""
108 " must be a strictly positive integer");
109
110 RCP<const LWGraph> graph;
111 if (IsType<RCP<LWGraph>>(currentLevel, "Graph"))
112 graph = Get<RCP<LWGraph>>(currentLevel, "Graph");
113 else {
114 auto graph_k = Get<RCP<LWGraph_kokkos>>(currentLevel, "Graph");
115 graph = graph_k->copyToHost();
116 }
117 RCP<const Matrix> A = Get<RCP<Matrix>>(currentLevel, "A");
118
119 // Setup aggregates & aggStat objects
120 RCP<Aggregates> aggregates = rcp(new Aggregates(*graph));
121 aggregates->setObjectLabel("PW");
122
123 const LO numRows = graph->GetNodeNumVertices();
124
125 // construct aggStat information
126 std::vector<unsigned> aggStat(numRows, READY);
127
128 const int DofsPerNode = Get<int>(currentLevel, "DofsPerNode");
129 TEUCHOS_TEST_FOR_EXCEPTION(DofsPerNode != 1, Exceptions::RuntimeError,
130 "Pairwise only supports one dof per node");
131
132 // This follows the paper:
133 // Notay, "Aggregation-based algebraic multigrid for convection-diffusion equations",
134 // SISC 34(3), pp. A2288-2316.
135
136 // Handle Ordering
137 std::string orderingStr = pL.get<std::string>("aggregation: ordering");
138 enum {
139 O_NATURAL,
140 O_RANDOM,
141 O_CUTHILL_MCKEE,
142 } ordering;
143
144 ordering = O_NATURAL;
145 if (orderingStr == "random")
146 ordering = O_RANDOM;
147 else if (orderingStr == "natural") {
148 } else if (orderingStr == "cuthill-mckee" || orderingStr == "cm")
149 ordering = O_CUTHILL_MCKEE;
150 else {
151 TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, "Invalid ordering type");
152 }
153
154 // Get an ordering vector
155 // NOTE: The orderingVector only orders *rows* of the matrix. Off-proc columns
156 // will get ignored in the aggregation phases, so we don't need to worry about
157 // running off the end.
158 Array<LO> orderingVector(numRows);
159 for (LO i = 0; i < numRows; i++)
160 orderingVector[i] = i;
161 if (ordering == O_RANDOM)
162 MueLu::NotayUtils::RandomReorder(orderingVector);
163 else if (ordering == O_CUTHILL_MCKEE) {
164 RCP<Xpetra::Vector<LO, LO, GO, NO>> rcmVector = MueLu::Utilities<SC, LO, GO, NO>::CuthillMcKee(*A);
165 auto localVector = rcmVector->getData(0);
166 for (LO i = 0; i < numRows; i++)
167 orderingVector[i] = localVector[i];
168 }
169
170 // Get the party stated
171 LO numNonAggregatedNodes = numRows, numDirichletNodes = 0;
172 BuildInitialAggregates(pL, A, orderingVector(), kappa,
173 *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes);
174 TEUCHOS_TEST_FOR_EXCEPTION(0 < numNonAggregatedNodes, Exceptions::RuntimeError,
175 "Initial pairwise aggregation failed to aggregate all nodes");
176 LO numLocalAggregates = aggregates->GetNumAggregates();
177 GetOStream(Statistics0) << "Init : " << numLocalAggregates << " - "
178 << A->getLocalNumRows() / numLocalAggregates << std::endl;
179
180 // Temporary data storage for further aggregation steps
181 local_matrix_type intermediateP;
182 local_matrix_type coarseLocalA;
183
184 // Compute the on rank part of the local matrix
185 // that the square submatrix that only contains
186 // columns corresponding to local rows.
187 LO numLocalDirichletNodes = numDirichletNodes;
188 Array<LO> localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows));
189 BuildOnRankLocalMatrix(A->getLocalMatrixDevice(), coarseLocalA);
190 for (LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) {
191 // Compute the intermediate prolongator
192 BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates,
193 localVertex2AggId(), intermediateP);
194
195 // Compute the coarse local matrix and coarse row sum
196 BuildCoarseLocalMatrix(intermediateP, coarseLocalA);
197
198 // Directly compute rowsum from A, rather than coarseA
199 row_sum_type rowSum("rowSum", numLocalAggregates);
200 {
201 std::vector<std::vector<LO>> agg2vertex(numLocalAggregates);
202 auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
203 for (LO i = 0; i < (LO)numRows; i++) {
204 if (aggStat[i] != AGGREGATED)
205 continue;
206 LO agg = vertex2AggId[i];
207 agg2vertex[agg].push_back(i);
208 }
209
210 typename row_sum_type::host_mirror_type rowSum_h = Kokkos::create_mirror_view(rowSum);
211 for (LO i = 0; i < numRows; i++) {
212 // If not aggregated already, skip this guy
213 if (aggStat[i] != AGGREGATED)
214 continue;
215 int agg = vertex2AggId[i];
216 std::vector<LO>& myagg = agg2vertex[agg];
217
218 size_t nnz = A->getNumEntriesInLocalRow(i);
219 ArrayView<const LO> indices;
220 ArrayView<const SC> vals;
221 A->getLocalRowView(i, indices, vals);
222
223 SC mysum = Teuchos::ScalarTraits<Scalar>::zero();
224 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
225 bool found = false;
226 if (indices[colidx] < numRows) {
227 for (LO j = 0; j < (LO)myagg.size(); j++)
228 if (vertex2AggId[indices[colidx]] == agg)
229 found = true;
230 }
231 if (!found) {
232 *out << "- ADDING col " << indices[colidx] << " = " << vals[colidx] << std::endl;
233 mysum += vals[colidx];
234 } else {
235 *out << "- NOT ADDING col " << indices[colidx] << " = " << vals[colidx] << std::endl;
236 }
237 }
238
239 rowSum_h[agg] = mysum;
240 }
241 Kokkos::deep_copy(rowSum, rowSum_h);
242 }
243
244 // Get local orderingVector
245 Array<LO> localOrderingVector(numRows);
246 for (LO i = 0; i < numRows; i++)
247 localOrderingVector[i] = i;
248 if (ordering == O_RANDOM)
249 MueLu::NotayUtils::RandomReorder(localOrderingVector);
250 else if (ordering == O_CUTHILL_MCKEE) {
251 RCP<Xpetra::Vector<LO, LO, GO, NO>> rcmVector = MueLu::Utilities<SC, LO, GO, NO>::CuthillMcKee(*A);
252 auto localVector = rcmVector->getData(0);
253 for (LO i = 0; i < numRows; i++)
254 localOrderingVector[i] = localVector[i];
255 }
256
257 // Compute new aggregates
258 numLocalAggregates = 0;
259 numNonAggregatedNodes = static_cast<LO>(coarseLocalA.numRows());
260 std::vector<LO> localAggStat(numNonAggregatedNodes, READY);
261 localVertex2AggId.resize(numNonAggregatedNodes, -1);
262 BuildFurtherAggregates(pL, A, localOrderingVector, coarseLocalA, kappa, rowSum,
263 localAggStat, localVertex2AggId,
264 numLocalAggregates, numNonAggregatedNodes);
265
266 // After the first initial pairwise aggregation
267 // the Dirichlet nodes have been removed.
268 numLocalDirichletNodes = 0;
269
270 // Update the aggregates
271 RCP<LOMultiVector> vertex2AggIdMV = aggregates->GetVertex2AggId();
272 ArrayRCP<LO> vertex2AggId = vertex2AggIdMV->getDataNonConst(0);
273 for (size_t vertexIdx = 0; vertexIdx < A->getLocalNumRows(); ++vertexIdx) {
274 LO oldAggIdx = vertex2AggId[vertexIdx];
275 if (oldAggIdx != Teuchos::OrdinalTraits<LO>::invalid()) {
276 vertex2AggId[vertexIdx] = localVertex2AggId[oldAggIdx];
277 }
278 }
279
280 // We could probably print some better statistics at some point
281 GetOStream(Statistics0) << "Iter " << aggregationIter << ": " << numLocalAggregates << " - "
282 << A->getLocalNumRows() / numLocalAggregates << std::endl;
283 }
284 aggregates->SetNumAggregates(numLocalAggregates);
285 aggregates->AggregatesCrossProcessors(false);
286 aggregates->ComputeAggregateSizes(true /*forceRecompute*/);
287
288 // DO stuff
289 Set(currentLevel, "Aggregates", aggregates);
290 GetOStream(Statistics0) << aggregates->description() << std::endl;
291}
292
293template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
295 BuildInitialAggregates(const Teuchos::ParameterList& params,
296 const RCP<const Matrix>& A,
297 const Teuchos::ArrayView<const LO>& orderingVector,
298 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
299 Aggregates& aggregates,
300 std::vector<unsigned>& aggStat,
301 LO& numNonAggregatedNodes,
302 LO& numDirichletNodes) const {
303 Monitor m(*this, "BuildInitialAggregates");
304 using STS = Teuchos::ScalarTraits<Scalar>;
305 using MT = typename STS::magnitudeType;
306 using RealValuedVector = Xpetra::Vector<MT, LocalOrdinal, GlobalOrdinal, Node>;
307
308 RCP<Teuchos::FancyOStream> out;
309 if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
310 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
311 out->setShowAllFrontMatter(false).setShowProcRank(true);
312 } else {
313 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
314 }
315
316 const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
317 const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
318 const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
319 const MT MT_TWO = MT_ONE + MT_ONE;
320 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
321 const LO LO_ZERO = Teuchos::OrdinalTraits<LO>::zero();
322
323 const MT kappa_init = kappa / (kappa - MT_TWO);
324 const LO numRows = aggStat.size();
325 const int myRank = A->getMap()->getComm()->getRank();
326
327 // For finding "ties" where we fall back to the ordering. Making this larger than
328 // hard zero substantially increases code robustness.
329 double tie_criterion = params.get<double>("aggregation: pairwise: tie threshold");
330 double tie_less = 1.0 - tie_criterion;
331 double tie_more = 1.0 + tie_criterion;
332
333 // NOTE: Assumes 1 dof per node. This constraint is enforced in Build(),
334 // and so we're not doing again here.
335 // This should probably be fixed at some point.
336
337 // Extract diagonal, rowsums, etc
338 // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
341 RCP<RealValuedVector> ghostedAbsRowSum = MueLu::Utilities<SC, LO, GO, NO>::GetMatrixOverlappedAbsDeletedRowsum(*A);
342 const ArrayRCP<const SC> D = ghostedDiag->getData(0);
343 const ArrayRCP<const SC> S = ghostedRowSum->getData(0);
344 const ArrayRCP<const MT> AbsRs = ghostedAbsRowSum->getData(0);
345
346 // Aggregates stuff
347 ArrayRCP<LO> vertex2AggId_rcp = aggregates.GetVertex2AggId()->getDataNonConst(0);
348 ArrayRCP<LO> procWinner_rcp = aggregates.GetProcWinner()->getDataNonConst(0);
349 ArrayView<LO> vertex2AggId = vertex2AggId_rcp();
350 ArrayView<LO> procWinner = procWinner_rcp();
351
352 // Algorithm 4.2
353
354 // 0,1 : Initialize: Flag boundary conditions
355 // Modification: We assume symmetry here aij = aji
356 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
357 MT aii = STS::magnitude(D[row]);
358 MT rowsum = AbsRs[row];
359
360 if (aii >= kappa_init * rowsum) {
361 *out << "Flagging index " << row << " as dirichlet "
362 "aii >= kappa*rowsum = "
363 << aii << " >= " << kappa_init << " " << rowsum << std::endl;
364 aggStat[row] = IGNORED;
365 --numNonAggregatedNodes;
366 ++numDirichletNodes;
367 }
368 }
369
370 // 2 : Iteration
371 LO aggIndex = LO_ZERO;
372 for (LO i = 0; i < numRows; i++) {
373 LO current_idx = orderingVector[i];
374 // If we're aggregated already, skip this guy
375 if (aggStat[current_idx] != READY)
376 continue;
377
378 MT best_mu = MT_ZERO;
379 LO best_idx = LO_INVALID;
380
381 size_t nnz = A->getNumEntriesInLocalRow(current_idx);
382 ArrayView<const LO> indices;
383 ArrayView<const SC> vals;
384 A->getLocalRowView(current_idx, indices, vals);
385
386 MT aii = STS::real(D[current_idx]);
387 MT si = STS::real(S[current_idx]);
388 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
389 // Skip aggregated neighbors, off-rank neighbors, hard zeros and self
390 LO col = indices[colidx];
391 SC val = vals[colidx];
392 if (current_idx == col || col >= numRows || aggStat[col] != READY || val == SC_ZERO)
393 continue;
394
395 MT aij = STS::real(val);
396 MT ajj = STS::real(D[col]);
397 MT sj = -STS::real(S[col]); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
398 if (aii - si + ajj - sj >= MT_ZERO) {
399 // Modification: We assume symmetry here aij = aji
400 MT mu_top = MT_TWO / (MT_ONE / aii + MT_ONE / ajj);
401 MT mu_bottom = -aij + MT_ONE / (MT_ONE / (aii - si) + MT_ONE / (ajj - sj));
402 MT mu = mu_top / mu_bottom;
403
404 // Modification: Explicitly check the tie criterion here
405 if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less ||
406 (mu < best_mu * tie_more && orderingVector[col] < orderingVector[best_idx]))) {
407 best_mu = mu;
408 best_idx = col;
409 *out << "[" << current_idx << "] Column UPDATED " << col << ": "
410 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
411 << " = " << aii - si + ajj - sj << ", aij = " << aij << ", mu = " << mu << std::endl;
412 } else {
413 *out << "[" << current_idx << "] Column NOT UPDATED " << col << ": "
414 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
415 << " = " << aii - si + ajj - sj << ", aij = " << aij << ", mu = " << mu << std::endl;
416 }
417 } else {
418 *out << "[" << current_idx << "] Column FAILED " << col << ": "
419 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
420 << " = " << aii - si + ajj - sj << std::endl;
421 }
422 } // end column for loop
423
424 if (best_idx == LO_INVALID) {
425 *out << "No node buddy found for index " << current_idx
426 << " [agg " << aggIndex << "]\n"
427 << std::endl;
428 // We found no potential node-buddy, so let's just make this a singleton
429 // NOTE: The behavior of what to do if you have no un-aggregated neighbors is not specified in
430 // the paper
431
432 aggStat[current_idx] = ONEPT;
433 vertex2AggId[current_idx] = aggIndex;
434 procWinner[current_idx] = myRank;
435 numNonAggregatedNodes--;
436 aggIndex++;
437
438 } else {
439 // We have a buddy, so aggregate, either as a singleton or as a pair, depending on mu
440 if (best_mu <= kappa) {
441 *out << "Node buddies (" << current_idx << "," << best_idx << ") [agg " << aggIndex << "]" << std::endl;
442
443 aggStat[current_idx] = AGGREGATED;
444 aggStat[best_idx] = AGGREGATED;
445 vertex2AggId[current_idx] = aggIndex;
446 vertex2AggId[best_idx] = aggIndex;
447 procWinner[current_idx] = myRank;
448 procWinner[best_idx] = myRank;
449 numNonAggregatedNodes -= 2;
450 aggIndex++;
451
452 } else {
453 *out << "No buddy found for index " << current_idx << ","
454 " but aggregating as singleton [agg "
455 << aggIndex << "]" << std::endl;
456
457 aggStat[current_idx] = ONEPT;
458 vertex2AggId[current_idx] = aggIndex;
459 procWinner[current_idx] = myRank;
460 numNonAggregatedNodes--;
461 aggIndex++;
462 } // best_mu
463 } // best_idx
464 } // end Algorithm 4.2
465
466 *out << "vertex2aggid :";
467 for (int i = 0; i < static_cast<int>(vertex2AggId.size()); ++i) {
468 *out << i << "(" << vertex2AggId[i] << ")";
469 }
470 *out << std::endl;
471
472 // update aggregate object
473 aggregates.SetNumAggregates(aggIndex);
474} // BuildInitialAggregates
475
476template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
478 BuildFurtherAggregates(const Teuchos::ParameterList& params,
479 const RCP<const Matrix>& A,
480 const Teuchos::ArrayView<const LO>& orderingVector,
481 const typename Matrix::local_matrix_device_type& coarseA,
482 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
483#if KOKKOS_VERSION >= 40799
484 const Kokkos::View<typename KokkosKernels::ArithTraits<Scalar>::val_type*,
485#else
486 const Kokkos::View<typename Kokkos::ArithTraits<Scalar>::val_type*,
487#endif
488 Kokkos::LayoutLeft,
489 typename Matrix::local_matrix_device_type::device_type>& rowSum,
490 std::vector<LocalOrdinal>& localAggStat,
491 Teuchos::Array<LocalOrdinal>& localVertex2AggID,
492 LO& numLocalAggregates,
493 LO& numNonAggregatedNodes) const {
494 Monitor m(*this, "BuildFurtherAggregates");
495
496 // Set debug outputs based on environment variable
497 RCP<Teuchos::FancyOStream> out;
498 if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
499 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
500 out->setShowAllFrontMatter(false).setShowProcRank(true);
501 } else {
502 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
503 }
504
505 using value_type = typename local_matrix_type::value_type;
506#if KOKKOS_VERSION >= 40799
507 const value_type KAT_zero = KokkosKernels::ArithTraits<value_type>::zero();
508#else
509 const value_type KAT_zero = Kokkos::ArithTraits<value_type>::zero();
510#endif
511 const magnitude_type MT_zero = Teuchos::ScalarTraits<magnitude_type>::zero();
512 const magnitude_type MT_one = Teuchos::ScalarTraits<magnitude_type>::one();
513 const magnitude_type MT_two = MT_one + MT_one;
514 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
515
516 // For finding "ties" where we fall back to the ordering. Making this larger than
517 // hard zero substantially increases code robustness.
518 double tie_criterion = params.get<double>("aggregation: pairwise: tie threshold");
519 double tie_less = 1.0 - tie_criterion;
520 double tie_more = 1.0 + tie_criterion;
521
522 typename row_sum_type::host_mirror_type rowSum_h = Kokkos::create_mirror_view(rowSum);
523 Kokkos::deep_copy(rowSum_h, rowSum);
524
525 // Extracting the diagonal of a KokkosSparse::CrsMatrix
526 // is not currently provided in kokkos-kernels so here
527 // is an ugly way to get that done...
528 const LO numRows = static_cast<LO>(coarseA.numRows());
529 typename local_matrix_type::values_type::host_mirror_type diagA_h("diagA host", numRows);
530 typename local_matrix_type::row_map_type::host_mirror_type row_map_h = Kokkos::create_mirror_view(coarseA.graph.row_map);
531 Kokkos::deep_copy(row_map_h, coarseA.graph.row_map);
532 typename local_matrix_type::index_type::host_mirror_type entries_h = Kokkos::create_mirror_view(coarseA.graph.entries);
533 Kokkos::deep_copy(entries_h, coarseA.graph.entries);
534 typename local_matrix_type::values_type::host_mirror_type values_h = Kokkos::create_mirror_view(coarseA.values);
535 Kokkos::deep_copy(values_h, coarseA.values);
536 for (LO rowIdx = 0; rowIdx < numRows; ++rowIdx) {
537 for (LO entryIdx = static_cast<LO>(row_map_h(rowIdx));
538 entryIdx < static_cast<LO>(row_map_h(rowIdx + 1));
539 ++entryIdx) {
540 if (rowIdx == static_cast<LO>(entries_h(entryIdx))) {
541 diagA_h(rowIdx) = values_h(entryIdx);
542 }
543 }
544 }
545
546 for (LO currentIdx = 0; currentIdx < numRows; ++currentIdx) {
547 if (localAggStat[currentIdx] != READY) {
548 continue;
549 }
550
551 LO bestIdx = Teuchos::OrdinalTraits<LO>::invalid();
552 magnitude_type best_mu = Teuchos::ScalarTraits<magnitude_type>::zero();
553 const magnitude_type aii = Teuchos::ScalarTraits<value_type>::real(diagA_h(currentIdx));
554 const magnitude_type si = Teuchos::ScalarTraits<value_type>::real(rowSum_h(currentIdx));
555 for (auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) {
556 const LO colIdx = static_cast<LO>(entries_h(entryIdx));
557 if (currentIdx == colIdx || colIdx >= numRows || localAggStat[colIdx] != READY || values_h(entryIdx) == KAT_zero) {
558 continue;
559 }
560
561 const magnitude_type aij = Teuchos::ScalarTraits<value_type>::real(values_h(entryIdx));
562 const magnitude_type ajj = Teuchos::ScalarTraits<value_type>::real(diagA_h(colIdx));
563 const magnitude_type sj = -Teuchos::ScalarTraits<value_type>::real(rowSum_h(colIdx)); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
564 if (aii - si + ajj - sj >= MT_zero) {
565 const magnitude_type mu_top = MT_two / (MT_one / aii + MT_one / ajj);
566 const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj));
567 const magnitude_type mu = mu_top / mu_bottom;
568
569 // Modification: Explicitly check the tie criterion here
570 if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less ||
571 (mu < best_mu * tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) {
572 best_mu = mu;
573 bestIdx = colIdx;
574 *out << "[" << currentIdx << "] Column UPDATED " << colIdx << ": "
575 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
576 << " = " << aii - si + ajj - sj << ", aij = " << aij << " mu = " << mu << std::endl;
577 } else {
578 *out << "[" << currentIdx << "] Column NOT UPDATED " << colIdx << ": "
579 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
580 << " = " << aii - si + ajj - sj << ", aij = " << aij << ", mu = " << mu << std::endl;
581 }
582 } else {
583 *out << "[" << currentIdx << "] Column FAILED " << colIdx << ": "
584 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
585 << " = " << aii - si + ajj - sj << std::endl;
586 }
587 } // end loop over row entries
588
589 if (bestIdx == Teuchos::OrdinalTraits<LO>::invalid()) {
590 localAggStat[currentIdx] = ONEPT;
591 localVertex2AggID[currentIdx] = numLocalAggregates;
592 --numNonAggregatedNodes;
593 ++numLocalAggregates;
594 } else {
595 if (best_mu <= kappa) {
596 *out << "Node buddies (" << currentIdx << "," << bestIdx << ") [agg " << numLocalAggregates << "]" << std::endl;
597
598 localAggStat[currentIdx] = AGGREGATED;
599 localVertex2AggID[currentIdx] = numLocalAggregates;
600 --numNonAggregatedNodes;
601
602 localAggStat[bestIdx] = AGGREGATED;
603 localVertex2AggID[bestIdx] = numLocalAggregates;
604 --numNonAggregatedNodes;
605
606 ++numLocalAggregates;
607 } else {
608 *out << "No buddy found for index " << currentIdx << ","
609 " but aggregating as singleton [agg "
610 << numLocalAggregates << "]" << std::endl;
611
612 localAggStat[currentIdx] = ONEPT;
613 localVertex2AggID[currentIdx] = numLocalAggregates;
614 --numNonAggregatedNodes;
615 ++numLocalAggregates;
616 }
617 }
618 } // end loop over matrix rows
619
620} // BuildFurtherAggregates
621
622template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
624 BuildOnRankLocalMatrix(const typename Matrix::local_matrix_device_type& localA,
625 typename Matrix::local_matrix_device_type& onrankA) const {
626 Monitor m(*this, "BuildOnRankLocalMatrix");
627
628 // Set debug outputs based on environment variable
629 RCP<Teuchos::FancyOStream> out;
630 if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
631 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
632 out->setShowAllFrontMatter(false).setShowProcRank(true);
633 } else {
634 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
635 }
636
637 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
638 using values_type = typename local_matrix_type::values_type;
639 using size_type = typename local_graph_type::size_type;
640 using col_index_type = typename local_graph_type::data_type;
641 using array_layout = typename local_graph_type::array_layout;
642 using memory_traits = typename local_graph_type::memory_traits;
643 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
644 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
645 // Extract on rank part of A
646 // Simply check that the column index is less than the number of local rows
647 // otherwise remove it.
648
649 const int numRows = static_cast<int>(localA.numRows());
650 row_pointer_type rowPtr("onrankA row pointer", numRows + 1);
651 typename row_pointer_type::host_mirror_type rowPtr_h = Kokkos::create_mirror_view(rowPtr);
652 typename local_graph_type::row_map_type::host_mirror_type origRowPtr_h = Kokkos::create_mirror_view(localA.graph.row_map);
653 typename local_graph_type::entries_type::host_mirror_type origColind_h = Kokkos::create_mirror_view(localA.graph.entries);
654 typename values_type::host_mirror_type origValues_h = Kokkos::create_mirror_view(localA.values);
655 Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map);
656 Kokkos::deep_copy(origColind_h, localA.graph.entries);
657 Kokkos::deep_copy(origValues_h, localA.values);
658
659 // Compute the number of nnz entries per row
660 rowPtr_h(0) = 0;
661 for (int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
662 for (size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
663 if (origColind_h(entryIdx) < numRows) {
664 rowPtr_h(rowIdx + 1) += 1;
665 }
666 }
667 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx);
668 }
669 Kokkos::deep_copy(rowPtr, rowPtr_h);
670
671 const LO nnzOnrankA = rowPtr_h(numRows);
672
673 // Now use nnz per row to allocate matrix views and store column indices and values
674 col_indices_type colInd("onrankA column indices", rowPtr_h(numRows));
675 values_type values("onrankA values", rowPtr_h(numRows));
676 typename col_indices_type::host_mirror_type colInd_h = Kokkos::create_mirror_view(colInd);
677 typename values_type::host_mirror_type values_h = Kokkos::create_mirror_view(values);
678 int entriesInRow;
679 for (int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
680 entriesInRow = 0;
681 for (size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
682 if (origColind_h(entryIdx) < numRows) {
683 colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx);
684 values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx);
685 ++entriesInRow;
686 }
687 }
688 }
689 Kokkos::deep_copy(colInd, colInd_h);
690 Kokkos::deep_copy(values, values_h);
691
692 onrankA = local_matrix_type("onrankA", numRows, numRows,
693 nnzOnrankA, values, rowPtr, colInd);
694}
695
696template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
699 const LocalOrdinal numDirichletNodes,
700 const LocalOrdinal numLocalAggregates,
701 const Teuchos::ArrayView<const LocalOrdinal>& localVertex2AggID,
702 typename Matrix::local_matrix_device_type& intermediateP) const {
703 Monitor m(*this, "BuildIntermediateProlongator");
704
705 // Set debug outputs based on environment variable
706 RCP<Teuchos::FancyOStream> out;
707 if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
708 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
709 out->setShowAllFrontMatter(false).setShowProcRank(true);
710 } else {
711 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
712 }
713
714 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
715 using values_type = typename local_matrix_type::values_type;
716 using size_type = typename local_graph_type::size_type;
717 using col_index_type = typename local_graph_type::data_type;
718 using array_layout = typename local_graph_type::array_layout;
719 using memory_traits = typename local_graph_type::memory_traits;
720 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
721 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
722
723 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
724
725 const int intermediatePnnz = numRows - numDirichletNodes;
726 row_pointer_type rowPtr("intermediateP row pointer", numRows + 1);
727 col_indices_type colInd("intermediateP column indices", intermediatePnnz);
728 values_type values("intermediateP values", intermediatePnnz);
729 typename row_pointer_type::host_mirror_type rowPtr_h = Kokkos::create_mirror_view(rowPtr);
730 typename col_indices_type::host_mirror_type colInd_h = Kokkos::create_mirror_view(colInd);
731
732 rowPtr_h(0) = 0;
733 for (int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
734 // Skip Dirichlet nodes
735 if (localVertex2AggID[rowIdx] == LO_INVALID) {
736 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx);
737 } else {
738 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1;
739 colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx];
740 }
741 }
742
743 Kokkos::deep_copy(rowPtr, rowPtr_h);
744 Kokkos::deep_copy(colInd, colInd_h);
745#if KOKKOS_VERSION >= 40799
746 Kokkos::deep_copy(values, KokkosKernels::ArithTraits<typename values_type::value_type>::one());
747#else
748 Kokkos::deep_copy(values, Kokkos::ArithTraits<typename values_type::value_type>::one());
749#endif
750
751 intermediateP = local_matrix_type("intermediateP",
752 numRows, numLocalAggregates, intermediatePnnz,
753 values, rowPtr, colInd);
754} // BuildIntermediateProlongator
755
756template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
758 BuildCoarseLocalMatrix(const typename Matrix::local_matrix_device_type& intermediateP,
759 typename Matrix::local_matrix_device_type& coarseA) const {
760 Monitor m(*this, "BuildCoarseLocalMatrix");
761
762 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
763 using values_type = typename local_matrix_type::values_type;
764 using size_type = typename local_graph_type::size_type;
765 using col_index_type = typename local_graph_type::data_type;
766 using array_layout = typename local_graph_type::array_layout;
767 using memory_traits = typename local_graph_type::memory_traits;
768 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
769 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
770
772 localSpGEMM(coarseA, intermediateP, "AP", AP);
773
774 // Note 03/11/20, lbv: does kh need to destroy and recreate the spgemm handle
775 // I am not sure but doing it for safety in case it stashes data from the previous
776 // spgemm computation...
777
778 // Compute Ac = Pt * AP
779 // Two steps needed:
780 // 1. compute Pt
781 // 2. perform multiplication
782
783 // Step 1 compute Pt
784 // Obviously this requires the same amount of storage as P except for the rowPtr
785 row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing("Pt row pointer"),
786 intermediateP.numCols() + 1);
787 col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing("Pt column indices"),
788 intermediateP.nnz());
789 values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing("Pt values"),
790 intermediateP.nnz());
791
792 typename row_pointer_type::host_mirror_type rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt);
793 typename col_indices_type::host_mirror_type entries_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
794 Kokkos::deep_copy(entries_h, intermediateP.graph.entries);
795 Kokkos::deep_copy(rowPtrPt_h, 0);
796 for (size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) {
797 rowPtrPt_h(entries_h(entryIdx) + 1) += 1;
798 }
799 for (LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) {
800 rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx);
801 }
802 Kokkos::deep_copy(rowPtrPt, rowPtrPt_h);
803
804 typename row_pointer_type::host_mirror_type rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map);
805 Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map);
806 typename col_indices_type::host_mirror_type colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
807 Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries);
808 typename values_type::host_mirror_type valuesP_h = Kokkos::create_mirror_view(intermediateP.values);
809 Kokkos::deep_copy(valuesP_h, intermediateP.values);
810 typename col_indices_type::host_mirror_type colIndPt_h = Kokkos::create_mirror_view(colIndPt);
811 typename values_type::host_mirror_type valuesPt_h = Kokkos::create_mirror_view(valuesPt);
812 const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits<col_index_type>::invalid();
813 Kokkos::deep_copy(colIndPt_h, invalidColumnIndex);
814
815 col_index_type colIdx = 0;
816 for (LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) {
817 for (size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) {
818 colIdx = entries_h(entryIdxP);
819 for (size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) {
820 if (colIndPt_h(entryIdxPt) == invalidColumnIndex) {
821 colIndPt_h(entryIdxPt) = rowIdx;
822 valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP);
823 break;
824 }
825 } // Loop over entries in row of Pt
826 } // Loop over entries in row of P
827 } // Loop over rows of P
828
829 Kokkos::deep_copy(colIndPt, colIndPt_h);
830 Kokkos::deep_copy(valuesPt, valuesPt_h);
831
832 local_matrix_type intermediatePt("intermediatePt",
833 intermediateP.numCols(),
834 intermediateP.numRows(),
835 intermediateP.nnz(),
836 valuesPt, rowPtrPt, colIndPt);
837
838 // Create views for coarseA matrix
839 localSpGEMM(intermediatePt, AP, "coarseA", coarseA);
840} // BuildCoarseLocalMatrix
841
842template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
844 localSpGEMM(const typename Matrix::local_matrix_device_type& A,
845 const typename Matrix::local_matrix_device_type& B,
846 const std::string matrixLabel,
847 typename Matrix::local_matrix_device_type& C) const {
848 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
849 using values_type = typename local_matrix_type::values_type;
850 using size_type = typename local_graph_type::size_type;
851 using col_index_type = typename local_graph_type::data_type;
852 using array_layout = typename local_graph_type::array_layout;
853 using memory_space = typename device_type::memory_space;
854 using memory_traits = typename local_graph_type::memory_traits;
855 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
856 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
857
858 // Options
859 int team_work_size = 16;
860 std::string myalg("SPGEMM_KK_MEMORY");
861 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
862 KokkosKernels::Experimental::KokkosKernelsHandle<typename row_pointer_type::const_value_type,
863 typename col_indices_type::const_value_type,
864 typename values_type::const_value_type,
866 memory_space,
867 memory_space>
868 kh;
869 kh.create_spgemm_handle(alg_enum);
870 kh.set_team_work_size(team_work_size);
871
872 // Create views for AP matrix
873 row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing("C row pointer"),
874 A.numRows() + 1);
875 col_indices_type colIndC;
876 values_type valuesC;
877
878 // Symbolic multiplication
879 KokkosSparse::spgemm_symbolic(&kh, A.numRows(),
880 B.numRows(), B.numCols(),
881 A.graph.row_map, A.graph.entries, false,
882 B.graph.row_map, B.graph.entries, false,
883 rowPtrC);
884
885 // allocate column indices and values of AP
886 size_t nnzC = kh.get_spgemm_handle()->get_c_nnz();
887 if (nnzC) {
888 colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing("C column inds"), nnzC);
889 valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing("C values"), nnzC);
890 }
891
892 // Numeric multiplication
893 KokkosSparse::spgemm_numeric(&kh, A.numRows(),
894 B.numRows(), B.numCols(),
895 A.graph.row_map, A.graph.entries, A.values, false,
896 B.graph.row_map, B.graph.entries, B.values, false,
897 rowPtrC, colIndC, valuesC);
898 kh.destroy_spgemm_handle();
899
900 C = local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC);
901
902} // localSpGEMM
903
904} // namespace MueLu
905
906#endif /* MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_ */
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
Container class for aggregation information.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
Timer to be used in non-factories.
typename Kokkos::View< impl_scalar_type *, Kokkos::LayoutLeft, device_type > row_sum_type
void BuildInitialAggregates(const Teuchos::ParameterList &params, const RCP< const Matrix > &A, const ArrayView< const LO > &orderingVector, const magnitude_type kappa, Aggregates &aggregates, std::vector< unsigned > &aggStat, LO &numNonAggregatedNodes, LO &numDirichletNodes) const
Initial aggregation phase.
void BuildFurtherAggregates(const Teuchos::ParameterList &params, const RCP< const Matrix > &A, const Teuchos::ArrayView< const LO > &orderingVector, const local_matrix_type &coarseA, const magnitude_type kappa, const row_sum_type &rowSum, std::vector< LO > &localAggStat, Array< LO > &localVertex2AggID, LO &numLocalAggregates, LO &numNonAggregatedNodes) const
Further aggregation phase increases coarsening rate by a factor of ~2 per iteration.
typename Teuchos::ScalarTraits< Scalar >::magnitudeType magnitude_type
void BuildIntermediateProlongator(const LO numRows, const LO numDirichletNodes, const LO numLocalAggregates, const ArrayView< const LO > &localVertex2AggID, local_matrix_type &intermediateP) const
Construction of a local prolongator with values equal to 1.0.
void BuildCoarseLocalMatrix(const local_matrix_type &intermediateP, local_matrix_type &coarseA) const
Implementation of a local Galerkin projection called inside BuildFurtherAggregates.
typename Matrix::local_matrix_device_type local_matrix_type
void BuildOnRankLocalMatrix(const local_matrix_type &localA, local_matrix_type &onRankA) const
typename device_type::execution_space execution_space
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void DeclareInput(Level &currentLevel) const
Input.
void Build(Level &currentLevel) const
Build aggregates.
void localSpGEMM(const local_matrix_type &A, const local_matrix_type &B, const std::string matrixLabel, local_matrix_type &C) const
Wrapper for kokkos-kernels' spgemm that takes in CrsMatrix.
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Matrix &Op)
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
static RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
void RandomReorder(Teuchos::Array< LocalOrdinal > &list)
LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max)
Namespace for MueLu classes and methods.
@ Statistics0
Print statistics that do not involve significant additional computation.