MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_ClassicalPFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_CLASSICALPFACTORY_DEF_HPP
11#define MUELU_CLASSICALPFACTORY_DEF_HPP
12
13#include <Xpetra_MultiVectorFactory.hpp>
14#include <Xpetra_VectorFactory.hpp>
15#include <Xpetra_CrsGraphFactory.hpp>
16#include <Xpetra_Matrix.hpp>
17#include <Xpetra_Map.hpp>
18#include <Xpetra_MapFactory.hpp>
19#include <Xpetra_Vector.hpp>
20#include <Xpetra_Import.hpp>
21#include <Xpetra_IO.hpp>
22#include <Xpetra_StridedMapFactory.hpp>
23
24#include <Teuchos_OrdinalTraits.hpp>
25
26#include "MueLu_MasterList.hpp"
27#include "MueLu_Monitor.hpp"
28#include "MueLu_PerfUtils.hpp"
29#include "MueLu_ImportUtils.hpp"
31#include "MueLu_ClassicalMapFactory.hpp"
32#include "MueLu_AmalgamationInfo.hpp"
33#include "MueLu_LWGraph.hpp"
34
35//#define CMS_DEBUG
36//#define CMS_DUMP
37
38namespace {
39
40template <class SC>
41int Sign(SC val) {
42 using STS = typename Teuchos::ScalarTraits<SC>;
43 typename STS::magnitudeType MT_ZERO = Teuchos::ScalarTraits<typename STS::magnitudeType>::zero();
44 if (STS::real(val) > MT_ZERO)
45 return 1;
46 else if (STS::real(val) < MT_ZERO)
47 return -1;
48 else
49 return 0;
50}
51
52} // namespace
53
54namespace MueLu {
55
56template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
58 RCP<ParameterList> validParamList = rcp(new ParameterList());
59#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
60 SET_VALID_ENTRY("aggregation: deterministic");
61 SET_VALID_ENTRY("aggregation: coloring algorithm");
62 SET_VALID_ENTRY("aggregation: classical scheme");
63
64 // To know if we need BlockNumber
65 SET_VALID_ENTRY("aggregation: drop scheme");
66 {
67 validParamList->getEntry("aggregation: classical scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("direct", "ext+i", "classical modified"))));
68 }
69
70#undef SET_VALID_ENTRY
71 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
72 // validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
73 validParamList->set<RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
74 validParamList->set<RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
75 validParamList->set<RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the CoarseMap");
76 validParamList->set<RCP<const FactoryBase> >("FC Splitting", Teuchos::null, "Generating factory of the FC Splitting");
77 validParamList->set<RCP<const FactoryBase> >("BlockNumber", Teuchos::null, "Generating factory for Block Number");
78 // validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
79
80 return validParamList;
81}
82
83template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
85 Input(fineLevel, "A");
86 Input(fineLevel, "Graph");
87 Input(fineLevel, "DofsPerNode");
88 // Input(fineLevel, "UnAmalgamationInfo");
89 Input(fineLevel, "CoarseMap");
90 Input(fineLevel, "FC Splitting");
91
92 const ParameterList& pL = GetParameterList();
93 std::string drop_algo = pL.get<std::string>("aggregation: drop scheme");
94 if (drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") {
95 Input(fineLevel, "BlockNumber");
96 }
97}
98
99template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
101 return BuildP(fineLevel, coarseLevel);
102}
103
104template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
106 FactoryMonitor m(*this, "Build", coarseLevel);
107 // using STS = Teuchos::ScalarTraits<SC>;
108
109 // We start by assuming that someone did a reasonable strength of connection
110 // algorithm before we start to get our Graph, DofsPerNode and UnAmalgamationInfo
111
112 // We begin by getting a MIS (from a graph coloring) and then at that point we need
113 // to start generating entries for the prolongator.
114 RCP<const Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
115 RCP<const Map> ownedCoarseMap = Get<RCP<const Map> >(fineLevel, "CoarseMap");
116 RCP<const LocalOrdinalVector> owned_fc_splitting = Get<RCP<LocalOrdinalVector> >(fineLevel, "FC Splitting");
117 RCP<const LWGraph> graph = Get<RCP<LWGraph> >(fineLevel, "Graph");
118 // LO nDofsPerNode = Get<LO>(fineLevel, "DofsPerNode");
119 // RCP<AmalgamationInfo> amalgInfo = Get< RCP<AmalgamationInfo> > (fineLevel, "UnAmalgamationInfo");
120 RCP<const Import> Importer = A->getCrsGraph()->getImporter();
121 Xpetra::UnderlyingLib lib = ownedCoarseMap->lib();
122
123 // RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
124 RCP<Matrix> P;
125 // SC SC_ZERO = STS::zero();
126 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
129 const ParameterList& pL = GetParameterList();
130
131 // FIXME: This guy doesn't work right now for NumPDEs != 1
132 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() != 1, Exceptions::RuntimeError, "ClassicalPFactory: Multiple PDEs per node not supported yet");
133
134 // NOTE: Let's hope we never need to deal with this case
135 TEUCHOS_TEST_FOR_EXCEPTION(!A->getRowMap()->isSameAs(*A->getDomainMap()), Exceptions::RuntimeError, "ClassicalPFactory: MPI Ranks > 1 not supported yet");
136
137 // Do we need ghosts rows of A and myPointType?
138 std::string scheme = pL.get<std::string>("aggregation: classical scheme");
139 bool need_ghost_rows = false;
140 if (scheme == "ext+i")
141 need_ghost_rows = true;
142 else if (scheme == "direct")
143 need_ghost_rows = false;
144 else if (scheme == "classical modified")
145 need_ghost_rows = true;
146 // NOTE: ParameterList validator will check this guy so we don't really need an "else" here
147
148 if (GetVerbLevel() & Statistics1) {
149 GetOStream(Statistics1) << "ClassicalPFactory: scheme = " << scheme << std::endl;
150 }
151
152 // Ghost the FC splitting and grab the data (if needed)
153 RCP<const LocalOrdinalVector> fc_splitting;
154 ArrayRCP<const LO> myPointType;
155 if (Importer.is_null()) {
156 fc_splitting = owned_fc_splitting;
157 } else {
158 RCP<LocalOrdinalVector> fc_splitting_nonconst = LocalOrdinalVectorFactory::Build(A->getCrsGraph()->getColMap());
159 fc_splitting_nonconst->doImport(*owned_fc_splitting, *Importer, Xpetra::INSERT);
160 fc_splitting = fc_splitting_nonconst;
161 }
162 myPointType = fc_splitting->getData(0);
163
164 /* Ghost A (if needed) */
165 RCP<const Matrix> Aghost;
166 RCP<const LocalOrdinalVector> fc_splitting_ghost;
167 ArrayRCP<const LO> myPointType_ghost;
168 RCP<const Import> remoteOnlyImporter;
169 if (need_ghost_rows && !Importer.is_null()) {
170 ArrayView<const LO> remoteLIDs = Importer->getRemoteLIDs();
171 size_t numRemote = Importer->getNumRemoteIDs();
172 Array<GO> remoteRows(numRemote);
173 for (size_t i = 0; i < numRemote; i++)
174 remoteRows[i] = Importer->getTargetMap()->getGlobalElement(remoteLIDs[i]);
175
176 RCP<const Map> remoteRowMap = MapFactory::Build(lib, Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(), remoteRows(),
177 A->getDomainMap()->getIndexBase(), A->getDomainMap()->getComm());
178
179 remoteOnlyImporter = Importer->createRemoteOnlyImport(remoteRowMap);
180 RCP<const CrsMatrix> Acrs = rcp_dynamic_cast<const CrsMatrixWrap>(A)->getCrsMatrix();
181 RCP<CrsMatrix> Aghost_crs = CrsMatrixFactory::Build(Acrs, *remoteOnlyImporter, A->getDomainMap(), remoteOnlyImporter->getTargetMap());
182 Aghost = rcp(new CrsMatrixWrap(Aghost_crs));
183 // CAG: It seems that this isn't actually needed?
184 // We also may need need to ghost myPointType for Aghost
185 // RCP<const Import> Importer2 = Aghost->getCrsGraph()->getImporter();
186 // if(Importer2.is_null()) {
187 // RCP<LocalOrdinalVector> fc_splitting_ghost_nonconst = LocalOrdinalVectorFactory::Build(Aghost->getColMap());
188 // fc_splitting_ghost_nonconst->doImport(*owned_fc_splitting,*Importer,Xpetra::INSERT);
189 // fc_splitting_ghost = fc_splitting_ghost_nonconst;
190 // myPointType_ghost = fc_splitting_ghost->getData(0);
191 // }
192 }
193
194 /* Generate the ghosted Coarse map using the "Tuminaro maneuver" (if needed)*/
195 RCP<const Map> coarseMap;
196 if (Importer.is_null())
197 coarseMap = ownedCoarseMap;
198 else {
199 // Generate a domain vector with the coarse ID's as entries for C points
200 GhostCoarseMap(*A, *Importer, myPointType, ownedCoarseMap, coarseMap);
201 }
202
203 /* Get the block number, if we need it (and ghost it) */
204 RCP<LocalOrdinalVector> BlockNumber;
205 std::string drop_algo = pL.get<std::string>("aggregation: drop scheme");
206 if (drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") {
207 RCP<LocalOrdinalVector> OwnedBlockNumber;
208 OwnedBlockNumber = Get<RCP<LocalOrdinalVector> >(fineLevel, "BlockNumber");
209 if (Importer.is_null())
210 BlockNumber = OwnedBlockNumber;
211 else {
212 BlockNumber = LocalOrdinalVectorFactory::Build(A->getColMap());
213 BlockNumber->doImport(*OwnedBlockNumber, *Importer, Xpetra::INSERT);
214 }
215 }
216
217#if defined(CMS_DEBUG) || defined(CMS_DUMP)
218 {
219 RCP<const CrsMatrix> Acrs = rcp_dynamic_cast<const CrsMatrixWrap>(A)->getCrsMatrix();
220 int rank = A->getRowMap()->getComm()->getRank();
221 printf("[%d] A local size = %dx%d\n", rank, (int)Acrs->getRowMap()->getLocalNumElements(), (int)Acrs->getColMap()->getLocalNumElements());
222
223 printf("[%d] graph local size = %dx%d\n", rank, (int)graph->GetDomainMap()->getLocalNumElements(), (int)graph->GetImportMap()->getLocalNumElements());
224
225 std::ofstream ofs(std::string("dropped_graph_") + std::to_string(fineLevel.GetLevelID()) + std::string("_") + std::to_string(rank) + std::string(".dat"), std::ofstream::out);
226 RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
227 graph->print(*fancy, Debug);
228 std::string out_fc = std::string("fc_splitting_") + std::to_string(fineLevel.GetLevelID()) + std::string("_") + std::to_string(rank) + std::string(".dat");
229 std::string out_block = std::string("block_number_") + std::to_string(fineLevel.GetLevelID()) + std::string("_") + std::to_string(rank) + std::string(".dat");
230
231 // We don't support writing LO vectors in Xpetra (boo!) so....
232 using real_type = typename Teuchos::ScalarTraits<SC>::magnitudeType;
233 using RealValuedMultiVector = typename Xpetra::MultiVector<real_type, LO, GO, NO>;
234 typedef Xpetra::MultiVectorFactory<real_type, LO, GO, NO> RealValuedMultiVectorFactory;
235
236 RCP<RealValuedMultiVector> mv = RealValuedMultiVectorFactory::Build(fc_splitting->getMap(), 1);
237 ArrayRCP<real_type> mv_data = mv->getDataNonConst(0);
238
239 // FC Splitting
240 ArrayRCP<const LO> fc_data = fc_splitting->getData(0);
241 for (LO i = 0; i < (LO)fc_data.size(); i++)
242 mv_data[i] = Teuchos::as<real_type>(fc_data[i]);
243 Xpetra::IO<real_type, LO, GO, NO>::Write(out_fc, *mv);
244
245 // Block Number
246 if (!BlockNumber.is_null()) {
247 RCP<RealValuedMultiVector> mv2 = RealValuedMultiVectorFactory::Build(BlockNumber->getMap(), 1);
248 ArrayRCP<real_type> mv_data2 = mv2->getDataNonConst(0);
249 ArrayRCP<const LO> b_data = BlockNumber->getData(0);
250 for (LO i = 0; i < (LO)b_data.size(); i++) {
251 mv_data2[i] = Teuchos::as<real_type>(b_data[i]);
252 }
253 Xpetra::IO<real_type, LO, GO, NO>::Write(out_block, *mv2);
254 }
255 }
256
257#endif
258
259 /* Generate reindexing arrays */
260 // Note: cpoint2pcol is ghosted if myPointType is
261 // NOTE: Since the ghosted coarse column map follows the ordering of
262 // the fine column map, this *should* work, because it is in local indices.
263 // pcol2cpoint - Takes a LCID for P and turns in into an LCID for A.
264 // cpoint2pcol - Takes a LCID for A --- if it is a C Point --- and turns in into an LCID for P.
265 Array<LO> cpoint2pcol(myPointType.size(), LO_INVALID);
266 Array<LO> pcol2cpoint(coarseMap->getLocalNumElements(), LO_INVALID);
267 LO num_c_points = 0;
268 LO num_f_points = 0;
269 for (LO i = 0; i < (LO)myPointType.size(); i++) {
270 if (myPointType[i] == C_PT) {
271 cpoint2pcol[i] = num_c_points;
272 num_c_points++;
273 } else if (myPointType[i] == F_PT)
274 num_f_points++;
275 }
276 for (LO i = 0; i < (LO)cpoint2pcol.size(); i++) {
277 if (cpoint2pcol[i] != LO_INVALID)
278 pcol2cpoint[cpoint2pcol[i]] = i;
279 }
280
281 // Generate edge strength flags (this will make everything easier later)
282 // These do *not* need to be ghosted (unlike A)
283 Teuchos::Array<size_t> eis_rowptr;
284 Teuchos::Array<bool> edgeIsStrong;
285 {
286 SubFactoryMonitor sfm(*this, "Strength Flags", coarseLevel);
287 GenerateStrengthFlags(*A, *graph, eis_rowptr, edgeIsStrong);
288 }
289
290 // Phase 3: Generate the P matrix
291 RCP<const Map> coarseColMap = coarseMap;
292 RCP<const Map> coarseDomainMap = ownedCoarseMap;
293 if (scheme == "ext+i") {
294 SubFactoryMonitor sfm(*this, "Ext+i Interpolation", coarseLevel);
295 Coarsen_Ext_Plus_I(*A, Aghost, *graph, coarseColMap, coarseDomainMap, num_c_points, num_f_points, myPointType(), myPointType_ghost(), cpoint2pcol, pcol2cpoint, eis_rowptr, edgeIsStrong, BlockNumber, P);
296 } else if (scheme == "direct") {
297 SubFactoryMonitor sfm(*this, "Direct Interpolation", coarseLevel);
298 Coarsen_Direct(*A, Aghost, *graph, coarseColMap, coarseDomainMap, num_c_points, num_f_points, myPointType(), myPointType_ghost(), cpoint2pcol, pcol2cpoint, eis_rowptr, edgeIsStrong, BlockNumber, P);
299 } else if (scheme == "classical modified") {
300 SubFactoryMonitor sfm(*this, "Classical Modified Interpolation", coarseLevel);
301 Coarsen_ClassicalModified(*A, Aghost, *graph, coarseColMap, coarseDomainMap, num_c_points, num_f_points, myPointType(), myPointType_ghost(), cpoint2pcol, pcol2cpoint, eis_rowptr, edgeIsStrong, BlockNumber, remoteOnlyImporter, P);
302 }
303 // NOTE: ParameterList validator will check this guy so we don't really need an "else" here
304
305#ifdef CMS_DEBUG
306 Xpetra::IO<SC, LO, GO, NO>::Write("classical_p.mat." + std::to_string(coarseLevel.GetLevelID()), *P);
307 // Xpetra::IO<SC,LO,GO,NO>::WriteLocal("classical_p.mat."+std::to_string(coarseLevel.GetLevelID()), *P);
308#endif
309
310 // Save output
311 Set(coarseLevel, "P", P);
312 RCP<const CrsGraph> pg = P->getCrsGraph();
313 Set(coarseLevel, "P Graph", pg);
314
315 // RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
316 // P->apply(*fineNullspace, *coarseNullspace, Teuchos::TRANS, Teuchos::ScalarTraits<SC>::one(), Teuchos::ScalarTraits<SC>::zero());
317 // Set(coarseLevel, "Nullspace", coarseNullspace);
318
319 if (IsPrint(Statistics1)) {
320 RCP<ParameterList> params = rcp(new ParameterList());
321 params->set("printLoadBalancingInfo", true);
322 GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*P, "P", params);
323 }
324}
325/* ************************************************************************* */
326template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
328 Coarsen_ClassicalModified(const Matrix& A, const RCP<const Matrix>& Aghost, const LWGraph& graph, RCP<const Map>& coarseColMap, RCP<const Map>& coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO>& myPointType, const Teuchos::ArrayView<const LO>& myPointType_ghost, const Teuchos::Array<LO>& cpoint2pcol, const Teuchos::Array<LO>& pcol2cpoint, Teuchos::Array<size_t>& eis_rowptr, Teuchos::Array<bool>& edgeIsStrong, RCP<LocalOrdinalVector>& BlockNumber, RCP<const Import> remoteOnlyImporter, RCP<Matrix>& P) const {
329 /* ============================================================= */
330 /* Phase 3 : Classical Modified Interpolation */
331 /* De Sterck, Falgout, Nolting and Yang. "Distance-two */
332 /* interpolation for parallel algebraic multigrid", NLAA 2008 */
333 /* 15:115-139 */
334 /* ============================================================= */
335 /* Definitions: */
336 /* F = F-points */
337 /* C = C-points */
338 /* N_i = non-zero neighbors of node i */
339 /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
340 /* F_i^s = F \cap S_i [strong F-neighbors of i] */
341 /* C_i^s = C \cap S_i [strong C-neighbors of i] */
342
343 /* N_i^w = N_i\ (F_i^s \cup C_i^s) [weak neighbors of i] */
344 /* This guy has a typo. The paper had a \cap instead of \cup */
345 /* I would note that this set can contain both F-points and */
346 /* C-points. They're just weak neighbors of this guy. */
347 /* Note that N_i^w \cup F_i^s \cup C_i^s = N_i by construction */
348
349 /* \bar{a}_ij = { 0, if sign(a_ij) == sign(a_ii) */
350 /* { a_ij, otherwise */
351
352 /* F_i^s\star = {k\in N_i | C_i^s \cap C_k^s = \emptyset} */
353 /* [set of F-neighbors of i that do not share a strong */
354 /* C-neighbor with i] */
355
356 /* Rewritten Equation (9) on p. 120 */
357 /* \tilde{a}_ii = (a_ij + \sum_{k\in{N_i^w \cup F_i^s\star}} a_ik */
358 /* */
359 /* f_ij = \sum_{k\in{F_i^s\setminusF_i^s*}} \frac{a_ik \bar{a}_kj}{\sum_{m\inC_i^s \bar{a}_km}} */
360 /* */
361 /* w_ij = \frac{1}{\tilde{a}_ii} ( a_ij + f_ij) for all j in C_i^s */
362
363 // const point_type F_PT = ClassicalMapFactory::F_PT;
366 using STS = typename Teuchos::ScalarTraits<SC>;
367 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
368 // size_t ST_INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
369 SC SC_ZERO = STS::zero();
370#ifdef CMS_DEBUG
371 int rank = A.getRowMap()->getComm()->getRank();
372#endif
373
374 // Get the block number if we have it.
375 ArrayRCP<const LO> block_id;
376 if (!BlockNumber.is_null())
377 block_id = BlockNumber->getData(0);
378
379 // Initial (estimated) allocation
380 // NOTE: If we only used Tpetra, then we could use these guys as is, but because Epetra, we can't, so there
381 // needs to be a copy below.
382 size_t Nrows = A.getLocalNumRows();
383 double c_point_density = (double)num_c_points / (num_c_points + num_f_points);
384 double mean_strong_neighbors_per_row = (double)graph.GetNodeNumEdges() / graph.GetNodeNumVertices();
385 // double mean_neighbors_per_row = (double)A.getLocalNumEntries() / Nrows;
386 double nnz_per_row_est = c_point_density * mean_strong_neighbors_per_row;
387
388 size_t nnz_est = std::max(Nrows, std::min((size_t)A.getLocalNumEntries(), (size_t)(nnz_per_row_est * Nrows)));
389 Array<size_t> tmp_rowptr(Nrows + 1);
390 Array<LO> tmp_colind(nnz_est);
391
392 // Algorithm (count+realloc)
393 // For each row, i,
394 // - Count the number of elements in \hat{C}_j, aka [C-neighbors and C-neighbors of strong F-neighbors of i]
395 size_t ct = 0;
396 for (LO row = 0; row < (LO)Nrows; row++) {
397 size_t row_start = eis_rowptr[row];
398 ArrayView<const LO> indices;
399 ArrayView<const SC> vals;
400 std::set<LO> C_hat;
401 if (myPointType[row] == DIRICHLET_PT) {
402 // Dirichlet points get ignored completely
403 } else if (myPointType[row] == C_PT) {
404 // C-Points get a single 1 in their row
405 C_hat.insert(cpoint2pcol[row]);
406 } else {
407 // F-Points have a more complicated interpolation
408
409 // C-neighbors of row
410 A.getLocalRowView(row, indices, vals);
411 for (LO j = 0; j < indices.size(); j++)
412 if (myPointType[indices[j]] == C_PT && edgeIsStrong[row_start + j])
413 C_hat.insert(cpoint2pcol[indices[j]]);
414 } // end else
415
416 // Realloc if needed
417 if (ct + (size_t)C_hat.size() > (size_t)tmp_colind.size()) {
418 tmp_colind.resize(std::max(ct + (size_t)C_hat.size(), (size_t)2 * tmp_colind.size()));
419 }
420
421 // Copy
422 std::copy(C_hat.begin(), C_hat.end(), tmp_colind.begin() + ct);
423 ct += C_hat.size();
424 tmp_rowptr[row + 1] = tmp_rowptr[row] + C_hat.size();
425 }
426 // Resize down
427 tmp_colind.resize(tmp_rowptr[Nrows]);
428
429 RCP<CrsMatrix> Pcrs = CrsMatrixFactory::Build(A.getRowMap(), coarseColMap, 0);
430 ArrayRCP<size_t> P_rowptr;
431 ArrayRCP<LO> P_colind;
432 ArrayRCP<SC> P_values;
433
434 if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
435 P_rowptr = Teuchos::arcpFromArray(tmp_rowptr);
436 P_colind = Teuchos::arcpFromArray(tmp_colind);
437 P_values.resize(P_rowptr[Nrows]);
438 } else {
439 // Make the matrix and then get the graph out of it (necessary for Epetra)
440 // NOTE: The lack of an Epetra_CrsGraph::ExpertStaticFillComplete makes this
441 // impossible to do the obvious way
442 Pcrs->allocateAllValues(tmp_rowptr[Nrows], P_rowptr, P_colind, P_values);
443 std::copy(tmp_rowptr.begin(), tmp_rowptr.end(), P_rowptr.begin());
444 std::copy(tmp_colind.begin(), tmp_colind.end(), P_colind.begin());
445 Pcrs->setAllValues(P_rowptr, P_colind, P_values);
446 Pcrs->expertStaticFillComplete(/*domain*/ coarseDomainMap, /*range*/ A.getDomainMap());
447 }
448
449 // Generate a remote-ghosted version of the graph (if we're in parallel)
450 RCP<const CrsGraph> Pgraph;
451 RCP<const CrsGraph> Pghost;
452 // TODO: We might want to be more efficient here and actually use
453 // Pgraph in the matrix constructor.
454 ArrayRCP<const size_t> Pghost_rowptr;
455 ArrayRCP<const LO> Pghost_colind;
456 if (!remoteOnlyImporter.is_null()) {
457 if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
458 RCP<CrsGraph> tempPgraph = CrsGraphFactory::Build(A.getRowMap(), coarseColMap, P_rowptr, P_colind);
459 tempPgraph->fillComplete(coarseDomainMap, A.getDomainMap());
460 Pgraph = tempPgraph;
461 } else {
462 // Epetra does not have a graph constructor that uses rowptr and colind.
463 Pgraph = Pcrs->getCrsGraph();
464 }
465 TEUCHOS_ASSERT(!Pgraph.is_null());
466 Pghost = CrsGraphFactory::Build(Pgraph, *remoteOnlyImporter, Pgraph->getDomainMap(), remoteOnlyImporter->getTargetMap());
467 Pghost->getAllIndices(Pghost_rowptr, Pghost_colind);
468 }
469
470 // Gustavson-style perfect hashing
471 ArrayRCP<LO> Acol_to_Pcol(A.getColMap()->getLocalNumElements(), LO_INVALID);
472
473 // Get a quick reindexing array from Pghost LCIDs to P LCIDs
474 ArrayRCP<LO> Pghostcol_to_Pcol;
475 if (!Pghost.is_null()) {
476 Pghostcol_to_Pcol.resize(Pghost->getColMap()->getLocalNumElements(), LO_INVALID);
477 for (LO i = 0; i < (LO)Pghost->getColMap()->getLocalNumElements(); i++)
478 Pghostcol_to_Pcol[i] = Pgraph->getColMap()->getLocalElement(Pghost->getColMap()->getGlobalElement(i));
479 } // end Pghost
480
481 // Get a quick reindexing array from Aghost LCIDs to A LCIDs
482 ArrayRCP<LO> Aghostcol_to_Acol;
483 if (!Aghost.is_null()) {
484 Aghostcol_to_Acol.resize(Aghost->getColMap()->getLocalNumElements(), LO_INVALID);
485 for (LO i = 0; i < (LO)Aghost->getColMap()->getLocalNumElements(); i++)
486 Aghostcol_to_Acol[i] = A.getColMap()->getLocalElement(Aghost->getColMap()->getGlobalElement(i));
487 } // end Aghost
488
489 // Algorithm (numeric)
490 for (LO i = 0; i < (LO)Nrows; i++) {
491 if (myPointType[i] == DIRICHLET_PT) {
492 // Dirichlet points get ignored completely
493#ifdef CMS_DEBUG
494 // DEBUG
495 printf("[%d] ** A(%d,:) is a Dirichlet-Point.\n", rank, i);
496#endif
497 } else if (myPointType[i] == C_PT) {
498 // C Points get a single 1 in their row
499 P_values[P_rowptr[i]] = Teuchos::ScalarTraits<SC>::one();
500#ifdef CMS_DEBUG
501 // DEBUG
502 printf("[%d] ** A(%d,:) is a C-Point.\n", rank, i);
503#endif
504 } else {
505 // F Points get all of the fancy stuff
506#ifdef CMS_DEBUG
507 // DEBUG
508 printf("[%d] ** A(%d,:) is a F-Point.\n", rank, i);
509#endif
510
511 // Get all of the relevant information about this row
512 ArrayView<const LO> A_indices_i, A_indices_k;
513 ArrayView<const SC> A_vals_i, A_vals_k;
514 A.getLocalRowView(i, A_indices_i, A_vals_i);
515 size_t row_start = eis_rowptr[i];
516
517 ArrayView<const LO> P_indices_i = P_colind.view(P_rowptr[i], P_rowptr[i + 1] - P_rowptr[i]);
518 ArrayView<SC> P_vals_i = P_values.view(P_rowptr[i], P_rowptr[i + 1] - P_rowptr[i]);
519
520 // FIXME: Do we need this?
521 for (LO j = 0; j < (LO)P_vals_i.size(); j++)
522 P_vals_i[j] = SC_ZERO;
523
524 // Stash the hash: Flag any strong C-points with their index into P_colind
525 // NOTE: We'll consider any points that are LO_INVALID or less than P_rowptr[i] as not strong C-points
526 for (LO j = 0; j < (LO)P_indices_i.size(); j++) {
527 Acol_to_Pcol[pcol2cpoint[P_indices_i[j]]] = P_rowptr[i] + j;
528 }
529
530 // Loop over the entries in the row
531 SC first_denominator = SC_ZERO;
532#ifdef CMS_DEBUG
533 SC a_ii = SC_ZERO;
534#endif
535 for (LO k0 = 0; k0 < (LO)A_indices_i.size(); k0++) {
536 LO k = A_indices_i[k0];
537 SC a_ik = A_vals_i[k0];
538 LO pcol_k = Acol_to_Pcol[k];
539
540 if (k == i) {
541 // Case A: Diagonal value (add to first denominator)
542 // FIXME: Add BlockNumber matching here
543 first_denominator += a_ik;
544#ifdef CMS_DEBUG
545 a_ii = a_ik;
546 printf("- A(%d,%d) is the diagonal\n", i, k);
547#endif
548
549 } else if (myPointType[k] == DIRICHLET_PT) {
550 // Case B: Ignore dirichlet connections completely
551#ifdef CMS_DEBUG
552 printf("- A(%d,%d) is a Dirichlet point\n", i, k);
553#endif
554
555 } else if (pcol_k != LO_INVALID && pcol_k >= (LO)P_rowptr[i]) {
556 // Case C: a_ik is strong C-Point (goes directly into the weight)
557 P_values[pcol_k] += a_ik;
558#ifdef CMS_DEBUG
559 printf("- A(%d,%d) is a strong C-Point\n", i, k);
560#endif
561 } else if (!edgeIsStrong[row_start + k0]) {
562 // Case D: Weak non-Dirichlet neighbor (add to first denominator)
563 if (block_id.size() == 0 || block_id[i] == block_id[k]) {
564 first_denominator += a_ik;
565#ifdef CMS_DEBUG
566 printf("- A(%d,%d) is weak adding to diagonal(%d,%d) (%6.4e)\n", i, k, block_id[i], block_id[k], a_ik);
567 } else {
568 printf("- A(%d,%d) is weak but does not match blocks (%d,%d), discarding\n", i, k, block_id[i], block_id[k]);
569#endif
570 }
571
572 } else { // Case E
573 // Case E: Strong F-Point (adds to the first denominator if we don't share a
574 // a strong C-Point with i; adds to the second denominator otherwise)
575#ifdef CMS_DEBUG
576 printf("- A(%d,%d) is a strong F-Point\n", i, k);
577#endif
578
579 SC a_kk = SC_ZERO;
580 SC second_denominator = SC_ZERO;
581 int sign_akk = 0;
582
583 if (k < (LO)Nrows) {
584 // Grab the diagonal a_kk
585 A.getLocalRowView(k, A_indices_k, A_vals_k);
586 for (LO m0 = 0; m0 < (LO)A_indices_k.size(); m0++) {
587 LO m = A_indices_k[m0];
588 if (k == m) {
589 a_kk = A_vals_k[m0];
590 break;
591 }
592 } // end for A_indices_k
593
594 // Compute the second denominator term
595 sign_akk = Sign(a_kk);
596 for (LO m0 = 0; m0 < (LO)A_indices_k.size(); m0++) {
597 LO m = A_indices_k[m0];
598 if (m != k && Acol_to_Pcol[A_indices_k[m0]] >= (LO)P_rowptr[i]) {
599 SC a_km = A_vals_k[m0];
600 second_denominator += (Sign(a_km) == sign_akk ? SC_ZERO : a_km);
601 }
602 } // end for A_indices_k
603
604 // Now we have the second denominator, for this particular strong F point.
605 // So we can now add the sum to the w_ij components for the P values
606 if (second_denominator != SC_ZERO) {
607 for (LO j0 = 0; j0 < (LO)A_indices_k.size(); j0++) {
608 LO j = A_indices_k[j0];
609 // NOTE: Row k should be in fis_star, so I should have to check for diagonals here
610 // printf("Acol_to_Pcol[%d] = %d P_values.size() = %d\n",j,Acol_to_Pcol[j],(int)P_values.size());
611 if (Acol_to_Pcol[j] >= (LO)P_rowptr[i]) {
612 SC a_kj = A_vals_k[j0];
613 SC sign_akj_val = sign_akk == Sign(a_kj) ? SC_ZERO : a_kj;
614 P_values[Acol_to_Pcol[j]] += a_ik * sign_akj_val / second_denominator;
615#ifdef CMS_DEBUG
616 printf("- - Unscaled P(%d,A-%d) += %6.4e = %5.4e\n", i, j, a_ik * sign_akj_val / second_denominator, P_values[Acol_to_Pcol[j]]);
617#endif
618 }
619 } // end for A_indices_k
620 } // end if second_denominator != 0
621 else {
622#ifdef CMS_DEBUG
623 printf("- - A(%d,%d) second denominator is zero\n", i, k);
624#endif
625 if (block_id.size() == 0 || block_id[i] == block_id[k])
626 first_denominator += a_ik;
627 } // end else second_denominator != 0
628 } // end if k < Nrows
629 else {
630 // Ghost row
631 LO kless = k - Nrows;
632 // Grab the diagonal a_kk
633 // NOTE: ColMap is not locally fitted to the RowMap
634 // so we need to check GIDs here
635 Aghost->getLocalRowView(kless, A_indices_k, A_vals_k);
636 GO k_g = Aghost->getRowMap()->getGlobalElement(kless);
637 for (LO m0 = 0; m0 < (LO)A_indices_k.size(); m0++) {
638 GO m_g = Aghost->getColMap()->getGlobalElement(A_indices_k[m0]);
639 if (k_g == m_g) {
640 a_kk = A_vals_k[m0];
641 break;
642 }
643 } // end for A_indices_k
644
645 // Compute the second denominator term
646 sign_akk = Sign(a_kk);
647 for (LO m0 = 0; m0 < (LO)A_indices_k.size(); m0++) {
648 GO m_g = Aghost->getColMap()->getGlobalElement(A_indices_k[m0]);
649 LO mghost = A_indices_k[m0]; // Aghost LCID
650 LO m = Aghostcol_to_Acol[mghost]; // A's LID (could be LO_INVALID)
651 if (m_g != k_g && m != LO_INVALID && Acol_to_Pcol[m] >= (LO)P_rowptr[i]) {
652 SC a_km = A_vals_k[m0];
653 second_denominator += (Sign(a_km) == sign_akk ? SC_ZERO : a_km);
654 }
655 } // end for A_indices_k
656
657 // Now we have the second denominator, for this particular strong F point.
658 // So we can now add the sum to the w_ij components for the P values
659 if (second_denominator != SC_ZERO) {
660 for (LO j0 = 0; j0 < (LO)A_indices_k.size(); j0++) {
661 LO jghost = A_indices_k[j0]; // Aghost LCID
662 LO j = Aghostcol_to_Acol[jghost]; // A's LID (could be LO_INVALID)
663 // NOTE: Row k should be in fis_star, so I should have to check for diagonals here
664 if ((j != LO_INVALID) && (Acol_to_Pcol[j] >= (LO)P_rowptr[i])) {
665 SC a_kj = A_vals_k[j0];
666 SC sign_akj_val = sign_akk == Sign(a_kj) ? SC_ZERO : a_kj;
667 P_values[Acol_to_Pcol[j]] += a_ik * sign_akj_val / second_denominator;
668#ifdef CMS_DEBUG
669 printf("- - Unscaled P(%d,A-%d) += %6.4e\n", i, j, a_ik * sign_akj_val / second_denominator);
670#endif
671 }
672
673 } // end for A_indices_k
674 } // end if second_denominator != 0
675 else {
676#ifdef CMS_DEBUG
677 printf("- - A(%d,%d) second denominator is zero\n", i, k);
678#endif
679 if (block_id.size() == 0 || block_id[i] == block_id[k])
680 first_denominator += a_ik;
681 } // end else second_denominator != 0
682 } // end else k < Nrows
683 } // end else Case A,...,E
684
685 } // end for A_indices_i
686
687 // Now, downscale by the first_denominator
688 if (first_denominator != SC_ZERO) {
689 for (LO j0 = 0; j0 < (LO)P_indices_i.size(); j0++) {
690#ifdef CMS_DEBUG
691 SC old_pij = P_vals_i[j0];
692 P_vals_i[j0] /= -first_denominator;
693 printf("P(%d,%d) = %6.4e = %6.4e / (%6.4e + %6.4e)\n", i, P_indices_i[j0], P_vals_i[j0], old_pij, a_ii, first_denominator - a_ii);
694#else
695 P_vals_i[j0] /= -first_denominator;
696#endif
697 } // end for P_indices_i
698 } // end if first_denominator != 0
699
700 } // end else C-Point
701
702 } // end if i < Nrows
703
704 // Finish up
705 Pcrs->setAllValues(P_rowptr, P_colind, P_values);
706 Pcrs->expertStaticFillComplete(/*domain*/ coarseDomainMap, /*range*/ A.getDomainMap());
707 // Wrap from CrsMatrix to Matrix
708 P = rcp(new CrsMatrixWrap(Pcrs));
709
710} // end Coarsen_ClassicalModified
711
712/* ************************************************************************* */
713template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
715 Coarsen_Direct(const Matrix& A, const RCP<const Matrix>& Aghost, const LWGraph& graph, RCP<const Map>& coarseColMap, RCP<const Map>& coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO>& myPointType, const Teuchos::ArrayView<const LO>& myPointType_ghost, const Teuchos::Array<LO>& cpoint2pcol, const Teuchos::Array<LO>& pcol2cpoint, Teuchos::Array<size_t>& eis_rowptr, Teuchos::Array<bool>& edgeIsStrong, RCP<LocalOrdinalVector>& BlockNumber, RCP<Matrix>& P) const {
716 /* ============================================================= */
717 /* Phase 3 : Direct Interpolation */
718 /* We do not use De Sterck, Falgout, Nolting and Yang (2008) */
719 /* here. Instead we follow: */
720 /* Trottenberg, Oosterlee and Schueller, Multigrid, 2001. */
721 /* with some modifications inspirted by PyAMG */
722 /* ============================================================= */
723 /* Definitions: */
724 /* F = F-points */
725 /* C = C-points */
726 /* N_i = non-zero neighbors of node i */
727 /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
728 /* F_i^s = F \cap S_i [strong F-neighbors of i] */
729 /* C_i^s = C \cap S_i [strong C-neighbors of i] */
730 /* P_i = Set of interpolatory variables for row i [here = C_i^s] */
731
732 /* (A.2.17) from p. 426 */
733 /* a_ij^- = { a_ij, if a_ij < 0 */
734 /* { 0, otherwise */
735 /* a_ij^+ = { a_ij, if a_ij > 0 */
736 /* { 0, otherwise */
737 /* P_i^- = P_i \cap {k | a_ij^- != 0 and a_ij^- = a_ij} */
738 /* [strong C-neighbors with negative edges] */
739 /* P_i^+ = P_i \cap {k | a_ij^+ != 0 and a_ij^+ = a_ij} */
740 /* [strong C-neighbors with positive edges] */
741
742 /* de Sterck et al., gives us this: */
743 /* Rewritten Equation (6) on p. 119 */
744 /* w_ij = - a_ji / a_ii \frac{\sum_{k\in N_i} a_ik} {\sum k\inC_i^s} a_ik}, j\in C_i^s */
745
746 /* Trottenberg et al. (A.7.6) and (A.7.7) on p. 479 gives this: */
747 /* alpha_i = \frac{ \sum_{j\in N_i} a_ij^- }{ \sum_{k\in P_i} a_ik^- } */
748 /* beta_i = \frac{ \sum_{j\in N_i} a_ij^+ }{ \sum_{k\in P_i} a_ik^+ } */
749 /* w_ik = { - alpha_i (a_ik / a_ii), if k\in P_i^- */
750 /* { - beta_i (a_ik / a_ii), if k\in P_i^+ */
751 /* NOTE: The text says to modify, if P_i^+ is zero but it isn't entirely clear how that */
752 /* works. We'll follow the PyAMG implementation in a few important ways. */
753
756
757 // Initial (estimated) allocation
758 // NOTE: If we only used Tpetra, then we could use these guys as is, but because Epetra, we can't, so there
759 // needs to be a copy below.
760 using STS = typename Teuchos::ScalarTraits<SC>;
761 using MT = typename STS::magnitudeType;
762 using MTS = typename Teuchos::ScalarTraits<MT>;
763 size_t Nrows = A.getLocalNumRows();
764 double c_point_density = (double)num_c_points / (num_c_points + num_f_points);
765 double mean_strong_neighbors_per_row = (double)graph.GetNodeNumEdges() / graph.GetNodeNumVertices();
766 // double mean_neighbors_per_row = (double)A.getLocalNumEntries() / Nrows;
767 double nnz_per_row_est = c_point_density * mean_strong_neighbors_per_row;
768
769 size_t nnz_est = std::max(Nrows, std::min((size_t)A.getLocalNumEntries(), (size_t)(nnz_per_row_est * Nrows)));
770 SC SC_ZERO = STS::zero();
771 MT MT_ZERO = MTS::zero();
772 Array<size_t> tmp_rowptr(Nrows + 1);
773 Array<LO> tmp_colind(nnz_est);
774
775 // Algorithm (count+realloc)
776 // For each row, i,
777 // - Count the number of elements in \hat{C}_j, aka [C-neighbors and C-neighbors of strong F-neighbors of i]
778 size_t ct = 0;
779 for (LO row = 0; row < (LO)Nrows; row++) {
780 size_t row_start = eis_rowptr[row];
781 ArrayView<const LO> indices;
782 ArrayView<const SC> vals;
783 std::set<LO> C_hat;
784 if (myPointType[row] == DIRICHLET_PT) {
785 // Dirichlet points get ignored completely
786 } else if (myPointType[row] == C_PT) {
787 // C-Points get a single 1 in their row
788 C_hat.insert(cpoint2pcol[row]);
789 } else {
790 // F-Points have a more complicated interpolation
791
792 // C-neighbors of row
793 A.getLocalRowView(row, indices, vals);
794 for (LO j = 0; j < indices.size(); j++)
795 if (myPointType[indices[j]] == C_PT && edgeIsStrong[row_start + j])
796 C_hat.insert(cpoint2pcol[indices[j]]);
797 } // end else
798
799 // Realloc if needed
800 if (ct + (size_t)C_hat.size() > (size_t)tmp_colind.size()) {
801 tmp_colind.resize(std::max(ct + (size_t)C_hat.size(), (size_t)2 * tmp_colind.size()));
802 }
803
804 // Copy
805 std::copy(C_hat.begin(), C_hat.end(), tmp_colind.begin() + ct);
806 ct += C_hat.size();
807 tmp_rowptr[row + 1] = tmp_rowptr[row] + C_hat.size();
808 }
809 // Resize down
810 tmp_colind.resize(tmp_rowptr[Nrows]);
811
812 // Allocate memory & copy
813 P = rcp(new CrsMatrixWrap(A.getRowMap(), coarseColMap, 0));
814 RCP<CrsMatrix> PCrs = toCrsMatrix(P);
815 ArrayRCP<size_t> P_rowptr;
816 ArrayRCP<LO> P_colind;
817 ArrayRCP<SC> P_values;
818
819#ifdef CMS_DEBUG
820 printf("CMS: Allocating P w/ %d nonzeros\n", (int)tmp_rowptr[Nrows]);
821#endif
822 PCrs->allocateAllValues(tmp_rowptr[Nrows], P_rowptr, P_colind, P_values);
823 TEUCHOS_TEST_FOR_EXCEPTION(tmp_rowptr.size() != P_rowptr.size(), Exceptions::RuntimeError, "ClassicalPFactory: Allocation size error (rowptr)");
824 TEUCHOS_TEST_FOR_EXCEPTION(tmp_colind.size() != P_colind.size(), Exceptions::RuntimeError, "ClassicalPFactory: Allocation size error (colind)");
825 // FIXME: This can be short-circuited for Tpetra, if we decide we care
826 for (LO i = 0; i < (LO)Nrows + 1; i++)
827 P_rowptr[i] = tmp_rowptr[i];
828 for (LO i = 0; i < (LO)tmp_rowptr[Nrows]; i++)
829 P_colind[i] = tmp_colind[i];
830
831 // Algorithm (numeric)
832 for (LO i = 0; i < (LO)Nrows; i++) {
833 if (myPointType[i] == DIRICHLET_PT) {
834 // Dirichlet points get ignored completely
835#ifdef CMS_DEBUG
836 // DEBUG
837 printf("** A(%d,:) is a Dirichlet-Point.\n", i);
838#endif
839 } else if (myPointType[i] == C_PT) {
840 // C Points get a single 1 in their row
841 P_values[P_rowptr[i]] = Teuchos::ScalarTraits<SC>::one();
842#ifdef CMS_DEBUG
843 // DEBUG
844 printf("** A(%d,:) is a C-Point.\n", i);
845#endif
846 } else {
847 /* Trottenberg et al. (A.7.6) and (A.7.7) on p. 479 gives this: */
848 /* alpha_i = \frac{ \sum_{j\in N_i} a_ij^- }{ \sum_{k\in P_i} a_ik^- } */
849 /* beta_i = \frac{ \sum_{j\in N_i} a_ij^+ }{ \sum_{k\in P_i} a_ik^+ } */
850 /* w_ik = { - alpha_i (a_ik / a_ii), if k\in P_i^- */
851 /* { - beta_i (a_ik / a_ii), if k\in P_i^+ */
852 ArrayView<const LO> A_indices_i, A_incides_k;
853 ArrayView<const SC> A_vals_i, A_indices_k;
854 A.getLocalRowView(i, A_indices_i, A_vals_i);
855 size_t row_start = eis_rowptr[i];
856
857 ArrayView<LO> P_indices_i = P_colind.view(P_rowptr[i], P_rowptr[i + 1] - P_rowptr[i]);
858 ArrayView<SC> P_vals_i = P_values.view(P_rowptr[i], P_rowptr[i + 1] - P_rowptr[i]);
859
860#ifdef CMS_DEBUG
861 // DEBUG
862 {
863 char mylabel[5] = "FUCD";
864 char sw[3] = "ws";
865 printf("** A(%d,:) = ", i);
866 for (LO j = 0; j < (LO)A_indices_i.size(); j++) {
867 printf("%6.4e(%d-%c%c) ", A_vals_i[j], A_indices_i[j], mylabel[1 + myPointType[A_indices_i[j]]], sw[(int)edgeIsStrong[row_start + j]]);
868 }
869 printf("\n");
870 }
871#endif
872
873 SC a_ii = SC_ZERO;
874 SC pos_numerator = SC_ZERO, neg_numerator = SC_ZERO;
875 SC pos_denominator = SC_ZERO, neg_denominator = SC_ZERO;
876 // Find the diagonal and compute the sum ratio
877 for (LO j = 0; j < (LO)A_indices_i.size(); j++) {
878 SC a_ik = A_vals_i[j];
879 LO k = A_indices_i[j];
880
881 // Diagonal
882 if (i == k) {
883 a_ii = a_ik;
884 }
885 // Only strong C-neighbors are in the denomintor
886 if (myPointType[k] == C_PT && edgeIsStrong[row_start + j]) {
887 if (STS::real(a_ik) > MT_ZERO)
888 pos_denominator += a_ik;
889 else
890 neg_denominator += a_ik;
891 }
892
893 // All neighbors are in the numerator
894 // NOTE: As per PyAMG, this does not include the diagonal
895 if (i != k) {
896 if (STS::real(a_ik) > MT_ZERO)
897 pos_numerator += a_ik;
898 else
899 neg_numerator += a_ik;
900 }
901 }
902 SC alpha = (neg_denominator == MT_ZERO) ? SC_ZERO : (neg_numerator / neg_denominator);
903 SC beta = (pos_denominator == MT_ZERO) ? SC_ZERO : (pos_numerator / pos_denominator);
904 alpha /= -a_ii;
905 beta /= -a_ii;
906
907 // Loop over the entries
908 for (LO p_j = 0; p_j < (LO)P_indices_i.size(); p_j++) {
909 LO P_col = pcol2cpoint[P_indices_i[p_j]];
910 SC a_ij = SC_ZERO;
911
912 // Find A_ij (if it is there)
913 // FIXME: We can optimize this if we assume sorting
914 for (LO a_j = 0; a_j < (LO)A_indices_i.size(); a_j++) {
915 if (A_indices_i[a_j] == P_col) {
916 a_ij = A_vals_i[a_j];
917 break;
918 }
919 }
920 SC w_ij = (STS::real(a_ij) < 0) ? (alpha * a_ij) : (beta * a_ij);
921#ifdef CMS_DEBUG
922 SC alpha_or_beta = (STS::real(a_ij) < 0) ? alpha : beta;
923 printf("P(%d,%d/%d) = - %6.4e * %6.4e = %6.4e\n", i, P_indices_i[p_j], pcol2cpoint[P_indices_i[p_j]], alpha_or_beta, a_ij, w_ij);
924#endif
925 P_vals_i[p_j] = w_ij;
926 } // end for A_indices_i
927 } // end else C_PT
928 } // end for Numrows
929
930 // Finish up
931 PCrs->setAllValues(P_rowptr, P_colind, P_values);
932 PCrs->expertStaticFillComplete(/*domain*/ coarseDomainMap, /*range*/ A.getDomainMap());
933}
934
935/* ************************************************************************* */
936template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
938 Coarsen_Ext_Plus_I(const Matrix& A, const RCP<const Matrix>& Aghost, const LWGraph& graph, RCP<const Map>& coarseColMap, RCP<const Map>& coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO>& myPointType, const Teuchos::ArrayView<const LO>& myPointType_ghost, const Teuchos::Array<LO>& cpoint2pcol, const Teuchos::Array<LO>& pcol2cpoint, Teuchos::Array<size_t>& eis_rowptr, Teuchos::Array<bool>& edgeIsStrong, RCP<LocalOrdinalVector>& BlockNumber, RCP<Matrix>& P) const {
939 /* ============================================================= */
940 /* Phase 3 : Extended+i Interpolation */
941 /* De Sterck, Falgout, Nolting and Yang. "Distance-two */
942 /* interpolation for parallel algebraic multigrid", NLAA 2008 */
943 /* 15:115-139 */
944 /* ============================================================= */
945 /* Definitions: */
946 /* F = F-points */
947 /* C = C-points */
948 /* N_i = non-zero neighbors of node i */
949 /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
950 /* F_i^s = F \cap S_i [strong F-neighbors of i] */
951 /* C_i^s = C \cap S_i [strong C-neighbors of i] */
952 /* N_i^w = N_i\ (F_i^s \cup C_i^s) [weak neighbors of i] */
953 /* This guy has a typo. The paper had a \cap instead of \cup */
954 /* I would note that this set can contain both F-points and */
955 /* C-points. They're just weak neighbors of this guy. */
956 /* Note that N_i^w \cup F_i^s \cup C_i^s = N_i by construction */
957
958 /* \hat{C}_i = C_i \cup (\bigcup_{j\inF_i^s} C_j) */
959 /* [C-neighbors and C-neighbors of strong F-neighbors of i] */
960 /* */
961
962 /* \bar{a}_ij = { 0, if sign(a_ij) == sign(a_ii) */
963 /* { a_ij, otherwise */
964
965 /* Rewritten Equation (19) on p. 123 */
966 /* f_ik = \frac{\bar{a}_kj}{\sum{l\in \hat{C}_i\cup {i}} \bar{a}_kl */
967 /* w_ij = -\tilde{a}_ii^{-1} (a_ij + \sum_{k\inF_i^s} a_ik f_ik */
968 /* for j in \hat{C}_i */
969
970 /* Rewritten Equation (20) on p. 124 [for the lumped diagonal] */
971 /* g_ik = \frac{\bar{a}_ki}{\sum{l\in \hat{C}_i\cup {i}} \bar{a}_kl */
972 /* \tilde{a}_ii = a_ii + \sum_{n\inN_i^w\setminus \hat{C}_i} a_in + \sum_{k\inF_i^s} a_ik g_ik */
973 TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "ClassicalPFactory: Ext+i not implemented");
974}
975
976/* ************************************************************************* */
977template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
979 GenerateStrengthFlags(const Matrix& A, const LWGraph& graph, Teuchos::Array<size_t>& eis_rowptr, Teuchos::Array<bool>& edgeIsStrong) const {
980 // To make this easier, we'll create a bool array equal to the nnz in the matrix
981 // so we know whether each edge is strong or not. This will save us a bunch of
982 // trying to match the graph and matrix later
983 size_t Nrows = A.getLocalNumRows();
984 eis_rowptr.resize(Nrows + 1);
985
986 if (edgeIsStrong.size() == 0) {
987 // Preferred
988 edgeIsStrong.resize(A.getLocalNumEntries(), false);
989 } else {
990 edgeIsStrong.resize(A.getLocalNumEntries(), false);
991 edgeIsStrong.assign(A.getLocalNumEntries(), false);
992 }
993
994 eis_rowptr[0] = 0;
995 for (LO i = 0; i < (LO)Nrows; i++) {
996 LO rowstart = eis_rowptr[i];
997 ArrayView<const LO> A_indices;
998 ArrayView<const SC> A_values;
999 A.getLocalRowView(i, A_indices, A_values);
1000 LO A_size = (LO)A_indices.size();
1001
1002 auto G_indices = graph.getNeighborVertices(i);
1003 LO G_size = (LO)G_indices.length;
1004
1005 // Both of these guys should be in the same (sorted) order, but let's check
1006 bool is_ok = true;
1007 for (LO j = 0; j < A_size - 1; j++)
1008 if (A_indices[j] >= A_indices[j + 1]) {
1009 is_ok = false;
1010 break;
1011 }
1012 for (LO j = 0; j < G_size - 1; j++)
1013 if (G_indices(j) >= G_indices(j + 1)) {
1014 is_ok = false;
1015 break;
1016 }
1017 TEUCHOS_TEST_FOR_EXCEPTION(!is_ok, Exceptions::RuntimeError, "ClassicalPFactory: Exected A and Graph to be sorted");
1018
1019 // Now cycle through and set the flags - if the edge is in G it is strong
1020 for (LO g_idx = 0, a_idx = 0; g_idx < G_size; g_idx++) {
1021 LO col = G_indices(g_idx);
1022 while (A_indices[a_idx] != col && a_idx < A_size) a_idx++;
1023 if (a_idx == A_size) {
1024 is_ok = false;
1025 break;
1026 }
1027 edgeIsStrong[rowstart + a_idx] = true;
1028 }
1029
1030 eis_rowptr[i + 1] = eis_rowptr[i] + A_size;
1031 }
1032}
1033
1034/* ************************************************************************* */
1035template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1037 GhostCoarseMap(const Matrix& A, const Import& Importer, const ArrayRCP<const LO> myPointType, const RCP<const Map>& coarseMap, RCP<const Map>& coarseColMap) const {
1039 const GO GO_INVALID = Teuchos::OrdinalTraits<GO>::invalid();
1040 RCP<GlobalOrdinalVector> d_coarseIds = GlobalOrdinalVectorFactory::Build(A.getRowMap());
1041 ArrayRCP<GO> d_data = d_coarseIds->getDataNonConst(0);
1042 LO ct = 0;
1043
1044 for (LO i = 0; i < (LO)d_data.size(); i++) {
1045 if (myPointType[i] == C_PT) {
1046 d_data[i] = coarseMap->getGlobalElement(ct);
1047 ct++;
1048 } else
1049 d_data[i] = GO_INVALID;
1050 }
1051
1052 // Ghost this guy
1053 RCP<GlobalOrdinalVector> c_coarseIds = GlobalOrdinalVectorFactory::Build(A.getColMap());
1054 c_coarseIds->doImport(*d_coarseIds, Importer, Xpetra::INSERT);
1055
1056 // If we assume that A is in Aztec ordering, then any subset of A's unknowns will
1057 // be in Aztec ordering as well, which means we can just condense these guys down
1058 // Overallocate, count and view
1059 ArrayRCP<GO> c_data = c_coarseIds->getDataNonConst(0);
1060
1061 Array<GO> c_gids(c_data.size());
1062 LO count = 0;
1063
1064 for (LO i = 0; i < (LO)c_data.size(); i++) {
1065 if (c_data[i] != GO_INVALID) {
1066 c_gids[count] = c_data[i];
1067 count++;
1068 }
1069 }
1070 // FIXME: Assumes scalar PDE
1071 std::vector<size_t> stridingInfo_(1);
1072 stridingInfo_[0] = 1;
1073 GO domainGIDOffset = 0;
1074
1075 coarseColMap = StridedMapFactory::Build(coarseMap->lib(),
1076 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
1077 c_gids.view(0, count),
1078 coarseMap->getIndexBase(),
1079 stridingInfo_,
1080 coarseMap->getComm(),
1081 domainGIDOffset);
1082}
1083
1084} // namespace MueLu
1085
1086#define MUELU_CLASSICALPFACTORY_SHORT
1087#endif // MUELU_CLASSICALPFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void Coarsen_ClassicalModified(const Matrix &A, const RCP< const Matrix > &Aghost, const LWGraph &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< const Import > remoteOnlyImporter, RCP< Matrix > &P) const
void Coarsen_Ext_Plus_I(const Matrix &A, const RCP< const Matrix > &Aghost, const LWGraph &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< Matrix > &P) const
void Coarsen_Direct(const Matrix &A, const RCP< const Matrix > &Aghost, const LWGraph &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< Matrix > &P) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
typename ClassicalMapFactory::point_type point_type
void GenerateStrengthFlags(const Matrix &A, const LWGraph &graph, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong) const
void GhostCoarseMap(const Matrix &A, const Import &Importer, const ArrayRCP< const LO > myPointType, const RCP< const Map > &coarseMap, RCP< const Map > &coarseColMap) const
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const
Return number of graph edges.
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex 'v'.
Lightweight MueLu representation of a compressed row storage graph.
Class that holds all level-specific information.
int GetLevelID() const
Return level number.
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
Namespace for MueLu classes and methods.
@ Debug
Print additional debugging information.
@ Statistics1
Print more statistics.