MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_TentativePFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_TENTATIVEPFACTORY_DEF_HPP
11#define MUELU_TENTATIVEPFACTORY_DEF_HPP
12
13#include <Xpetra_MapFactory.hpp>
14#include <Xpetra_Map.hpp>
15#include <Xpetra_CrsMatrix.hpp>
16#include <Xpetra_CrsGraphFactory.hpp>
17#include <Xpetra_Matrix.hpp>
18#include <Xpetra_MatrixMatrix.hpp>
19#include <Xpetra_MultiVector.hpp>
20#include <Xpetra_MultiVectorFactory.hpp>
21#include <Xpetra_VectorFactory.hpp>
22#include <Xpetra_Import.hpp>
23#include <Xpetra_ImportFactory.hpp>
24#include <Xpetra_CrsMatrixWrap.hpp>
25#include <Xpetra_StridedMap.hpp>
26#include <Xpetra_StridedMapFactory.hpp>
27#include <Xpetra_IO.hpp>
28
29#include "MueLu_KeepType.hpp"
30#include "Xpetra_TpetraBlockCrsMatrix.hpp"
31
33
34#include "MueLu_Aggregates.hpp"
35#include "MueLu_AmalgamationInfo.hpp"
36#include "MueLu_MasterList.hpp"
37#include "MueLu_Monitor.hpp"
38#include "MueLu_PerfUtils.hpp"
39#include "MueLu_Utilities.hpp"
40
41namespace MueLu {
42
43template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
45 RCP<ParameterList> validParamList = rcp(new ParameterList());
46
47#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
48 SET_VALID_ENTRY("tentative: calculate qr");
49 SET_VALID_ENTRY("tentative: build coarse coordinates");
50 SET_VALID_ENTRY("tentative: constant column sums");
51 SET_VALID_ENTRY("sa: keep tentative prolongator");
52#undef SET_VALID_ENTRY
53 validParamList->set<std::string>("Nullspace name", "Nullspace", "Name for the input nullspace");
54
55 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
56 validParamList->set<RCP<const FactoryBase> >("Aggregates", Teuchos::null, "Generating factory of the aggregates");
57 validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
58 validParamList->set<RCP<const FactoryBase> >("Scaled Nullspace", Teuchos::null, "Generating factory of the scaled nullspace");
59 validParamList->set<RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
60 validParamList->set<RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the coarse map");
61 validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory of the coordinates");
62 validParamList->set<RCP<const FactoryBase> >("Node Comm", Teuchos::null, "Generating factory of the node level communicator");
63
64 // Make sure we don't recursively validate options for the matrixmatrix kernels
65 ParameterList norecurse;
66 norecurse.disableRecursiveValidation();
67 validParamList->set<ParameterList>("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
68
69 return validParamList;
70}
71
72template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
74 const ParameterList& pL = GetParameterList();
75 // NOTE: This guy can only either be 'Nullspace' or 'Scaled Nullspace' or else the validator above will cause issues
76 std::string nspName = "Nullspace";
77 if (pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
78
79 Input(fineLevel, "A");
80 Input(fineLevel, "Aggregates");
81 Input(fineLevel, nspName);
82 Input(fineLevel, "UnAmalgamationInfo");
83 Input(fineLevel, "CoarseMap");
84 if (fineLevel.GetLevelID() == 0 &&
85 fineLevel.IsAvailable("Coordinates", NoFactory::get()) && // we have coordinates (provided by user app)
86 pL.get<bool>("tentative: build coarse coordinates")) { // and we want coordinates on other levels
87 bTransferCoordinates_ = true; // then set the transfer coordinates flag to true
88 Input(fineLevel, "Coordinates");
89 } else if (bTransferCoordinates_) {
90 Input(fineLevel, "Coordinates");
91 }
92}
93
94template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
96 return BuildP(fineLevel, coarseLevel);
97}
98
99template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
101 FactoryMonitor m(*this, "Build", coarseLevel);
102 typedef typename Teuchos::ScalarTraits<Scalar>::coordinateType coordinate_type;
103 typedef Xpetra::MultiVector<coordinate_type, LO, GO, NO> RealValuedMultiVector;
104 typedef Xpetra::MultiVectorFactory<coordinate_type, LO, GO, NO> RealValuedMultiVectorFactory;
105
106 const ParameterList& pL = GetParameterList();
107 std::string nspName = "Nullspace";
108 if (pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
109
110 RCP<Matrix> Ptentative;
111 RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
112 RCP<Aggregates> aggregates = Get<RCP<Aggregates> >(fineLevel, "Aggregates");
113 // No coarse DoFs so we need to bail by setting Ptentattive to null and returning
114 // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
115 if (aggregates->GetNumGlobalAggregatesComputeIfNeeded() == 0) {
116 Ptentative = Teuchos::null;
117 Set(coarseLevel, "P", Ptentative);
118 return;
119 }
120
121 RCP<AmalgamationInfo> amalgInfo = Get<RCP<AmalgamationInfo> >(fineLevel, "UnAmalgamationInfo");
122 RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, nspName);
123 RCP<const Map> coarseMap = Get<RCP<const Map> >(fineLevel, "CoarseMap");
124 RCP<RealValuedMultiVector> fineCoords;
125 if (bTransferCoordinates_) {
126 fineCoords = Get<RCP<RealValuedMultiVector> >(fineLevel, "Coordinates");
127 }
128
129 // FIXME: We should remove the NodeComm on levels past the threshold
130 if (fineLevel.IsAvailable("Node Comm")) {
131 RCP<const Teuchos::Comm<int> > nodeComm = Get<RCP<const Teuchos::Comm<int> > >(fineLevel, "Node Comm");
132 Set<RCP<const Teuchos::Comm<int> > >(coarseLevel, "Node Comm", nodeComm);
133 }
134
135 // NOTE: We check DomainMap here rather than RowMap because those are different for BlockCrs matrices
136 TEUCHOS_TEST_FOR_EXCEPTION(A->getDomainMap()->getLocalNumElements() != fineNullspace->getMap()->getLocalNumElements(),
137 Exceptions::RuntimeError, "MueLu::TentativePFactory::MakeTentative: Size mismatch between A and Nullspace");
138
139 RCP<MultiVector> coarseNullspace;
140 RCP<RealValuedMultiVector> coarseCoords;
141
142 if (bTransferCoordinates_) {
143 //*** Create the coarse coordinates ***
144 // First create the coarse map and coarse multivector
145 ArrayView<const GO> elementAList = coarseMap->getLocalElementList();
146 LO blkSize = 1;
147 if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null) {
148 blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize();
149 }
150 GO indexBase = coarseMap->getIndexBase();
151 LO numCoarseNodes = Teuchos::as<LO>(elementAList.size() / blkSize);
152 Array<GO> nodeList(numCoarseNodes);
153 const int numDimensions = fineCoords->getNumVectors();
154
155 for (LO i = 0; i < numCoarseNodes; i++) {
156 nodeList[i] = (elementAList[i * blkSize] - indexBase) / blkSize + indexBase;
157 }
158 Teuchos::RCP<Teuchos::ParameterList> params = Teuchos::rcp(new Teuchos::ParameterList());
159 params->set("compute global constants", false);
160 RCP<const Map> coarseCoordsMap = MapFactory::Build(fineCoords->getMap()->lib(),
161 blkSize * coarseMap->getGlobalNumElements(),
162 nodeList,
163 indexBase,
164 fineCoords->getMap()->getComm(),
165 params);
166 coarseCoords = RealValuedMultiVectorFactory::Build(coarseCoordsMap, numDimensions);
167
168 // Create overlapped fine coordinates to reduce global communication
169 RCP<RealValuedMultiVector> ghostedCoords;
170 if (aggregates->AggregatesCrossProcessors()) {
171 RCP<const Map> aggMap = aggregates->GetMap();
172 RCP<const Import> importer = ImportFactory::Build(fineCoords->getMap(), aggMap);
173
174 ghostedCoords = RealValuedMultiVectorFactory::Build(aggMap, numDimensions);
175 ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);
176 } else {
177 ghostedCoords = fineCoords;
178 }
179
180 // Get some info about aggregates
181 int myPID = coarseCoordsMap->getComm()->getRank();
182 LO numAggs = aggregates->GetNumAggregates();
183 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
184 const ArrayRCP<const LO> vertex2AggID = aggregates->GetVertex2AggId()->getData(0);
185 const ArrayRCP<const LO> procWinner = aggregates->GetProcWinner()->getData(0);
186
187 // Fill in coarse coordinates
188 for (int dim = 0; dim < numDimensions; ++dim) {
189 ArrayRCP<const coordinate_type> fineCoordsData = ghostedCoords->getData(dim);
190 ArrayRCP<coordinate_type> coarseCoordsData = coarseCoords->getDataNonConst(dim);
191
192 for (LO lnode = 0; lnode < Teuchos::as<LO>(vertex2AggID.size()); lnode++) {
193 if (procWinner[lnode] == myPID &&
194 lnode < fineCoordsData.size() &&
195 vertex2AggID[lnode] < coarseCoordsData.size() &&
196 Teuchos::ScalarTraits<coordinate_type>::isnaninf(fineCoordsData[lnode]) == false) {
197 coarseCoordsData[vertex2AggID[lnode]] += fineCoordsData[lnode];
198 }
199 }
200 for (LO agg = 0; agg < numAggs; agg++) {
201 coarseCoordsData[agg] /= aggSizes[agg];
202 }
203 }
204 }
205
206 if (!aggregates->AggregatesCrossProcessors()) {
207 if (Xpetra::Helpers<SC, LO, GO, NO>::isTpetraBlockCrs(A)) {
208 BuildPuncoupledBlockCrs(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.GetLevelID());
209 } else {
210 BuildPuncoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.GetLevelID());
211 }
212 } else
213 BuildPcoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
214
215 // If available, use striding information of fine level matrix A for range
216 // map and coarseMap as domain map; otherwise use plain range map of
217 // Ptent = plain range map of A for range map and coarseMap as domain map.
218 // NOTE:
219 // The latter is not really safe, since there is no striding information
220 // for the range map. This is not really a problem, since striding
221 // information is always available on the intermedium levels and the
222 // coarsest levels.
223 if (A->IsView("stridedMaps") == true)
224 Ptentative->CreateView("stridedMaps", A->getRowMap("stridedMaps"), coarseMap);
225
226 if (bTransferCoordinates_) {
227 Set(coarseLevel, "Coordinates", coarseCoords);
228 }
229 Set(coarseLevel, "Nullspace", coarseNullspace);
230 Set(coarseLevel, "P", Ptentative);
231
232 if (pL.get<bool>("sa: keep tentative prolongator")) {
233 coarseLevel.Set("Ptent", Ptentative, NoFactory::get());
234 coarseLevel.AddKeepFlag("Ptent", NoFactory::get(), MueLu::Final);
235 }
236
237 if (IsPrint(Statistics2)) {
238 RCP<ParameterList> params = rcp(new ParameterList());
239 params->set("printLoadBalancingInfo", true);
240 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ptentative, "Ptent", params);
241 }
242}
243
244template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
246 BuildPuncoupledBlockCrs(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
247 RCP<const Map> coarsePointMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
248 /* This routine generates a BlockCrs P for a BlockCrs A. There are a few assumptions here, which meet the use cases we care about, but could
249 be generalized later, if we ever need to do so:
250 1) Null space dimension === block size of matrix: So no elasticity right now
251 2) QR is not supported: Under assumption #1, this shouldn't cause problems.
252 3) Maps are "good": Aka the first chunk of the ColMap is the RowMap.
253
254 These assumptions keep our code way simpler and still support the use cases we actually care about.
255 */
256
257 RCP<const Map> rowMap = A->getRowMap();
258 RCP<const Map> rangeMap = A->getRangeMap();
259 RCP<const Map> colMap = A->getColMap();
260 // const size_t numFinePointRows = rangeMap->getLocalNumElements();
261 const size_t numFineBlockRows = rowMap->getLocalNumElements();
262
263 typedef Teuchos::ScalarTraits<SC> STS;
264 // typedef typename STS::magnitudeType Magnitude;
265 const SC zero = STS::zero();
266 const SC one = STS::one();
267 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
268
269 const GO numAggs = aggregates->GetNumAggregates();
270 const size_t NSDim = fineNullspace->getNumVectors();
271 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
272
273 // Need to generate the coarse block map
274 // NOTE: We assume NSDim == block size here
275 // NOTE: We also assume that coarseMap has contiguous GIDs
276 // const size_t numCoarsePointRows = coarsePointMap->getLocalNumElements();
277 const size_t numCoarseBlockRows = coarsePointMap->getLocalNumElements() / NSDim;
278 RCP<const Map> coarseBlockMap = MapFactory::Build(coarsePointMap->lib(),
279 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
280 numCoarseBlockRows,
281 coarsePointMap->getIndexBase(),
282 coarsePointMap->getComm());
283 // Sanity checking
284 const ParameterList& pL = GetParameterList();
285 const bool& doQRStep = pL.get<bool>("tentative: calculate qr");
286 const bool& constantColSums = pL.get<bool>("tentative: constant column sums");
287
288 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums, Exceptions::RuntimeError,
289 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
290
291 // The aggregates use the amalgamated column map, which in this case is what we want
292
293 // Aggregates map is based on the amalgamated column map
294 // We can skip global-to-local conversion if LIDs in row map are
295 // same as LIDs in column map
296 bool goodMap = MueLu::Utilities<SC, LO, GO, NO>::MapsAreNested(*rowMap, *colMap);
297
298 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
299 // aggStart is a pointer into aggToRowMapLO
300 // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
301 // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
302 ArrayRCP<LO> aggStart;
303 ArrayRCP<LO> aggToRowMapLO;
304 ArrayRCP<GO> aggToRowMapGO;
305 if (goodMap) {
306 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
307 GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
308 } else {
309 throw std::runtime_error("TentativePFactory::PuncoupledBlockCrs: Inconsistent maps not currently supported");
310 }
311
312 coarseNullspace = MultiVectorFactory::Build(coarsePointMap, NSDim);
313
314 // Pull out the nullspace vectors so that we can have random access.
315 ArrayRCP<ArrayRCP<const SC> > fineNS(NSDim);
316 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
317 for (size_t i = 0; i < NSDim; i++) {
318 fineNS[i] = fineNullspace->getData(i);
319 if (coarsePointMap->getLocalNumElements() > 0)
320 coarseNS[i] = coarseNullspace->getDataNonConst(i);
321 }
322
323 // BlockCrs requires that we build the (block) graph first, so let's do that...
324 // NOTE: Because we're assuming that the NSDim == BlockSize, we only have one
325 // block non-zero per row in the matrix;
326 RCP<CrsGraph> BlockGraph = CrsGraphFactory::Build(rowMap, coarseBlockMap, 0);
327 ArrayRCP<size_t> iaPtent;
328 ArrayRCP<LO> jaPtent;
329 BlockGraph->allocateAllIndices(numFineBlockRows, iaPtent, jaPtent);
330 ArrayView<size_t> ia = iaPtent();
331 ArrayView<LO> ja = jaPtent();
332
333 for (size_t i = 0; i < numFineBlockRows; i++) {
334 ia[i] = i;
335 ja[i] = INVALID;
336 }
337 ia[numCoarseBlockRows] = numCoarseBlockRows;
338
339 for (GO agg = 0; agg < numAggs; agg++) {
340 LO aggSize = aggStart[agg + 1] - aggStart[agg];
341 Xpetra::global_size_t offset = agg;
342
343 for (LO j = 0; j < aggSize; j++) {
344 // FIXME: Allow for bad maps
345 const LO localRow = aggToRowMapLO[aggStart[agg] + j];
346 const size_t rowStart = ia[localRow];
347 ja[rowStart] = offset;
348 }
349 }
350
351 // Compress storage (remove all INVALID, which happen when we skip zeros)
352 // We do that in-place
353 size_t ia_tmp = 0, nnz = 0;
354 for (size_t i = 0; i < numFineBlockRows; i++) {
355 for (size_t j = ia_tmp; j < ia[i + 1]; j++)
356 if (ja[j] != INVALID) {
357 ja[nnz] = ja[j];
358 nnz++;
359 }
360 ia_tmp = ia[i + 1];
361 ia[i + 1] = nnz;
362 }
363
364 if (rowMap->lib() == Xpetra::UseTpetra) {
365 // - Cannot resize for Epetra, as it checks for same pointers
366 // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
367 // NOTE: these invalidate ja and val views
368 jaPtent.resize(nnz);
369 }
370
371 GetOStream(Runtime1) << "TentativePFactory : generating block graph" << std::endl;
372 BlockGraph->setAllIndices(iaPtent, jaPtent);
373
374 // Managing labels & constants for ESFC
375 {
376 RCP<ParameterList> FCparams;
377 if (pL.isSublist("matrixmatrix: kernel params"))
378 FCparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
379 else
380 FCparams = rcp(new ParameterList);
381 // By default, we don't need global constants for TentativeP, but we do want it for the graph
382 // if we're printing statistics, so let's leave it on for now.
383 FCparams->set("compute global constants", FCparams->get("compute global constants", true));
384 std::string levelIDs = toString(levelID);
385 FCparams->set("Timer Label", std::string("MueLu::TentativeP-") + levelIDs);
386 RCP<const Export> dummy_e;
387 RCP<const Import> dummy_i;
388 BlockGraph->expertStaticFillComplete(coarseBlockMap, rowMap, dummy_i, dummy_e, FCparams);
389 }
390
391 // Now let's make a BlockCrs Matrix
392 // NOTE: Assumes block size== NSDim
393 RCP<Xpetra::CrsMatrix<SC, LO, GO, NO> > P_xpetra = Xpetra::CrsMatrixFactory<SC, LO, GO, NO>::BuildBlock(BlockGraph, coarsePointMap, rangeMap, NSDim);
394 RCP<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO> > P_tpetra = rcp_dynamic_cast<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO> >(P_xpetra);
395 if (P_tpetra.is_null()) throw std::runtime_error("BuildPUncoupled: Matrix factory did not return a Tpetra::BlockCrsMatrix");
396 RCP<CrsMatrixWrap> P_wrap = rcp(new CrsMatrixWrap(P_xpetra));
397
399 // "no-QR" option //
401 // Local Q factor is just the fine nullspace support over the current aggregate.
402 // Local R factor is the identity.
403 // NOTE: We're not going to do a QR here as we're assuming that blocksize == NSDim
404 // NOTE: "goodMap" case only
405 Teuchos::Array<Scalar> block(NSDim * NSDim, zero);
406 Teuchos::Array<LO> bcol(1);
407
408 GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
409 for (LO agg = 0; agg < numAggs; agg++) {
410 bcol[0] = agg;
411 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
412 Xpetra::global_size_t offset = agg * NSDim;
413
414 // Process each row in the local Q factor
415 // NOTE: Blocks are in row-major order
416 for (LO j = 0; j < aggSize; j++) {
417 const LO localBlockRow = aggToRowMapLO[aggStart[agg] + j];
418
419 for (size_t r = 0; r < NSDim; r++) {
420 LO localPointRow = localBlockRow * NSDim + r;
421 for (size_t c = 0; c < NSDim; c++)
422 block[r * NSDim + c] = fineNS[c][localPointRow];
423 }
424 // NOTE: Assumes columns==aggs and are ordered sequentially
425 P_tpetra->replaceLocalValues(localBlockRow, bcol(), block());
426
427 } // end aggSize
428
429 for (size_t j = 0; j < NSDim; j++)
430 coarseNS[j][offset + j] = one;
431
432 } // for (GO agg = 0; agg < numAggs; agg++)
433
434 Ptentative = P_wrap;
435}
436
437template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
439 BuildPcoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
440 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace) const {
441 typedef Teuchos::ScalarTraits<SC> STS;
442 typedef typename STS::magnitudeType Magnitude;
443 const SC zero = STS::zero();
444 const SC one = STS::one();
445
446 // number of aggregates
447 GO numAggs = aggregates->GetNumAggregates();
448
449 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
450 // aggStart is a pointer into aggToRowMap
451 // aggStart[i]..aggStart[i+1] are indices into aggToRowMap
452 // aggToRowMap[aggStart[i]]..aggToRowMap[aggStart[i+1]-1] are the DOFs in aggregate i
453 ArrayRCP<LO> aggStart;
454 ArrayRCP<GO> aggToRowMap;
455 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMap);
456
457 // find size of the largest aggregate
458 LO maxAggSize = 0;
459 for (GO i = 0; i < numAggs; ++i) {
460 LO sizeOfThisAgg = aggStart[i + 1] - aggStart[i];
461 if (sizeOfThisAgg > maxAggSize) maxAggSize = sizeOfThisAgg;
462 }
463
464 // dimension of fine level nullspace
465 const size_t NSDim = fineNullspace->getNumVectors();
466
467 // index base for coarse Dof map (usually 0)
468 GO indexBase = A->getRowMap()->getIndexBase();
469
470 const RCP<const Map> nonUniqueMap = amalgInfo->ComputeUnamalgamatedImportDofMap(*aggregates);
471 const RCP<const Map> uniqueMap = A->getDomainMap();
472 RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
473 RCP<MultiVector> fineNullspaceWithOverlap = MultiVectorFactory::Build(nonUniqueMap, NSDim);
474 fineNullspaceWithOverlap->doImport(*fineNullspace, *importer, Xpetra::INSERT);
475
476 // Pull out the nullspace vectors so that we can have random access.
477 ArrayRCP<ArrayRCP<const SC> > fineNS(NSDim);
478 for (size_t i = 0; i < NSDim; ++i)
479 fineNS[i] = fineNullspaceWithOverlap->getData(i);
480
481 // Allocate storage for the coarse nullspace.
482 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
483
484 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
485 for (size_t i = 0; i < NSDim; ++i)
486 if (coarseMap->getLocalNumElements() > 0) coarseNS[i] = coarseNullspace->getDataNonConst(i);
487
488 // This makes the rowmap of Ptent the same as that of A->
489 // This requires moving some parts of some local Q's to other processors
490 // because aggregates can span processors.
491 RCP<const Map> rowMapForPtent = A->getRowMap();
492 const Map& rowMapForPtentRef = *rowMapForPtent;
493
494 // Set up storage for the rows of the local Qs that belong to other processors.
495 // FIXME This is inefficient and could be done within the main loop below with std::vector's.
496 RCP<const Map> colMap = A->getColMap();
497
498 RCP<const Map> ghostQMap;
499 RCP<MultiVector> ghostQvalues;
500 Array<RCP<Xpetra::Vector<GO, LO, GO, Node> > > ghostQcolumns;
501 RCP<Xpetra::Vector<GO, LO, GO, Node> > ghostQrowNums;
502 ArrayRCP<ArrayRCP<SC> > ghostQvals;
503 ArrayRCP<ArrayRCP<GO> > ghostQcols;
504 ArrayRCP<GO> ghostQrows;
505
506 Array<GO> ghostGIDs;
507 for (LO j = 0; j < numAggs; ++j) {
508 for (LO k = aggStart[j]; k < aggStart[j + 1]; ++k) {
509 if (rowMapForPtentRef.isNodeGlobalElement(aggToRowMap[k]) == false) {
510 ghostGIDs.push_back(aggToRowMap[k]);
511 }
512 }
513 }
514 ghostQMap = MapFactory::Build(A->getRowMap()->lib(),
515 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
516 ghostGIDs,
517 indexBase, A->getRowMap()->getComm()); // JG:Xpetra::global_size_t>?
518 // Vector to hold bits of Q that go to other processors.
519 ghostQvalues = MultiVectorFactory::Build(ghostQMap, NSDim);
520 // Note that Epetra does not support MultiVectors templated on Scalar != double.
521 // So to work around this, we allocate an array of Vectors. This shouldn't be too
522 // expensive, as the number of Vectors is NSDim.
523 ghostQcolumns.resize(NSDim);
524 for (size_t i = 0; i < NSDim; ++i)
525 ghostQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
526 ghostQrowNums = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
527 if (ghostQvalues->getLocalLength() > 0) {
528 ghostQvals.resize(NSDim);
529 ghostQcols.resize(NSDim);
530 for (size_t i = 0; i < NSDim; ++i) {
531 ghostQvals[i] = ghostQvalues->getDataNonConst(i);
532 ghostQcols[i] = ghostQcolumns[i]->getDataNonConst(0);
533 }
534 ghostQrows = ghostQrowNums->getDataNonConst(0);
535 }
536
537 // importer to handle moving Q
538 importer = ImportFactory::Build(ghostQMap, A->getRowMap());
539
540 // Dense QR solver
541 Teuchos::SerialQRDenseSolver<LO, SC> qrSolver;
542
543 // Allocate temporary storage for the tentative prolongator.
544 Array<GO> globalColPtr(maxAggSize * NSDim, 0);
545 Array<LO> localColPtr(maxAggSize * NSDim, 0);
546 Array<SC> valPtr(maxAggSize * NSDim, 0.);
547
548 // Create column map for Ptent, estimate local #nonzeros in Ptent, and create Ptent itself.
549 const Map& coarseMapRef = *coarseMap;
550
551 // For the 3-arrays constructor
552 ArrayRCP<size_t> ptent_rowptr;
553 ArrayRCP<LO> ptent_colind;
554 ArrayRCP<Scalar> ptent_values;
555
556 // Because ArrayRCPs are slow...
557 ArrayView<size_t> rowptr_v;
558 ArrayView<LO> colind_v;
559 ArrayView<Scalar> values_v;
560
561 // For temporary usage
562 Array<size_t> rowptr_temp;
563 Array<LO> colind_temp;
564 Array<Scalar> values_temp;
565
566 RCP<CrsMatrix> PtentCrs;
567
568 RCP<CrsMatrixWrap> PtentCrsWrap = rcp(new CrsMatrixWrap(rowMapForPtent, NSDim));
569 PtentCrs = PtentCrsWrap->getCrsMatrix();
570 Ptentative = PtentCrsWrap;
571
572 //*****************************************************************
573 // Loop over all aggregates and calculate local QR decompositions.
574 //*****************************************************************
575 GO qctr = 0; // for indexing into Ptent data vectors
576 const Map& nonUniqueMapRef = *nonUniqueMap;
577
578 size_t total_nnz_count = 0;
579
580 for (GO agg = 0; agg < numAggs; ++agg) {
581 LO myAggSize = aggStart[agg + 1] - aggStart[agg];
582 // For each aggregate, extract the corresponding piece of the nullspace and put it in the flat array,
583 // "localQR" (in column major format) for the QR routine.
584 Teuchos::SerialDenseMatrix<LO, SC> localQR(myAggSize, NSDim);
585 for (size_t j = 0; j < NSDim; ++j) {
586 bool bIsZeroNSColumn = true;
587 for (LO k = 0; k < myAggSize; ++k) {
588 // aggToRowMap[aggPtr[i]+k] is the kth DOF in the ith aggregate
589 // fineNS[j][n] is the nth entry in the jth NS vector
590 try {
591 SC nsVal = fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])]; // extract information from fine level NS
592 localQR(k, j) = nsVal;
593 if (nsVal != zero) bIsZeroNSColumn = false;
594 } catch (...) {
595 GetOStream(Runtime1, -1) << "length of fine level nsp: " << fineNullspace->getGlobalLength() << std::endl;
596 GetOStream(Runtime1, -1) << "length of fine level nsp w overlap: " << fineNullspaceWithOverlap->getGlobalLength() << std::endl;
597 GetOStream(Runtime1, -1) << "(local?) aggId=" << agg << std::endl;
598 GetOStream(Runtime1, -1) << "aggSize=" << myAggSize << std::endl;
599 GetOStream(Runtime1, -1) << "agg DOF=" << k << std::endl;
600 GetOStream(Runtime1, -1) << "NS vector j=" << j << std::endl;
601 GetOStream(Runtime1, -1) << "j*myAggSize + k = " << j * myAggSize + k << std::endl;
602 GetOStream(Runtime1, -1) << "aggToRowMap[" << agg << "][" << k << "] = " << aggToRowMap[aggStart[agg] + k] << std::endl;
603 GetOStream(Runtime1, -1) << "id aggToRowMap[agg][k]=" << aggToRowMap[aggStart[agg] + k] << " is global element in nonUniqueMap = " << nonUniqueMapRef.isNodeGlobalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
604 GetOStream(Runtime1, -1) << "colMap local id aggToRowMap[agg][k]=" << nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
605 GetOStream(Runtime1, -1) << "fineNS...=" << fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])] << std::endl;
606 GetOStream(Errors, -1) << "caught an error!" << std::endl;
607 }
608 } // for (LO k=0 ...
609 TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError, "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column. Error.");
610 } // for (LO j=0 ...
611
612 Xpetra::global_size_t offset = agg * NSDim;
613
614 if (myAggSize >= Teuchos::as<LocalOrdinal>(NSDim)) {
615 // calculate QR decomposition (standard)
616 // R is stored in localQR (size: myAggSize x NSDim)
617
618 // Householder multiplier
619 SC tau = localQR(0, 0);
620
621 if (NSDim == 1) {
622 // Only one nullspace vector, so normalize by hand
623 Magnitude dtemp = 0;
624 for (size_t k = 0; k < Teuchos::as<size_t>(myAggSize); ++k) {
625 Magnitude tmag = STS::magnitude(localQR(k, 0));
626 dtemp += tmag * tmag;
627 }
628 dtemp = Teuchos::ScalarTraits<Magnitude>::squareroot(dtemp);
629 tau = localQR(0, 0);
630 localQR(0, 0) = dtemp;
631 } else {
632 qrSolver.setMatrix(Teuchos::rcp(&localQR, false));
633 qrSolver.factor();
634 }
635
636 // Extract R, the coarse nullspace. This is stored in upper triangular part of localQR.
637 // Note: coarseNS[i][.] is the ith coarse nullspace vector, which may be counter to your intuition.
638 // This stores the (offset+k)th entry only if it is local according to the coarseMap.
639 for (size_t j = 0; j < NSDim; ++j) {
640 for (size_t k = 0; k <= j; ++k) {
641 try {
642 if (coarseMapRef.isNodeLocalElement(offset + k)) {
643 coarseNS[j][offset + k] = localQR(k, j); // TODO is offset+k the correct local ID?!
644 }
645 } catch (...) {
646 GetOStream(Errors, -1) << "caught error in coarseNS insert, j=" << j << ", offset+k = " << offset + k << std::endl;
647 }
648 }
649 }
650
651 // Calculate Q, the tentative prolongator.
652 // The Lapack GEQRF call only works for myAggsize >= NSDim
653
654 if (NSDim == 1) {
655 // Only one nullspace vector, so calculate Q by hand
656 Magnitude dtemp = Teuchos::ScalarTraits<SC>::magnitude(localQR(0, 0));
657 localQR(0, 0) = tau;
658 dtemp = 1 / dtemp;
659 for (LocalOrdinal i = 0; i < myAggSize; ++i) {
660 localQR(i, 0) *= dtemp;
661 }
662 } else {
663 qrSolver.formQ();
664 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO, SC> > qFactor = qrSolver.getQ();
665 for (size_t j = 0; j < NSDim; j++) {
666 for (size_t i = 0; i < Teuchos::as<size_t>(myAggSize); i++) {
667 localQR(i, j) = (*qFactor)(i, j);
668 }
669 }
670 }
671
672 // end default case (myAggSize >= NSDim)
673 } else { // special handling for myAggSize < NSDim (i.e. 1pt nodes)
674 // See comments for the uncoupled case
675
676 // R = extended (by adding identity rows) localQR
677 for (size_t j = 0; j < NSDim; j++)
678 for (size_t k = 0; k < NSDim; k++) {
679 TEUCHOS_TEST_FOR_EXCEPTION(!coarseMapRef.isNodeLocalElement(offset + k), Exceptions::RuntimeError,
680 "Caught error in coarseNS insert, j=" << j << ", offset+k = " << offset + k);
681
682 if (k < as<size_t>(myAggSize))
683 coarseNS[j][offset + k] = localQR(k, j);
684 else
685 coarseNS[j][offset + k] = (k == j ? one : zero);
686 }
687
688 // Q = I (rectangular)
689 for (size_t i = 0; i < as<size_t>(myAggSize); i++)
690 for (size_t j = 0; j < NSDim; j++)
691 localQR(i, j) = (j == i ? one : zero);
692 } // end else (special handling for 1pt aggregates)
693
694 // Process each row in the local Q factor. If the row is local to the current processor
695 // according to the rowmap, insert it into Ptentative. Otherwise, save it in ghostQ
696 // to be communicated later to the owning processor.
697 // FIXME -- what happens if maps are blocked?
698 for (GO j = 0; j < myAggSize; ++j) {
699 // This loop checks whether row associated with current DOF is local, according to rowMapForPtent.
700 // If it is, the row is inserted. If not, the row number, columns, and values are saved in
701 // MultiVectors that will be sent to other processors.
702 GO globalRow = aggToRowMap[aggStart[agg] + j];
703
704 // TODO is the use of Xpetra::global_size_t below correct?
705 if (rowMapForPtentRef.isNodeGlobalElement(globalRow) == false) {
706 ghostQrows[qctr] = globalRow;
707 for (size_t k = 0; k < NSDim; ++k) {
708 ghostQcols[k][qctr] = coarseMapRef.getGlobalElement(agg * NSDim + k);
709 ghostQvals[k][qctr] = localQR(j, k);
710 }
711 ++qctr;
712 } else {
713 size_t nnz = 0;
714 for (size_t k = 0; k < NSDim; ++k) {
715 try {
716 if (localQR(j, k) != Teuchos::ScalarTraits<SC>::zero()) {
717 localColPtr[nnz] = agg * NSDim + k;
718 globalColPtr[nnz] = coarseMapRef.getGlobalElement(localColPtr[nnz]);
719 valPtr[nnz] = localQR(j, k);
720 ++total_nnz_count;
721 ++nnz;
722 }
723 } catch (...) {
724 GetOStream(Errors, -1) << "caught error in colPtr/valPtr insert, current index=" << nnz << std::endl;
725 }
726 } // for (size_t k=0; k<NSDim; ++k)
727
728 try {
729 Ptentative->insertGlobalValues(globalRow, globalColPtr.view(0, nnz), valPtr.view(0, nnz));
730 } catch (...) {
731 GetOStream(Errors, -1) << "pid " << A->getRowMap()->getComm()->getRank()
732 << "caught error during Ptent row insertion, global row "
733 << globalRow << std::endl;
734 }
735 }
736 } // for (GO j=0; j<myAggSize; ++j)
737
738 } // for (LO agg=0; agg<numAggs; ++agg)
739
740 // ***********************************************************
741 // ************* end of aggregate-wise QR ********************
742 // ***********************************************************
743 GetOStream(Runtime1) << "TentativePFactory : aggregates may cross process boundaries" << std::endl;
744 // Import ghost parts of Q factors and insert into Ptentative.
745 // First import just the global row numbers.
746 RCP<Xpetra::Vector<GO, LO, GO, Node> > targetQrowNums = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(rowMapForPtent);
747 targetQrowNums->putScalar(-1);
748 targetQrowNums->doImport(*ghostQrowNums, *importer, Xpetra::INSERT);
749 ArrayRCP<GO> targetQrows = targetQrowNums->getDataNonConst(0);
750
751 // Now create map based on just the row numbers imported.
752 Array<GO> gidsToImport;
753 gidsToImport.reserve(targetQrows.size());
754 for (typename ArrayRCP<GO>::iterator r = targetQrows.begin(); r != targetQrows.end(); ++r) {
755 if (*r > -1) {
756 gidsToImport.push_back(*r);
757 }
758 }
759 RCP<const Map> reducedMap = MapFactory::Build(A->getRowMap()->lib(),
760 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
761 gidsToImport, indexBase, A->getRowMap()->getComm());
762
763 // Import using the row numbers that this processor will receive.
764 importer = ImportFactory::Build(ghostQMap, reducedMap);
765
766 Array<RCP<Xpetra::Vector<GO, LO, GO, Node> > > targetQcolumns(NSDim);
767 for (size_t i = 0; i < NSDim; ++i) {
768 targetQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(reducedMap);
769 targetQcolumns[i]->doImport(*(ghostQcolumns[i]), *importer, Xpetra::INSERT);
770 }
771 RCP<MultiVector> targetQvalues = MultiVectorFactory::Build(reducedMap, NSDim);
772 targetQvalues->doImport(*ghostQvalues, *importer, Xpetra::INSERT);
773
774 ArrayRCP<ArrayRCP<SC> > targetQvals;
775 ArrayRCP<ArrayRCP<GO> > targetQcols;
776 if (targetQvalues->getLocalLength() > 0) {
777 targetQvals.resize(NSDim);
778 targetQcols.resize(NSDim);
779 for (size_t i = 0; i < NSDim; ++i) {
780 targetQvals[i] = targetQvalues->getDataNonConst(i);
781 targetQcols[i] = targetQcolumns[i]->getDataNonConst(0);
782 }
783 }
784
785 valPtr = Array<SC>(NSDim, 0.);
786 globalColPtr = Array<GO>(NSDim, 0);
787 for (typename Array<GO>::iterator r = gidsToImport.begin(); r != gidsToImport.end(); ++r) {
788 if (targetQvalues->getLocalLength() > 0) {
789 for (size_t j = 0; j < NSDim; ++j) {
790 valPtr[j] = targetQvals[j][reducedMap->getLocalElement(*r)];
791 globalColPtr[j] = targetQcols[j][reducedMap->getLocalElement(*r)];
792 }
793 Ptentative->insertGlobalValues(*r, globalColPtr.view(0, NSDim), valPtr.view(0, NSDim));
794 } // if (targetQvalues->getLocalLength() > 0)
795 }
796
797 Ptentative->fillComplete(coarseMap, A->getDomainMap());
798}
799
800template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
802 BuildPuncoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
803 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
804 RCP<const Map> rowMap = A->getRowMap();
805 RCP<const Map> colMap = A->getColMap();
806 const size_t numRows = rowMap->getLocalNumElements();
807
808 typedef Teuchos::ScalarTraits<SC> STS;
809 typedef typename STS::magnitudeType Magnitude;
810 const SC zero = STS::zero();
811 const SC one = STS::one();
812 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
813
814 const GO numAggs = aggregates->GetNumAggregates();
815 const size_t NSDim = fineNullspace->getNumVectors();
816 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
817
818 // Sanity checking
819 const ParameterList& pL = GetParameterList();
820 const bool& doQRStep = pL.get<bool>("tentative: calculate qr");
821 const bool& constantColSums = pL.get<bool>("tentative: constant column sums");
822
823 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums, Exceptions::RuntimeError,
824 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
825
826 // Aggregates map is based on the amalgamated column map
827 // We can skip global-to-local conversion if LIDs in row map are
828 // same as LIDs in column map
829 bool goodMap = MueLu::Utilities<SC, LO, GO, NO>::MapsAreNested(*rowMap, *colMap);
830
831 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
832 // aggStart is a pointer into aggToRowMapLO
833 // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
834 // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
835 ArrayRCP<LO> aggStart;
836 ArrayRCP<LO> aggToRowMapLO;
837 ArrayRCP<GO> aggToRowMapGO;
838 if (goodMap) {
839 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
840 GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
841
842 } else {
843 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMapGO);
844 GetOStream(Warnings0) << "Column map is not consistent with the row map\n"
845 << "using GO->LO conversion with performance penalty" << std::endl;
846 }
847 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
848
849 // Pull out the nullspace vectors so that we can have random access.
850 ArrayRCP<ArrayRCP<const SC> > fineNS(NSDim);
851 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
852 for (size_t i = 0; i < NSDim; i++) {
853 fineNS[i] = fineNullspace->getData(i);
854 if (coarseMap->getLocalNumElements() > 0)
855 coarseNS[i] = coarseNullspace->getDataNonConst(i);
856 }
857
858 size_t nnzEstimate = numRows * NSDim;
859
860 // Time to construct the matrix and fill in the values
861 Ptentative = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
862 RCP<CrsMatrix> PtentCrs = toCrsMatrix(Ptentative);
863
864 ArrayRCP<size_t> iaPtent;
865 ArrayRCP<LO> jaPtent;
866 ArrayRCP<SC> valPtent;
867
868 PtentCrs->allocateAllValues(nnzEstimate, iaPtent, jaPtent, valPtent);
869
870 ArrayView<size_t> ia = iaPtent();
871 ArrayView<LO> ja = jaPtent();
872 ArrayView<SC> val = valPtent();
873
874 ia[0] = 0;
875 for (size_t i = 1; i <= numRows; i++)
876 ia[i] = ia[i - 1] + NSDim;
877
878 for (size_t j = 0; j < nnzEstimate; j++) {
879 ja[j] = INVALID;
880 val[j] = zero;
881 }
882
883 if (doQRStep) {
885 // Standard aggregate-wise QR //
887 for (GO agg = 0; agg < numAggs; agg++) {
888 LO aggSize = aggStart[agg + 1] - aggStart[agg];
889
890 Xpetra::global_size_t offset = agg * NSDim;
891
892 // Extract the piece of the nullspace corresponding to the aggregate, and
893 // put it in the flat array, "localQR" (in column major format) for the
894 // QR routine.
895 Teuchos::SerialDenseMatrix<LO, SC> localQR(aggSize, NSDim);
896 if (goodMap) {
897 for (size_t j = 0; j < NSDim; j++)
898 for (LO k = 0; k < aggSize; k++)
899 localQR(k, j) = fineNS[j][aggToRowMapLO[aggStart[agg] + k]];
900 } else {
901 for (size_t j = 0; j < NSDim; j++)
902 for (LO k = 0; k < aggSize; k++)
903 localQR(k, j) = fineNS[j][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + k])];
904 }
905
906 // Test for zero columns
907 for (size_t j = 0; j < NSDim; j++) {
908 bool bIsZeroNSColumn = true;
909
910 for (LO k = 0; k < aggSize; k++)
911 if (localQR(k, j) != zero)
912 bIsZeroNSColumn = false;
913
914 TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError,
915 "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column in NS column " << j);
916 }
917
918 // Calculate QR decomposition (standard)
919 // NOTE: Q is stored in localQR and R is stored in coarseNS
920 if (aggSize >= Teuchos::as<LO>(NSDim)) {
921 if (NSDim == 1) {
922 // Only one nullspace vector, calculate Q and R by hand
923 Magnitude norm = STS::magnitude(zero);
924 for (size_t k = 0; k < Teuchos::as<size_t>(aggSize); k++)
925 norm += STS::magnitude(localQR(k, 0) * localQR(k, 0));
926 norm = Teuchos::ScalarTraits<Magnitude>::squareroot(norm);
927
928 // R = norm
929 coarseNS[0][offset] = norm;
930
931 // Q = localQR(:,0)/norm
932 for (LO i = 0; i < aggSize; i++)
933 localQR(i, 0) /= norm;
934
935 } else {
936 Teuchos::SerialQRDenseSolver<LO, SC> qrSolver;
937 qrSolver.setMatrix(Teuchos::rcp(&localQR, false));
938 qrSolver.factor();
939
940 // R = upper triangular part of localQR
941 for (size_t j = 0; j < NSDim; j++)
942 for (size_t k = 0; k <= j; k++)
943 coarseNS[j][offset + k] = localQR(k, j); // TODO is offset+k the correct local ID?!
944
945 // Calculate Q, the tentative prolongator.
946 // The Lapack GEQRF call only works for myAggsize >= NSDim
947 qrSolver.formQ();
948 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO, SC> > qFactor = qrSolver.getQ();
949 for (size_t j = 0; j < NSDim; j++)
950 for (size_t i = 0; i < Teuchos::as<size_t>(aggSize); i++)
951 localQR(i, j) = (*qFactor)(i, j);
952 }
953
954 } else {
955 // Special handling for aggSize < NSDim (i.e. single node aggregates in structural mechanics)
956
957 // The local QR decomposition is not possible in the "overconstrained"
958 // case (i.e. number of columns in localQR > number of rows), which
959 // corresponds to #DOFs in Aggregate < NSDim. For usual problems this
960 // is only possible for single node aggregates in structural mechanics.
961 // (Similar problems may arise in discontinuous Galerkin problems...)
962 // We bypass the QR decomposition and use an identity block in the
963 // tentative prolongator for the single node aggregate and transfer the
964 // corresponding fine level null space information 1-to-1 to the coarse
965 // level null space part.
966
967 // NOTE: The resulting tentative prolongation operator has
968 // (aggSize*DofsPerNode-NSDim) zero columns leading to a singular
969 // coarse level operator A. To deal with that one has the following
970 // options:
971 // - Use the "RepairMainDiagonal" flag in the RAPFactory (default:
972 // false) to add some identity block to the diagonal of the zero rows
973 // in the coarse level operator A, such that standard level smoothers
974 // can be used again.
975 // - Use special (projection-based) level smoothers, which can deal
976 // with singular matrices (very application specific)
977 // - Adapt the code below to avoid zero columns. However, we do not
978 // support a variable number of DOFs per node in MueLu/Xpetra which
979 // makes the implementation really hard.
980
981 // R = extended (by adding identity rows) localQR
982 for (size_t j = 0; j < NSDim; j++)
983 for (size_t k = 0; k < NSDim; k++)
984 if (k < as<size_t>(aggSize))
985 coarseNS[j][offset + k] = localQR(k, j);
986 else
987 coarseNS[j][offset + k] = (k == j ? one : zero);
988
989 // Q = I (rectangular)
990 for (size_t i = 0; i < as<size_t>(aggSize); i++)
991 for (size_t j = 0; j < NSDim; j++)
992 localQR(i, j) = (j == i ? one : zero);
993 }
994
995 // Process each row in the local Q factor
996 // FIXME: What happens if maps are blocked?
997 for (LO j = 0; j < aggSize; j++) {
998 LO localRow = (goodMap ? aggToRowMapLO[aggStart[agg] + j] : rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]));
999
1000 size_t rowStart = ia[localRow];
1001 for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
1002 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1003 if (localQR(j, k) != zero) {
1004 ja[rowStart + lnnz] = offset + k;
1005 val[rowStart + lnnz] = localQR(j, k);
1006 lnnz++;
1007 }
1008 }
1009 }
1010 }
1011
1012 } else {
1013 GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
1014 if (NSDim > 1)
1015 GetOStream(Warnings0) << "TentativePFactory : for nontrivial nullspace, this may degrade performance" << std::endl;
1017 // "no-QR" option //
1019 // Local Q factor is just the fine nullspace support over the current aggregate.
1020 // Local R factor is the identity.
1021 // TODO I have not implemented any special handling for aggregates that are too
1022 // TODO small to locally support the nullspace, as is done in the standard QR
1023 // TODO case above.
1024 if (goodMap) {
1025 for (GO agg = 0; agg < numAggs; agg++) {
1026 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1027 Xpetra::global_size_t offset = agg * NSDim;
1028
1029 // Process each row in the local Q factor
1030 // FIXME: What happens if maps are blocked?
1031 for (LO j = 0; j < aggSize; j++) {
1032 // TODO Here I do not check for a zero nullspace column on the aggregate.
1033 // as is done in the standard QR case.
1034
1035 const LO localRow = aggToRowMapLO[aggStart[agg] + j];
1036
1037 const size_t rowStart = ia[localRow];
1038
1039 for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
1040 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1041 SC qr_jk = fineNS[k][aggToRowMapLO[aggStart[agg] + j]];
1042 if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1043 if (qr_jk != zero) {
1044 ja[rowStart + lnnz] = offset + k;
1045 val[rowStart + lnnz] = qr_jk;
1046 lnnz++;
1047 }
1048 }
1049 }
1050 for (size_t j = 0; j < NSDim; j++)
1051 coarseNS[j][offset + j] = one;
1052 } // for (GO agg = 0; agg < numAggs; agg++)
1053
1054 } else {
1055 for (GO agg = 0; agg < numAggs; agg++) {
1056 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1057 Xpetra::global_size_t offset = agg * NSDim;
1058 for (LO j = 0; j < aggSize; j++) {
1059 const LO localRow = rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]);
1060
1061 const size_t rowStart = ia[localRow];
1062
1063 for (size_t k = 0, lnnz = 0; k < NSDim; ++k) {
1064 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1065 SC qr_jk = fineNS[k][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j])];
1066 if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1067 if (qr_jk != zero) {
1068 ja[rowStart + lnnz] = offset + k;
1069 val[rowStart + lnnz] = qr_jk;
1070 lnnz++;
1071 }
1072 }
1073 }
1074 for (size_t j = 0; j < NSDim; j++)
1075 coarseNS[j][offset + j] = one;
1076 } // for (GO agg = 0; agg < numAggs; agg++)
1077
1078 } // if (goodmap) else ...
1079
1080 } // if doQRStep ... else
1081
1082 // Compress storage (remove all INVALID, which happen when we skip zeros)
1083 // We do that in-place
1084 size_t ia_tmp = 0, nnz = 0;
1085 for (size_t i = 0; i < numRows; i++) {
1086 for (size_t j = ia_tmp; j < ia[i + 1]; j++)
1087 if (ja[j] != INVALID) {
1088 ja[nnz] = ja[j];
1089 val[nnz] = val[j];
1090 nnz++;
1091 }
1092 ia_tmp = ia[i + 1];
1093 ia[i + 1] = nnz;
1094 }
1095 if (rowMap->lib() == Xpetra::UseTpetra) {
1096 // - Cannot resize for Epetra, as it checks for same pointers
1097 // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
1098 // NOTE: these invalidate ja and val views
1099 jaPtent.resize(nnz);
1100 valPtent.resize(nnz);
1101 }
1102
1103 GetOStream(Runtime1) << "TentativePFactory : aggregates do not cross process boundaries" << std::endl;
1104
1105 PtentCrs->setAllValues(iaPtent, jaPtent, valPtent);
1106
1107 // Managing labels & constants for ESFC
1108 RCP<ParameterList> FCparams;
1109 if (pL.isSublist("matrixmatrix: kernel params"))
1110 FCparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
1111 else
1112 FCparams = rcp(new ParameterList);
1113 // By default, we don't need global constants for TentativeP
1114 FCparams->set("compute global constants", FCparams->get("compute global constants", false));
1115 std::string levelIDs = toString(levelID);
1116 FCparams->set("Timer Label", std::string("MueLu::TentativeP-") + levelIDs);
1117 RCP<const Export> dummy_e;
1118 RCP<const Import> dummy_i;
1119
1120 PtentCrs->expertStaticFillComplete(coarseMap, A->getDomainMap(), dummy_i, dummy_e, FCparams);
1121}
1122
1123} // namespace MueLu
1124
1125// TODO ReUse: If only P or Nullspace is missing, TentativePFactory can be smart and skip part of the computation.
1126
1127#define MUELU_TENTATIVEPFACTORY_SHORT
1128#endif // MUELU_TENTATIVEPFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
int GetLevelID() const
Return level number.
void AddKeepFlag(const std::string &ename, const FactoryBase *factory=NoFactory::get(), KeepType keep=MueLu::Keep)
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static const NoFactory * get()
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void BuildPuncoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void BuildPuncoupledBlockCrs(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void BuildPcoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace) const
static bool MapsAreNested(const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &rowMap, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &colMap)
Namespace for MueLu classes and methods.
@ Final
Keep data only for this run. Used to keep data useful for Hierarchy::Iterate(). Data will be deleted ...
@ Warnings0
Important warning messages (one line)
@ Statistics2
Print even more statistics.
@ Runtime1
Description of what is happening (more verbose)
std::string toString(const T &what)
Little helper function to convert non-string types to strings.