MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_TentativePFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_TENTATIVEPFACTORY_DEF_HPP
11#define MUELU_TENTATIVEPFACTORY_DEF_HPP
12
13#include <Xpetra_MapFactory.hpp>
14#include <Xpetra_Map.hpp>
15#include <Xpetra_CrsMatrix.hpp>
16#include <Xpetra_CrsGraphFactory.hpp>
17#include <Xpetra_Matrix.hpp>
18#include <Xpetra_MatrixMatrix.hpp>
19#include <Xpetra_MultiVector.hpp>
20#include <Xpetra_MultiVectorFactory.hpp>
21#include <Xpetra_VectorFactory.hpp>
22#include <Xpetra_Import.hpp>
23#include <Xpetra_ImportFactory.hpp>
24#include <Xpetra_CrsMatrixWrap.hpp>
25#include <Xpetra_StridedMap.hpp>
26#include <Xpetra_StridedMapFactory.hpp>
27#include <Xpetra_IO.hpp>
28
29#include "Xpetra_TpetraBlockCrsMatrix.hpp"
30
32
33#include "MueLu_Aggregates.hpp"
34#include "MueLu_AmalgamationInfo.hpp"
35#include "MueLu_MasterList.hpp"
36#include "MueLu_Monitor.hpp"
37#include "MueLu_PerfUtils.hpp"
38#include "MueLu_Utilities.hpp"
39
40namespace MueLu {
41
42template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
44 RCP<ParameterList> validParamList = rcp(new ParameterList());
45
46#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
47 SET_VALID_ENTRY("tentative: calculate qr");
48 SET_VALID_ENTRY("tentative: build coarse coordinates");
49 SET_VALID_ENTRY("tentative: constant column sums");
50#undef SET_VALID_ENTRY
51 validParamList->set<std::string>("Nullspace name", "Nullspace", "Name for the input nullspace");
52
53 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
54 validParamList->set<RCP<const FactoryBase> >("Aggregates", Teuchos::null, "Generating factory of the aggregates");
55 validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
56 validParamList->set<RCP<const FactoryBase> >("Scaled Nullspace", Teuchos::null, "Generating factory of the scaled nullspace");
57 validParamList->set<RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
58 validParamList->set<RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the coarse map");
59 validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory of the coordinates");
60 validParamList->set<RCP<const FactoryBase> >("Node Comm", Teuchos::null, "Generating factory of the node level communicator");
61
62 // Make sure we don't recursively validate options for the matrixmatrix kernels
63 ParameterList norecurse;
64 norecurse.disableRecursiveValidation();
65 validParamList->set<ParameterList>("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
66
67 return validParamList;
68}
69
70template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
72 const ParameterList& pL = GetParameterList();
73 // NOTE: This guy can only either be 'Nullspace' or 'Scaled Nullspace' or else the validator above will cause issues
74 std::string nspName = "Nullspace";
75 if (pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
76
77 Input(fineLevel, "A");
78 Input(fineLevel, "Aggregates");
79 Input(fineLevel, nspName);
80 Input(fineLevel, "UnAmalgamationInfo");
81 Input(fineLevel, "CoarseMap");
82 if (fineLevel.GetLevelID() == 0 &&
83 fineLevel.IsAvailable("Coordinates", NoFactory::get()) && // we have coordinates (provided by user app)
84 pL.get<bool>("tentative: build coarse coordinates")) { // and we want coordinates on other levels
85 bTransferCoordinates_ = true; // then set the transfer coordinates flag to true
86 Input(fineLevel, "Coordinates");
87 } else if (bTransferCoordinates_) {
88 Input(fineLevel, "Coordinates");
89 }
90}
91
92template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
94 return BuildP(fineLevel, coarseLevel);
95}
96
97template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
99 FactoryMonitor m(*this, "Build", coarseLevel);
100 typedef typename Teuchos::ScalarTraits<Scalar>::coordinateType coordinate_type;
101 typedef Xpetra::MultiVector<coordinate_type, LO, GO, NO> RealValuedMultiVector;
102 typedef Xpetra::MultiVectorFactory<coordinate_type, LO, GO, NO> RealValuedMultiVectorFactory;
103
104 const ParameterList& pL = GetParameterList();
105 std::string nspName = "Nullspace";
106 if (pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
107
108 RCP<Matrix> Ptentative;
109 RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
110 RCP<Aggregates> aggregates = Get<RCP<Aggregates> >(fineLevel, "Aggregates");
111 // No coarse DoFs so we need to bail by setting Ptentattive to null and returning
112 // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
113 if (aggregates->GetNumGlobalAggregatesComputeIfNeeded() == 0) {
114 Ptentative = Teuchos::null;
115 Set(coarseLevel, "P", Ptentative);
116 return;
117 }
118
119 RCP<AmalgamationInfo> amalgInfo = Get<RCP<AmalgamationInfo> >(fineLevel, "UnAmalgamationInfo");
120 RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, nspName);
121 RCP<const Map> coarseMap = Get<RCP<const Map> >(fineLevel, "CoarseMap");
122 RCP<RealValuedMultiVector> fineCoords;
123 if (bTransferCoordinates_) {
124 fineCoords = Get<RCP<RealValuedMultiVector> >(fineLevel, "Coordinates");
125 }
126
127 // FIXME: We should remove the NodeComm on levels past the threshold
128 if (fineLevel.IsAvailable("Node Comm")) {
129 RCP<const Teuchos::Comm<int> > nodeComm = Get<RCP<const Teuchos::Comm<int> > >(fineLevel, "Node Comm");
130 Set<RCP<const Teuchos::Comm<int> > >(coarseLevel, "Node Comm", nodeComm);
131 }
132
133 // NOTE: We check DomainMap here rather than RowMap because those are different for BlockCrs matrices
134 TEUCHOS_TEST_FOR_EXCEPTION(A->getDomainMap()->getLocalNumElements() != fineNullspace->getMap()->getLocalNumElements(),
135 Exceptions::RuntimeError, "MueLu::TentativePFactory::MakeTentative: Size mismatch between A and Nullspace");
136
137 RCP<MultiVector> coarseNullspace;
138 RCP<RealValuedMultiVector> coarseCoords;
139
140 if (bTransferCoordinates_) {
141 //*** Create the coarse coordinates ***
142 // First create the coarse map and coarse multivector
143 ArrayView<const GO> elementAList = coarseMap->getLocalElementList();
144 LO blkSize = 1;
145 if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null) {
146 blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize();
147 }
148 GO indexBase = coarseMap->getIndexBase();
149 LO numCoarseNodes = Teuchos::as<LO>(elementAList.size() / blkSize);
150 Array<GO> nodeList(numCoarseNodes);
151 const int numDimensions = fineCoords->getNumVectors();
152
153 for (LO i = 0; i < numCoarseNodes; i++) {
154 nodeList[i] = (elementAList[i * blkSize] - indexBase) / blkSize + indexBase;
155 }
156 RCP<const Map> coarseCoordsMap = MapFactory::Build(fineCoords->getMap()->lib(),
157 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
158 nodeList,
159 indexBase,
160 fineCoords->getMap()->getComm());
161 coarseCoords = RealValuedMultiVectorFactory::Build(coarseCoordsMap, numDimensions);
162
163 // Create overlapped fine coordinates to reduce global communication
164 RCP<RealValuedMultiVector> ghostedCoords;
165 if (aggregates->AggregatesCrossProcessors()) {
166 RCP<const Map> aggMap = aggregates->GetMap();
167 RCP<const Import> importer = ImportFactory::Build(fineCoords->getMap(), aggMap);
168
169 ghostedCoords = RealValuedMultiVectorFactory::Build(aggMap, numDimensions);
170 ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);
171 } else {
172 ghostedCoords = fineCoords;
173 }
174
175 // Get some info about aggregates
176 int myPID = coarseCoordsMap->getComm()->getRank();
177 LO numAggs = aggregates->GetNumAggregates();
178 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
179 const ArrayRCP<const LO> vertex2AggID = aggregates->GetVertex2AggId()->getData(0);
180 const ArrayRCP<const LO> procWinner = aggregates->GetProcWinner()->getData(0);
181
182 // Fill in coarse coordinates
183 for (int dim = 0; dim < numDimensions; ++dim) {
184 ArrayRCP<const coordinate_type> fineCoordsData = ghostedCoords->getData(dim);
185 ArrayRCP<coordinate_type> coarseCoordsData = coarseCoords->getDataNonConst(dim);
186
187 for (LO lnode = 0; lnode < Teuchos::as<LO>(vertex2AggID.size()); lnode++) {
188 if (procWinner[lnode] == myPID &&
189 lnode < fineCoordsData.size() &&
190 vertex2AggID[lnode] < coarseCoordsData.size() &&
191 Teuchos::ScalarTraits<coordinate_type>::isnaninf(fineCoordsData[lnode]) == false) {
192 coarseCoordsData[vertex2AggID[lnode]] += fineCoordsData[lnode];
193 }
194 }
195 for (LO agg = 0; agg < numAggs; agg++) {
196 coarseCoordsData[agg] /= aggSizes[agg];
197 }
198 }
199 }
200
201 if (!aggregates->AggregatesCrossProcessors()) {
202 if (Xpetra::Helpers<SC, LO, GO, NO>::isTpetraBlockCrs(A)) {
203 BuildPuncoupledBlockCrs(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.GetLevelID());
204 } else {
205 BuildPuncoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.GetLevelID());
206 }
207 } else
208 BuildPcoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
209
210 // If available, use striding information of fine level matrix A for range
211 // map and coarseMap as domain map; otherwise use plain range map of
212 // Ptent = plain range map of A for range map and coarseMap as domain map.
213 // NOTE:
214 // The latter is not really safe, since there is no striding information
215 // for the range map. This is not really a problem, since striding
216 // information is always available on the intermedium levels and the
217 // coarsest levels.
218 if (A->IsView("stridedMaps") == true)
219 Ptentative->CreateView("stridedMaps", A->getRowMap("stridedMaps"), coarseMap);
220 else
221 Ptentative->CreateView("stridedMaps", Ptentative->getRangeMap(), coarseMap);
222
223 if (bTransferCoordinates_) {
224 Set(coarseLevel, "Coordinates", coarseCoords);
225 }
226 Set(coarseLevel, "Nullspace", coarseNullspace);
227 Set(coarseLevel, "P", Ptentative);
228
229 if (IsPrint(Statistics2)) {
230 RCP<ParameterList> params = rcp(new ParameterList());
231 params->set("printLoadBalancingInfo", true);
232 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ptentative, "Ptent", params);
233 }
234}
235
236template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
238 BuildPuncoupledBlockCrs(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
239 RCP<const Map> coarsePointMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
240 /* This routine generates a BlockCrs P for a BlockCrs A. There are a few assumptions here, which meet the use cases we care about, but could
241 be generalized later, if we ever need to do so:
242 1) Null space dimension === block size of matrix: So no elasticity right now
243 2) QR is not supported: Under assumption #1, this shouldn't cause problems.
244 3) Maps are "good": Aka the first chunk of the ColMap is the RowMap.
245
246 These assumptions keep our code way simpler and still support the use cases we actually care about.
247 */
248
249 RCP<const Map> rowMap = A->getRowMap();
250 RCP<const Map> rangeMap = A->getRangeMap();
251 RCP<const Map> colMap = A->getColMap();
252 // const size_t numFinePointRows = rangeMap->getLocalNumElements();
253 const size_t numFineBlockRows = rowMap->getLocalNumElements();
254
255 typedef Teuchos::ScalarTraits<SC> STS;
256 // typedef typename STS::magnitudeType Magnitude;
257 const SC zero = STS::zero();
258 const SC one = STS::one();
259 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
260
261 const GO numAggs = aggregates->GetNumAggregates();
262 const size_t NSDim = fineNullspace->getNumVectors();
263 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
264
265 // Need to generate the coarse block map
266 // NOTE: We assume NSDim == block size here
267 // NOTE: We also assume that coarseMap has contiguous GIDs
268 // const size_t numCoarsePointRows = coarsePointMap->getLocalNumElements();
269 const size_t numCoarseBlockRows = coarsePointMap->getLocalNumElements() / NSDim;
270 RCP<const Map> coarseBlockMap = MapFactory::Build(coarsePointMap->lib(),
271 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
272 numCoarseBlockRows,
273 coarsePointMap->getIndexBase(),
274 coarsePointMap->getComm());
275 // Sanity checking
276 const ParameterList& pL = GetParameterList();
277 const bool& doQRStep = pL.get<bool>("tentative: calculate qr");
278 const bool& constantColSums = pL.get<bool>("tentative: constant column sums");
279
280 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums, Exceptions::RuntimeError,
281 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
282
283 // The aggregates use the amalgamated column map, which in this case is what we want
284
285 // Aggregates map is based on the amalgamated column map
286 // We can skip global-to-local conversion if LIDs in row map are
287 // same as LIDs in column map
288 bool goodMap = MueLu::Utilities<SC, LO, GO, NO>::MapsAreNested(*rowMap, *colMap);
289
290 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
291 // aggStart is a pointer into aggToRowMapLO
292 // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
293 // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
294 ArrayRCP<LO> aggStart;
295 ArrayRCP<LO> aggToRowMapLO;
296 ArrayRCP<GO> aggToRowMapGO;
297 if (goodMap) {
298 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
299 GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
300 } else {
301 throw std::runtime_error("TentativePFactory::PuncoupledBlockCrs: Inconsistent maps not currently supported");
302 }
303
304 coarseNullspace = MultiVectorFactory::Build(coarsePointMap, NSDim);
305
306 // Pull out the nullspace vectors so that we can have random access.
307 ArrayRCP<ArrayRCP<const SC> > fineNS(NSDim);
308 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
309 for (size_t i = 0; i < NSDim; i++) {
310 fineNS[i] = fineNullspace->getData(i);
311 if (coarsePointMap->getLocalNumElements() > 0)
312 coarseNS[i] = coarseNullspace->getDataNonConst(i);
313 }
314
315 // BlockCrs requires that we build the (block) graph first, so let's do that...
316 // NOTE: Because we're assuming that the NSDim == BlockSize, we only have one
317 // block non-zero per row in the matrix;
318 RCP<CrsGraph> BlockGraph = CrsGraphFactory::Build(rowMap, coarseBlockMap, 0);
319 ArrayRCP<size_t> iaPtent;
320 ArrayRCP<LO> jaPtent;
321 BlockGraph->allocateAllIndices(numFineBlockRows, iaPtent, jaPtent);
322 ArrayView<size_t> ia = iaPtent();
323 ArrayView<LO> ja = jaPtent();
324
325 for (size_t i = 0; i < numFineBlockRows; i++) {
326 ia[i] = i;
327 ja[i] = INVALID;
328 }
329 ia[numCoarseBlockRows] = numCoarseBlockRows;
330
331 for (GO agg = 0; agg < numAggs; agg++) {
332 LO aggSize = aggStart[agg + 1] - aggStart[agg];
333 Xpetra::global_size_t offset = agg;
334
335 for (LO j = 0; j < aggSize; j++) {
336 // FIXME: Allow for bad maps
337 const LO localRow = aggToRowMapLO[aggStart[agg] + j];
338 const size_t rowStart = ia[localRow];
339 ja[rowStart] = offset;
340 }
341 }
342
343 // Compress storage (remove all INVALID, which happen when we skip zeros)
344 // We do that in-place
345 size_t ia_tmp = 0, nnz = 0;
346 for (size_t i = 0; i < numFineBlockRows; i++) {
347 for (size_t j = ia_tmp; j < ia[i + 1]; j++)
348 if (ja[j] != INVALID) {
349 ja[nnz] = ja[j];
350 nnz++;
351 }
352 ia_tmp = ia[i + 1];
353 ia[i + 1] = nnz;
354 }
355
356 if (rowMap->lib() == Xpetra::UseTpetra) {
357 // - Cannot resize for Epetra, as it checks for same pointers
358 // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
359 // NOTE: these invalidate ja and val views
360 jaPtent.resize(nnz);
361 }
362
363 GetOStream(Runtime1) << "TentativePFactory : generating block graph" << std::endl;
364 BlockGraph->setAllIndices(iaPtent, jaPtent);
365
366 // Managing labels & constants for ESFC
367 {
368 RCP<ParameterList> FCparams;
369 if (pL.isSublist("matrixmatrix: kernel params"))
370 FCparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
371 else
372 FCparams = rcp(new ParameterList);
373 // By default, we don't need global constants for TentativeP, but we do want it for the graph
374 // if we're printing statistics, so let's leave it on for now.
375 FCparams->set("compute global constants", FCparams->get("compute global constants", true));
376 std::string levelIDs = toString(levelID);
377 FCparams->set("Timer Label", std::string("MueLu::TentativeP-") + levelIDs);
378 RCP<const Export> dummy_e;
379 RCP<const Import> dummy_i;
380 BlockGraph->expertStaticFillComplete(coarseBlockMap, rowMap, dummy_i, dummy_e, FCparams);
381 }
382
383 // Now let's make a BlockCrs Matrix
384 // NOTE: Assumes block size== NSDim
385 RCP<Xpetra::CrsMatrix<SC, LO, GO, NO> > P_xpetra = Xpetra::CrsMatrixFactory<SC, LO, GO, NO>::BuildBlock(BlockGraph, coarsePointMap, rangeMap, NSDim);
386 RCP<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO> > P_tpetra = rcp_dynamic_cast<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO> >(P_xpetra);
387 if (P_tpetra.is_null()) throw std::runtime_error("BuildPUncoupled: Matrix factory did not return a Tpetra::BlockCrsMatrix");
388 RCP<CrsMatrixWrap> P_wrap = rcp(new CrsMatrixWrap(P_xpetra));
389
391 // "no-QR" option //
393 // Local Q factor is just the fine nullspace support over the current aggregate.
394 // Local R factor is the identity.
395 // NOTE: We're not going to do a QR here as we're assuming that blocksize == NSDim
396 // NOTE: "goodMap" case only
397 Teuchos::Array<Scalar> block(NSDim * NSDim, zero);
398 Teuchos::Array<LO> bcol(1);
399
400 GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
401 for (LO agg = 0; agg < numAggs; agg++) {
402 bcol[0] = agg;
403 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
404 Xpetra::global_size_t offset = agg * NSDim;
405
406 // Process each row in the local Q factor
407 // NOTE: Blocks are in row-major order
408 for (LO j = 0; j < aggSize; j++) {
409 const LO localBlockRow = aggToRowMapLO[aggStart[agg] + j];
410
411 for (size_t r = 0; r < NSDim; r++) {
412 LO localPointRow = localBlockRow * NSDim + r;
413 for (size_t c = 0; c < NSDim; c++)
414 block[r * NSDim + c] = fineNS[c][localPointRow];
415 }
416 // NOTE: Assumes columns==aggs and are ordered sequentially
417 P_tpetra->replaceLocalValues(localBlockRow, bcol(), block());
418
419 } // end aggSize
420
421 for (size_t j = 0; j < NSDim; j++)
422 coarseNS[j][offset + j] = one;
423
424 } // for (GO agg = 0; agg < numAggs; agg++)
425
426 Ptentative = P_wrap;
427}
428
429template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
431 BuildPcoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
432 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace) const {
433 typedef Teuchos::ScalarTraits<SC> STS;
434 typedef typename STS::magnitudeType Magnitude;
435 const SC zero = STS::zero();
436 const SC one = STS::one();
437
438 // number of aggregates
439 GO numAggs = aggregates->GetNumAggregates();
440
441 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
442 // aggStart is a pointer into aggToRowMap
443 // aggStart[i]..aggStart[i+1] are indices into aggToRowMap
444 // aggToRowMap[aggStart[i]]..aggToRowMap[aggStart[i+1]-1] are the DOFs in aggregate i
445 ArrayRCP<LO> aggStart;
446 ArrayRCP<GO> aggToRowMap;
447 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMap);
448
449 // find size of the largest aggregate
450 LO maxAggSize = 0;
451 for (GO i = 0; i < numAggs; ++i) {
452 LO sizeOfThisAgg = aggStart[i + 1] - aggStart[i];
453 if (sizeOfThisAgg > maxAggSize) maxAggSize = sizeOfThisAgg;
454 }
455
456 // dimension of fine level nullspace
457 const size_t NSDim = fineNullspace->getNumVectors();
458
459 // index base for coarse Dof map (usually 0)
460 GO indexBase = A->getRowMap()->getIndexBase();
461
462 const RCP<const Map> nonUniqueMap = amalgInfo->ComputeUnamalgamatedImportDofMap(*aggregates);
463 const RCP<const Map> uniqueMap = A->getDomainMap();
464 RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
465 RCP<MultiVector> fineNullspaceWithOverlap = MultiVectorFactory::Build(nonUniqueMap, NSDim);
466 fineNullspaceWithOverlap->doImport(*fineNullspace, *importer, Xpetra::INSERT);
467
468 // Pull out the nullspace vectors so that we can have random access.
469 ArrayRCP<ArrayRCP<const SC> > fineNS(NSDim);
470 for (size_t i = 0; i < NSDim; ++i)
471 fineNS[i] = fineNullspaceWithOverlap->getData(i);
472
473 // Allocate storage for the coarse nullspace.
474 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
475
476 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
477 for (size_t i = 0; i < NSDim; ++i)
478 if (coarseMap->getLocalNumElements() > 0) coarseNS[i] = coarseNullspace->getDataNonConst(i);
479
480 // This makes the rowmap of Ptent the same as that of A->
481 // This requires moving some parts of some local Q's to other processors
482 // because aggregates can span processors.
483 RCP<const Map> rowMapForPtent = A->getRowMap();
484 const Map& rowMapForPtentRef = *rowMapForPtent;
485
486 // Set up storage for the rows of the local Qs that belong to other processors.
487 // FIXME This is inefficient and could be done within the main loop below with std::vector's.
488 RCP<const Map> colMap = A->getColMap();
489
490 RCP<const Map> ghostQMap;
491 RCP<MultiVector> ghostQvalues;
492 Array<RCP<Xpetra::Vector<GO, LO, GO, Node> > > ghostQcolumns;
493 RCP<Xpetra::Vector<GO, LO, GO, Node> > ghostQrowNums;
494 ArrayRCP<ArrayRCP<SC> > ghostQvals;
495 ArrayRCP<ArrayRCP<GO> > ghostQcols;
496 ArrayRCP<GO> ghostQrows;
497
498 Array<GO> ghostGIDs;
499 for (LO j = 0; j < numAggs; ++j) {
500 for (LO k = aggStart[j]; k < aggStart[j + 1]; ++k) {
501 if (rowMapForPtentRef.isNodeGlobalElement(aggToRowMap[k]) == false) {
502 ghostGIDs.push_back(aggToRowMap[k]);
503 }
504 }
505 }
506 ghostQMap = MapFactory::Build(A->getRowMap()->lib(),
507 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
508 ghostGIDs,
509 indexBase, A->getRowMap()->getComm()); // JG:Xpetra::global_size_t>?
510 // Vector to hold bits of Q that go to other processors.
511 ghostQvalues = MultiVectorFactory::Build(ghostQMap, NSDim);
512 // Note that Epetra does not support MultiVectors templated on Scalar != double.
513 // So to work around this, we allocate an array of Vectors. This shouldn't be too
514 // expensive, as the number of Vectors is NSDim.
515 ghostQcolumns.resize(NSDim);
516 for (size_t i = 0; i < NSDim; ++i)
517 ghostQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
518 ghostQrowNums = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
519 if (ghostQvalues->getLocalLength() > 0) {
520 ghostQvals.resize(NSDim);
521 ghostQcols.resize(NSDim);
522 for (size_t i = 0; i < NSDim; ++i) {
523 ghostQvals[i] = ghostQvalues->getDataNonConst(i);
524 ghostQcols[i] = ghostQcolumns[i]->getDataNonConst(0);
525 }
526 ghostQrows = ghostQrowNums->getDataNonConst(0);
527 }
528
529 // importer to handle moving Q
530 importer = ImportFactory::Build(ghostQMap, A->getRowMap());
531
532 // Dense QR solver
533 Teuchos::SerialQRDenseSolver<LO, SC> qrSolver;
534
535 // Allocate temporary storage for the tentative prolongator.
536 Array<GO> globalColPtr(maxAggSize * NSDim, 0);
537 Array<LO> localColPtr(maxAggSize * NSDim, 0);
538 Array<SC> valPtr(maxAggSize * NSDim, 0.);
539
540 // Create column map for Ptent, estimate local #nonzeros in Ptent, and create Ptent itself.
541 const Map& coarseMapRef = *coarseMap;
542
543 // For the 3-arrays constructor
544 ArrayRCP<size_t> ptent_rowptr;
545 ArrayRCP<LO> ptent_colind;
546 ArrayRCP<Scalar> ptent_values;
547
548 // Because ArrayRCPs are slow...
549 ArrayView<size_t> rowptr_v;
550 ArrayView<LO> colind_v;
551 ArrayView<Scalar> values_v;
552
553 // For temporary usage
554 Array<size_t> rowptr_temp;
555 Array<LO> colind_temp;
556 Array<Scalar> values_temp;
557
558 RCP<CrsMatrix> PtentCrs;
559
560 RCP<CrsMatrixWrap> PtentCrsWrap = rcp(new CrsMatrixWrap(rowMapForPtent, NSDim));
561 PtentCrs = PtentCrsWrap->getCrsMatrix();
562 Ptentative = PtentCrsWrap;
563
564 //*****************************************************************
565 // Loop over all aggregates and calculate local QR decompositions.
566 //*****************************************************************
567 GO qctr = 0; // for indexing into Ptent data vectors
568 const Map& nonUniqueMapRef = *nonUniqueMap;
569
570 size_t total_nnz_count = 0;
571
572 for (GO agg = 0; agg < numAggs; ++agg) {
573 LO myAggSize = aggStart[agg + 1] - aggStart[agg];
574 // For each aggregate, extract the corresponding piece of the nullspace and put it in the flat array,
575 // "localQR" (in column major format) for the QR routine.
576 Teuchos::SerialDenseMatrix<LO, SC> localQR(myAggSize, NSDim);
577 for (size_t j = 0; j < NSDim; ++j) {
578 bool bIsZeroNSColumn = true;
579 for (LO k = 0; k < myAggSize; ++k) {
580 // aggToRowMap[aggPtr[i]+k] is the kth DOF in the ith aggregate
581 // fineNS[j][n] is the nth entry in the jth NS vector
582 try {
583 SC nsVal = fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])]; // extract information from fine level NS
584 localQR(k, j) = nsVal;
585 if (nsVal != zero) bIsZeroNSColumn = false;
586 } catch (...) {
587 GetOStream(Runtime1, -1) << "length of fine level nsp: " << fineNullspace->getGlobalLength() << std::endl;
588 GetOStream(Runtime1, -1) << "length of fine level nsp w overlap: " << fineNullspaceWithOverlap->getGlobalLength() << std::endl;
589 GetOStream(Runtime1, -1) << "(local?) aggId=" << agg << std::endl;
590 GetOStream(Runtime1, -1) << "aggSize=" << myAggSize << std::endl;
591 GetOStream(Runtime1, -1) << "agg DOF=" << k << std::endl;
592 GetOStream(Runtime1, -1) << "NS vector j=" << j << std::endl;
593 GetOStream(Runtime1, -1) << "j*myAggSize + k = " << j * myAggSize + k << std::endl;
594 GetOStream(Runtime1, -1) << "aggToRowMap[" << agg << "][" << k << "] = " << aggToRowMap[aggStart[agg] + k] << std::endl;
595 GetOStream(Runtime1, -1) << "id aggToRowMap[agg][k]=" << aggToRowMap[aggStart[agg] + k] << " is global element in nonUniqueMap = " << nonUniqueMapRef.isNodeGlobalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
596 GetOStream(Runtime1, -1) << "colMap local id aggToRowMap[agg][k]=" << nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
597 GetOStream(Runtime1, -1) << "fineNS...=" << fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])] << std::endl;
598 GetOStream(Errors, -1) << "caught an error!" << std::endl;
599 }
600 } // for (LO k=0 ...
601 TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError, "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column. Error.");
602 } // for (LO j=0 ...
603
604 Xpetra::global_size_t offset = agg * NSDim;
605
606 if (myAggSize >= Teuchos::as<LocalOrdinal>(NSDim)) {
607 // calculate QR decomposition (standard)
608 // R is stored in localQR (size: myAggSize x NSDim)
609
610 // Householder multiplier
611 SC tau = localQR(0, 0);
612
613 if (NSDim == 1) {
614 // Only one nullspace vector, so normalize by hand
615 Magnitude dtemp = 0;
616 for (size_t k = 0; k < Teuchos::as<size_t>(myAggSize); ++k) {
617 Magnitude tmag = STS::magnitude(localQR(k, 0));
618 dtemp += tmag * tmag;
619 }
620 dtemp = Teuchos::ScalarTraits<Magnitude>::squareroot(dtemp);
621 tau = localQR(0, 0);
622 localQR(0, 0) = dtemp;
623 } else {
624 qrSolver.setMatrix(Teuchos::rcp(&localQR, false));
625 qrSolver.factor();
626 }
627
628 // Extract R, the coarse nullspace. This is stored in upper triangular part of localQR.
629 // Note: coarseNS[i][.] is the ith coarse nullspace vector, which may be counter to your intuition.
630 // This stores the (offset+k)th entry only if it is local according to the coarseMap.
631 for (size_t j = 0; j < NSDim; ++j) {
632 for (size_t k = 0; k <= j; ++k) {
633 try {
634 if (coarseMapRef.isNodeLocalElement(offset + k)) {
635 coarseNS[j][offset + k] = localQR(k, j); // TODO is offset+k the correct local ID?!
636 }
637 } catch (...) {
638 GetOStream(Errors, -1) << "caught error in coarseNS insert, j=" << j << ", offset+k = " << offset + k << std::endl;
639 }
640 }
641 }
642
643 // Calculate Q, the tentative prolongator.
644 // The Lapack GEQRF call only works for myAggsize >= NSDim
645
646 if (NSDim == 1) {
647 // Only one nullspace vector, so calculate Q by hand
648 Magnitude dtemp = Teuchos::ScalarTraits<SC>::magnitude(localQR(0, 0));
649 localQR(0, 0) = tau;
650 dtemp = 1 / dtemp;
651 for (LocalOrdinal i = 0; i < myAggSize; ++i) {
652 localQR(i, 0) *= dtemp;
653 }
654 } else {
655 qrSolver.formQ();
656 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO, SC> > qFactor = qrSolver.getQ();
657 for (size_t j = 0; j < NSDim; j++) {
658 for (size_t i = 0; i < Teuchos::as<size_t>(myAggSize); i++) {
659 localQR(i, j) = (*qFactor)(i, j);
660 }
661 }
662 }
663
664 // end default case (myAggSize >= NSDim)
665 } else { // special handling for myAggSize < NSDim (i.e. 1pt nodes)
666 // See comments for the uncoupled case
667
668 // R = extended (by adding identity rows) localQR
669 for (size_t j = 0; j < NSDim; j++)
670 for (size_t k = 0; k < NSDim; k++) {
671 TEUCHOS_TEST_FOR_EXCEPTION(!coarseMapRef.isNodeLocalElement(offset + k), Exceptions::RuntimeError,
672 "Caught error in coarseNS insert, j=" << j << ", offset+k = " << offset + k);
673
674 if (k < as<size_t>(myAggSize))
675 coarseNS[j][offset + k] = localQR(k, j);
676 else
677 coarseNS[j][offset + k] = (k == j ? one : zero);
678 }
679
680 // Q = I (rectangular)
681 for (size_t i = 0; i < as<size_t>(myAggSize); i++)
682 for (size_t j = 0; j < NSDim; j++)
683 localQR(i, j) = (j == i ? one : zero);
684 } // end else (special handling for 1pt aggregates)
685
686 // Process each row in the local Q factor. If the row is local to the current processor
687 // according to the rowmap, insert it into Ptentative. Otherwise, save it in ghostQ
688 // to be communicated later to the owning processor.
689 // FIXME -- what happens if maps are blocked?
690 for (GO j = 0; j < myAggSize; ++j) {
691 // This loop checks whether row associated with current DOF is local, according to rowMapForPtent.
692 // If it is, the row is inserted. If not, the row number, columns, and values are saved in
693 // MultiVectors that will be sent to other processors.
694 GO globalRow = aggToRowMap[aggStart[agg] + j];
695
696 // TODO is the use of Xpetra::global_size_t below correct?
697 if (rowMapForPtentRef.isNodeGlobalElement(globalRow) == false) {
698 ghostQrows[qctr] = globalRow;
699 for (size_t k = 0; k < NSDim; ++k) {
700 ghostQcols[k][qctr] = coarseMapRef.getGlobalElement(agg * NSDim + k);
701 ghostQvals[k][qctr] = localQR(j, k);
702 }
703 ++qctr;
704 } else {
705 size_t nnz = 0;
706 for (size_t k = 0; k < NSDim; ++k) {
707 try {
708 if (localQR(j, k) != Teuchos::ScalarTraits<SC>::zero()) {
709 localColPtr[nnz] = agg * NSDim + k;
710 globalColPtr[nnz] = coarseMapRef.getGlobalElement(localColPtr[nnz]);
711 valPtr[nnz] = localQR(j, k);
712 ++total_nnz_count;
713 ++nnz;
714 }
715 } catch (...) {
716 GetOStream(Errors, -1) << "caught error in colPtr/valPtr insert, current index=" << nnz << std::endl;
717 }
718 } // for (size_t k=0; k<NSDim; ++k)
719
720 try {
721 Ptentative->insertGlobalValues(globalRow, globalColPtr.view(0, nnz), valPtr.view(0, nnz));
722 } catch (...) {
723 GetOStream(Errors, -1) << "pid " << A->getRowMap()->getComm()->getRank()
724 << "caught error during Ptent row insertion, global row "
725 << globalRow << std::endl;
726 }
727 }
728 } // for (GO j=0; j<myAggSize; ++j)
729
730 } // for (LO agg=0; agg<numAggs; ++agg)
731
732 // ***********************************************************
733 // ************* end of aggregate-wise QR ********************
734 // ***********************************************************
735 GetOStream(Runtime1) << "TentativePFactory : aggregates may cross process boundaries" << std::endl;
736 // Import ghost parts of Q factors and insert into Ptentative.
737 // First import just the global row numbers.
738 RCP<Xpetra::Vector<GO, LO, GO, Node> > targetQrowNums = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(rowMapForPtent);
739 targetQrowNums->putScalar(-1);
740 targetQrowNums->doImport(*ghostQrowNums, *importer, Xpetra::INSERT);
741 ArrayRCP<GO> targetQrows = targetQrowNums->getDataNonConst(0);
742
743 // Now create map based on just the row numbers imported.
744 Array<GO> gidsToImport;
745 gidsToImport.reserve(targetQrows.size());
746 for (typename ArrayRCP<GO>::iterator r = targetQrows.begin(); r != targetQrows.end(); ++r) {
747 if (*r > -1) {
748 gidsToImport.push_back(*r);
749 }
750 }
751 RCP<const Map> reducedMap = MapFactory::Build(A->getRowMap()->lib(),
752 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
753 gidsToImport, indexBase, A->getRowMap()->getComm());
754
755 // Import using the row numbers that this processor will receive.
756 importer = ImportFactory::Build(ghostQMap, reducedMap);
757
758 Array<RCP<Xpetra::Vector<GO, LO, GO, Node> > > targetQcolumns(NSDim);
759 for (size_t i = 0; i < NSDim; ++i) {
760 targetQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(reducedMap);
761 targetQcolumns[i]->doImport(*(ghostQcolumns[i]), *importer, Xpetra::INSERT);
762 }
763 RCP<MultiVector> targetQvalues = MultiVectorFactory::Build(reducedMap, NSDim);
764 targetQvalues->doImport(*ghostQvalues, *importer, Xpetra::INSERT);
765
766 ArrayRCP<ArrayRCP<SC> > targetQvals;
767 ArrayRCP<ArrayRCP<GO> > targetQcols;
768 if (targetQvalues->getLocalLength() > 0) {
769 targetQvals.resize(NSDim);
770 targetQcols.resize(NSDim);
771 for (size_t i = 0; i < NSDim; ++i) {
772 targetQvals[i] = targetQvalues->getDataNonConst(i);
773 targetQcols[i] = targetQcolumns[i]->getDataNonConst(0);
774 }
775 }
776
777 valPtr = Array<SC>(NSDim, 0.);
778 globalColPtr = Array<GO>(NSDim, 0);
779 for (typename Array<GO>::iterator r = gidsToImport.begin(); r != gidsToImport.end(); ++r) {
780 if (targetQvalues->getLocalLength() > 0) {
781 for (size_t j = 0; j < NSDim; ++j) {
782 valPtr[j] = targetQvals[j][reducedMap->getLocalElement(*r)];
783 globalColPtr[j] = targetQcols[j][reducedMap->getLocalElement(*r)];
784 }
785 Ptentative->insertGlobalValues(*r, globalColPtr.view(0, NSDim), valPtr.view(0, NSDim));
786 } // if (targetQvalues->getLocalLength() > 0)
787 }
788
789 Ptentative->fillComplete(coarseMap, A->getDomainMap());
790}
791
792template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
794 BuildPuncoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
795 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
796 RCP<const Map> rowMap = A->getRowMap();
797 RCP<const Map> colMap = A->getColMap();
798 const size_t numRows = rowMap->getLocalNumElements();
799
800 typedef Teuchos::ScalarTraits<SC> STS;
801 typedef typename STS::magnitudeType Magnitude;
802 const SC zero = STS::zero();
803 const SC one = STS::one();
804 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
805
806 const GO numAggs = aggregates->GetNumAggregates();
807 const size_t NSDim = fineNullspace->getNumVectors();
808 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
809
810 // Sanity checking
811 const ParameterList& pL = GetParameterList();
812 const bool& doQRStep = pL.get<bool>("tentative: calculate qr");
813 const bool& constantColSums = pL.get<bool>("tentative: constant column sums");
814
815 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums, Exceptions::RuntimeError,
816 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
817
818 // Aggregates map is based on the amalgamated column map
819 // We can skip global-to-local conversion if LIDs in row map are
820 // same as LIDs in column map
821 bool goodMap = MueLu::Utilities<SC, LO, GO, NO>::MapsAreNested(*rowMap, *colMap);
822
823 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
824 // aggStart is a pointer into aggToRowMapLO
825 // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
826 // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
827 ArrayRCP<LO> aggStart;
828 ArrayRCP<LO> aggToRowMapLO;
829 ArrayRCP<GO> aggToRowMapGO;
830 if (goodMap) {
831 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
832 GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
833
834 } else {
835 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMapGO);
836 GetOStream(Warnings0) << "Column map is not consistent with the row map\n"
837 << "using GO->LO conversion with performance penalty" << std::endl;
838 }
839 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
840
841 // Pull out the nullspace vectors so that we can have random access.
842 ArrayRCP<ArrayRCP<const SC> > fineNS(NSDim);
843 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
844 for (size_t i = 0; i < NSDim; i++) {
845 fineNS[i] = fineNullspace->getData(i);
846 if (coarseMap->getLocalNumElements() > 0)
847 coarseNS[i] = coarseNullspace->getDataNonConst(i);
848 }
849
850 size_t nnzEstimate = numRows * NSDim;
851
852 // Time to construct the matrix and fill in the values
853 Ptentative = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
854 RCP<CrsMatrix> PtentCrs = toCrsMatrix(Ptentative);
855
856 ArrayRCP<size_t> iaPtent;
857 ArrayRCP<LO> jaPtent;
858 ArrayRCP<SC> valPtent;
859
860 PtentCrs->allocateAllValues(nnzEstimate, iaPtent, jaPtent, valPtent);
861
862 ArrayView<size_t> ia = iaPtent();
863 ArrayView<LO> ja = jaPtent();
864 ArrayView<SC> val = valPtent();
865
866 ia[0] = 0;
867 for (size_t i = 1; i <= numRows; i++)
868 ia[i] = ia[i - 1] + NSDim;
869
870 for (size_t j = 0; j < nnzEstimate; j++) {
871 ja[j] = INVALID;
872 val[j] = zero;
873 }
874
875 if (doQRStep) {
877 // Standard aggregate-wise QR //
879 for (GO agg = 0; agg < numAggs; agg++) {
880 LO aggSize = aggStart[agg + 1] - aggStart[agg];
881
882 Xpetra::global_size_t offset = agg * NSDim;
883
884 // Extract the piece of the nullspace corresponding to the aggregate, and
885 // put it in the flat array, "localQR" (in column major format) for the
886 // QR routine.
887 Teuchos::SerialDenseMatrix<LO, SC> localQR(aggSize, NSDim);
888 if (goodMap) {
889 for (size_t j = 0; j < NSDim; j++)
890 for (LO k = 0; k < aggSize; k++)
891 localQR(k, j) = fineNS[j][aggToRowMapLO[aggStart[agg] + k]];
892 } else {
893 for (size_t j = 0; j < NSDim; j++)
894 for (LO k = 0; k < aggSize; k++)
895 localQR(k, j) = fineNS[j][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + k])];
896 }
897
898 // Test for zero columns
899 for (size_t j = 0; j < NSDim; j++) {
900 bool bIsZeroNSColumn = true;
901
902 for (LO k = 0; k < aggSize; k++)
903 if (localQR(k, j) != zero)
904 bIsZeroNSColumn = false;
905
906 TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError,
907 "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column in NS column " << j);
908 }
909
910 // Calculate QR decomposition (standard)
911 // NOTE: Q is stored in localQR and R is stored in coarseNS
912 if (aggSize >= Teuchos::as<LO>(NSDim)) {
913 if (NSDim == 1) {
914 // Only one nullspace vector, calculate Q and R by hand
915 Magnitude norm = STS::magnitude(zero);
916 for (size_t k = 0; k < Teuchos::as<size_t>(aggSize); k++)
917 norm += STS::magnitude(localQR(k, 0) * localQR(k, 0));
918 norm = Teuchos::ScalarTraits<Magnitude>::squareroot(norm);
919
920 // R = norm
921 coarseNS[0][offset] = norm;
922
923 // Q = localQR(:,0)/norm
924 for (LO i = 0; i < aggSize; i++)
925 localQR(i, 0) /= norm;
926
927 } else {
928 Teuchos::SerialQRDenseSolver<LO, SC> qrSolver;
929 qrSolver.setMatrix(Teuchos::rcp(&localQR, false));
930 qrSolver.factor();
931
932 // R = upper triangular part of localQR
933 for (size_t j = 0; j < NSDim; j++)
934 for (size_t k = 0; k <= j; k++)
935 coarseNS[j][offset + k] = localQR(k, j); // TODO is offset+k the correct local ID?!
936
937 // Calculate Q, the tentative prolongator.
938 // The Lapack GEQRF call only works for myAggsize >= NSDim
939 qrSolver.formQ();
940 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO, SC> > qFactor = qrSolver.getQ();
941 for (size_t j = 0; j < NSDim; j++)
942 for (size_t i = 0; i < Teuchos::as<size_t>(aggSize); i++)
943 localQR(i, j) = (*qFactor)(i, j);
944 }
945
946 } else {
947 // Special handling for aggSize < NSDim (i.e. single node aggregates in structural mechanics)
948
949 // The local QR decomposition is not possible in the "overconstrained"
950 // case (i.e. number of columns in localQR > number of rows), which
951 // corresponds to #DOFs in Aggregate < NSDim. For usual problems this
952 // is only possible for single node aggregates in structural mechanics.
953 // (Similar problems may arise in discontinuous Galerkin problems...)
954 // We bypass the QR decomposition and use an identity block in the
955 // tentative prolongator for the single node aggregate and transfer the
956 // corresponding fine level null space information 1-to-1 to the coarse
957 // level null space part.
958
959 // NOTE: The resulting tentative prolongation operator has
960 // (aggSize*DofsPerNode-NSDim) zero columns leading to a singular
961 // coarse level operator A. To deal with that one has the following
962 // options:
963 // - Use the "RepairMainDiagonal" flag in the RAPFactory (default:
964 // false) to add some identity block to the diagonal of the zero rows
965 // in the coarse level operator A, such that standard level smoothers
966 // can be used again.
967 // - Use special (projection-based) level smoothers, which can deal
968 // with singular matrices (very application specific)
969 // - Adapt the code below to avoid zero columns. However, we do not
970 // support a variable number of DOFs per node in MueLu/Xpetra which
971 // makes the implementation really hard.
972
973 // R = extended (by adding identity rows) localQR
974 for (size_t j = 0; j < NSDim; j++)
975 for (size_t k = 0; k < NSDim; k++)
976 if (k < as<size_t>(aggSize))
977 coarseNS[j][offset + k] = localQR(k, j);
978 else
979 coarseNS[j][offset + k] = (k == j ? one : zero);
980
981 // Q = I (rectangular)
982 for (size_t i = 0; i < as<size_t>(aggSize); i++)
983 for (size_t j = 0; j < NSDim; j++)
984 localQR(i, j) = (j == i ? one : zero);
985 }
986
987 // Process each row in the local Q factor
988 // FIXME: What happens if maps are blocked?
989 for (LO j = 0; j < aggSize; j++) {
990 LO localRow = (goodMap ? aggToRowMapLO[aggStart[agg] + j] : rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]));
991
992 size_t rowStart = ia[localRow];
993 for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
994 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
995 if (localQR(j, k) != zero) {
996 ja[rowStart + lnnz] = offset + k;
997 val[rowStart + lnnz] = localQR(j, k);
998 lnnz++;
999 }
1000 }
1001 }
1002 }
1003
1004 } else {
1005 GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
1006 if (NSDim > 1)
1007 GetOStream(Warnings0) << "TentativePFactory : for nontrivial nullspace, this may degrade performance" << std::endl;
1009 // "no-QR" option //
1011 // Local Q factor is just the fine nullspace support over the current aggregate.
1012 // Local R factor is the identity.
1013 // TODO I have not implemented any special handling for aggregates that are too
1014 // TODO small to locally support the nullspace, as is done in the standard QR
1015 // TODO case above.
1016 if (goodMap) {
1017 for (GO agg = 0; agg < numAggs; agg++) {
1018 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1019 Xpetra::global_size_t offset = agg * NSDim;
1020
1021 // Process each row in the local Q factor
1022 // FIXME: What happens if maps are blocked?
1023 for (LO j = 0; j < aggSize; j++) {
1024 // TODO Here I do not check for a zero nullspace column on the aggregate.
1025 // as is done in the standard QR case.
1026
1027 const LO localRow = aggToRowMapLO[aggStart[agg] + j];
1028
1029 const size_t rowStart = ia[localRow];
1030
1031 for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
1032 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1033 SC qr_jk = fineNS[k][aggToRowMapLO[aggStart[agg] + j]];
1034 if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1035 if (qr_jk != zero) {
1036 ja[rowStart + lnnz] = offset + k;
1037 val[rowStart + lnnz] = qr_jk;
1038 lnnz++;
1039 }
1040 }
1041 }
1042 for (size_t j = 0; j < NSDim; j++)
1043 coarseNS[j][offset + j] = one;
1044 } // for (GO agg = 0; agg < numAggs; agg++)
1045
1046 } else {
1047 for (GO agg = 0; agg < numAggs; agg++) {
1048 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1049 Xpetra::global_size_t offset = agg * NSDim;
1050 for (LO j = 0; j < aggSize; j++) {
1051 const LO localRow = rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]);
1052
1053 const size_t rowStart = ia[localRow];
1054
1055 for (size_t k = 0, lnnz = 0; k < NSDim; ++k) {
1056 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1057 SC qr_jk = fineNS[k][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j])];
1058 if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1059 if (qr_jk != zero) {
1060 ja[rowStart + lnnz] = offset + k;
1061 val[rowStart + lnnz] = qr_jk;
1062 lnnz++;
1063 }
1064 }
1065 }
1066 for (size_t j = 0; j < NSDim; j++)
1067 coarseNS[j][offset + j] = one;
1068 } // for (GO agg = 0; agg < numAggs; agg++)
1069
1070 } // if (goodmap) else ...
1071
1072 } // if doQRStep ... else
1073
1074 // Compress storage (remove all INVALID, which happen when we skip zeros)
1075 // We do that in-place
1076 size_t ia_tmp = 0, nnz = 0;
1077 for (size_t i = 0; i < numRows; i++) {
1078 for (size_t j = ia_tmp; j < ia[i + 1]; j++)
1079 if (ja[j] != INVALID) {
1080 ja[nnz] = ja[j];
1081 val[nnz] = val[j];
1082 nnz++;
1083 }
1084 ia_tmp = ia[i + 1];
1085 ia[i + 1] = nnz;
1086 }
1087 if (rowMap->lib() == Xpetra::UseTpetra) {
1088 // - Cannot resize for Epetra, as it checks for same pointers
1089 // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
1090 // NOTE: these invalidate ja and val views
1091 jaPtent.resize(nnz);
1092 valPtent.resize(nnz);
1093 }
1094
1095 GetOStream(Runtime1) << "TentativePFactory : aggregates do not cross process boundaries" << std::endl;
1096
1097 PtentCrs->setAllValues(iaPtent, jaPtent, valPtent);
1098
1099 // Managing labels & constants for ESFC
1100 RCP<ParameterList> FCparams;
1101 if (pL.isSublist("matrixmatrix: kernel params"))
1102 FCparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
1103 else
1104 FCparams = rcp(new ParameterList);
1105 // By default, we don't need global constants for TentativeP
1106 FCparams->set("compute global constants", FCparams->get("compute global constants", false));
1107 std::string levelIDs = toString(levelID);
1108 FCparams->set("Timer Label", std::string("MueLu::TentativeP-") + levelIDs);
1109 RCP<const Export> dummy_e;
1110 RCP<const Import> dummy_i;
1111
1112 PtentCrs->expertStaticFillComplete(coarseMap, A->getDomainMap(), dummy_i, dummy_e, FCparams);
1113}
1114
1115} // namespace MueLu
1116
1117// TODO ReUse: If only P or Nullspace is missing, TentativePFactory can be smart and skip part of the computation.
1118
1119#define MUELU_TENTATIVEPFACTORY_SHORT
1120#endif // MUELU_TENTATIVEPFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
int GetLevelID() const
Return level number.
static const NoFactory * get()
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void BuildPuncoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void BuildPuncoupledBlockCrs(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void BuildPcoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace) const
static bool MapsAreNested(const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &rowMap, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &colMap)
Namespace for MueLu classes and methods.
@ Warnings0
Important warning messages (one line)
@ Statistics2
Print even more statistics.
@ Runtime1
Description of what is happening (more verbose)
std::string toString(const T &what)
Little helper function to convert non-string types to strings.