MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_TentativePFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_TENTATIVEPFACTORY_DEF_HPP
11#define MUELU_TENTATIVEPFACTORY_DEF_HPP
12
13#include <Xpetra_MapFactory.hpp>
14#include <Xpetra_Map.hpp>
15#include <Xpetra_CrsMatrix.hpp>
16#include <Xpetra_CrsGraphFactory.hpp>
17#include <Xpetra_Matrix.hpp>
18#include <Xpetra_MatrixMatrix.hpp>
19#include <Xpetra_MultiVector.hpp>
20#include <Xpetra_MultiVectorFactory.hpp>
21#include <Xpetra_VectorFactory.hpp>
22#include <Xpetra_Import.hpp>
23#include <Xpetra_ImportFactory.hpp>
24#include <Xpetra_CrsMatrixWrap.hpp>
25#include <Xpetra_StridedMap.hpp>
26#include <Xpetra_StridedMapFactory.hpp>
27#include <Xpetra_IO.hpp>
28
29#include "Xpetra_TpetraBlockCrsMatrix.hpp"
30
32
33#include "MueLu_Aggregates.hpp"
34#include "MueLu_AmalgamationInfo.hpp"
35#include "MueLu_MasterList.hpp"
36#include "MueLu_Monitor.hpp"
37#include "MueLu_PerfUtils.hpp"
38#include "MueLu_Utilities.hpp"
39
40namespace MueLu {
41
42template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
44 RCP<ParameterList> validParamList = rcp(new ParameterList());
45
46#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
47 SET_VALID_ENTRY("tentative: calculate qr");
48 SET_VALID_ENTRY("tentative: build coarse coordinates");
49 SET_VALID_ENTRY("tentative: constant column sums");
50#undef SET_VALID_ENTRY
51 validParamList->set<std::string>("Nullspace name", "Nullspace", "Name for the input nullspace");
52
53 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
54 validParamList->set<RCP<const FactoryBase> >("Aggregates", Teuchos::null, "Generating factory of the aggregates");
55 validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
56 validParamList->set<RCP<const FactoryBase> >("Scaled Nullspace", Teuchos::null, "Generating factory of the scaled nullspace");
57 validParamList->set<RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
58 validParamList->set<RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the coarse map");
59 validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory of the coordinates");
60 validParamList->set<RCP<const FactoryBase> >("Node Comm", Teuchos::null, "Generating factory of the node level communicator");
61
62 // Make sure we don't recursively validate options for the matrixmatrix kernels
63 ParameterList norecurse;
64 norecurse.disableRecursiveValidation();
65 validParamList->set<ParameterList>("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
66
67 return validParamList;
68}
69
70template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
72 const ParameterList& pL = GetParameterList();
73 // NOTE: This guy can only either be 'Nullspace' or 'Scaled Nullspace' or else the validator above will cause issues
74 std::string nspName = "Nullspace";
75 if (pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
76
77 Input(fineLevel, "A");
78 Input(fineLevel, "Aggregates");
79 Input(fineLevel, nspName);
80 Input(fineLevel, "UnAmalgamationInfo");
81 Input(fineLevel, "CoarseMap");
82 if (fineLevel.GetLevelID() == 0 &&
83 fineLevel.IsAvailable("Coordinates", NoFactory::get()) && // we have coordinates (provided by user app)
84 pL.get<bool>("tentative: build coarse coordinates")) { // and we want coordinates on other levels
85 bTransferCoordinates_ = true; // then set the transfer coordinates flag to true
86 Input(fineLevel, "Coordinates");
87 } else if (bTransferCoordinates_) {
88 Input(fineLevel, "Coordinates");
89 }
90}
91
92template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
94 return BuildP(fineLevel, coarseLevel);
95}
96
97template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
99 FactoryMonitor m(*this, "Build", coarseLevel);
100 typedef typename Teuchos::ScalarTraits<Scalar>::coordinateType coordinate_type;
101 typedef Xpetra::MultiVector<coordinate_type, LO, GO, NO> RealValuedMultiVector;
102 typedef Xpetra::MultiVectorFactory<coordinate_type, LO, GO, NO> RealValuedMultiVectorFactory;
103
104 const ParameterList& pL = GetParameterList();
105 std::string nspName = "Nullspace";
106 if (pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
107
108 RCP<Matrix> Ptentative;
109 RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
110 RCP<Aggregates> aggregates = Get<RCP<Aggregates> >(fineLevel, "Aggregates");
111 // No coarse DoFs so we need to bail by setting Ptentattive to null and returning
112 // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
113 if (aggregates->GetNumGlobalAggregatesComputeIfNeeded() == 0) {
114 Ptentative = Teuchos::null;
115 Set(coarseLevel, "P", Ptentative);
116 return;
117 }
118
119 RCP<AmalgamationInfo> amalgInfo = Get<RCP<AmalgamationInfo> >(fineLevel, "UnAmalgamationInfo");
120 RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, nspName);
121 RCP<const Map> coarseMap = Get<RCP<const Map> >(fineLevel, "CoarseMap");
122 RCP<RealValuedMultiVector> fineCoords;
123 if (bTransferCoordinates_) {
124 fineCoords = Get<RCP<RealValuedMultiVector> >(fineLevel, "Coordinates");
125 }
126
127 // FIXME: We should remove the NodeComm on levels past the threshold
128 if (fineLevel.IsAvailable("Node Comm")) {
129 RCP<const Teuchos::Comm<int> > nodeComm = Get<RCP<const Teuchos::Comm<int> > >(fineLevel, "Node Comm");
130 Set<RCP<const Teuchos::Comm<int> > >(coarseLevel, "Node Comm", nodeComm);
131 }
132
133 // NOTE: We check DomainMap here rather than RowMap because those are different for BlockCrs matrices
134 TEUCHOS_TEST_FOR_EXCEPTION(A->getDomainMap()->getLocalNumElements() != fineNullspace->getMap()->getLocalNumElements(),
135 Exceptions::RuntimeError, "MueLu::TentativePFactory::MakeTentative: Size mismatch between A and Nullspace");
136
137 RCP<MultiVector> coarseNullspace;
138 RCP<RealValuedMultiVector> coarseCoords;
139
140 if (bTransferCoordinates_) {
141 //*** Create the coarse coordinates ***
142 // First create the coarse map and coarse multivector
143 ArrayView<const GO> elementAList = coarseMap->getLocalElementList();
144 LO blkSize = 1;
145 if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null) {
146 blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize();
147 }
148 GO indexBase = coarseMap->getIndexBase();
149 LO numCoarseNodes = Teuchos::as<LO>(elementAList.size() / blkSize);
150 Array<GO> nodeList(numCoarseNodes);
151 const int numDimensions = fineCoords->getNumVectors();
152
153 for (LO i = 0; i < numCoarseNodes; i++) {
154 nodeList[i] = (elementAList[i * blkSize] - indexBase) / blkSize + indexBase;
155 }
156 Teuchos::RCP<Teuchos::ParameterList> params = Teuchos::rcp(new Teuchos::ParameterList());
157 params->set("compute global constants", false);
158 RCP<const Map> coarseCoordsMap = MapFactory::Build(fineCoords->getMap()->lib(),
159 blkSize * coarseMap->getGlobalNumElements(),
160 nodeList,
161 indexBase,
162 fineCoords->getMap()->getComm(),
163 params);
164 coarseCoords = RealValuedMultiVectorFactory::Build(coarseCoordsMap, numDimensions);
165
166 // Create overlapped fine coordinates to reduce global communication
167 RCP<RealValuedMultiVector> ghostedCoords;
168 if (aggregates->AggregatesCrossProcessors()) {
169 RCP<const Map> aggMap = aggregates->GetMap();
170 RCP<const Import> importer = ImportFactory::Build(fineCoords->getMap(), aggMap);
171
172 ghostedCoords = RealValuedMultiVectorFactory::Build(aggMap, numDimensions);
173 ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);
174 } else {
175 ghostedCoords = fineCoords;
176 }
177
178 // Get some info about aggregates
179 int myPID = coarseCoordsMap->getComm()->getRank();
180 LO numAggs = aggregates->GetNumAggregates();
181 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
182 const ArrayRCP<const LO> vertex2AggID = aggregates->GetVertex2AggId()->getData(0);
183 const ArrayRCP<const LO> procWinner = aggregates->GetProcWinner()->getData(0);
184
185 // Fill in coarse coordinates
186 for (int dim = 0; dim < numDimensions; ++dim) {
187 ArrayRCP<const coordinate_type> fineCoordsData = ghostedCoords->getData(dim);
188 ArrayRCP<coordinate_type> coarseCoordsData = coarseCoords->getDataNonConst(dim);
189
190 for (LO lnode = 0; lnode < Teuchos::as<LO>(vertex2AggID.size()); lnode++) {
191 if (procWinner[lnode] == myPID &&
192 lnode < fineCoordsData.size() &&
193 vertex2AggID[lnode] < coarseCoordsData.size() &&
194 Teuchos::ScalarTraits<coordinate_type>::isnaninf(fineCoordsData[lnode]) == false) {
195 coarseCoordsData[vertex2AggID[lnode]] += fineCoordsData[lnode];
196 }
197 }
198 for (LO agg = 0; agg < numAggs; agg++) {
199 coarseCoordsData[agg] /= aggSizes[agg];
200 }
201 }
202 }
203
204 if (!aggregates->AggregatesCrossProcessors()) {
205 if (Xpetra::Helpers<SC, LO, GO, NO>::isTpetraBlockCrs(A)) {
206 BuildPuncoupledBlockCrs(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.GetLevelID());
207 } else {
208 BuildPuncoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.GetLevelID());
209 }
210 } else
211 BuildPcoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
212
213 // If available, use striding information of fine level matrix A for range
214 // map and coarseMap as domain map; otherwise use plain range map of
215 // Ptent = plain range map of A for range map and coarseMap as domain map.
216 // NOTE:
217 // The latter is not really safe, since there is no striding information
218 // for the range map. This is not really a problem, since striding
219 // information is always available on the intermedium levels and the
220 // coarsest levels.
221 if (A->IsView("stridedMaps") == true)
222 Ptentative->CreateView("stridedMaps", A->getRowMap("stridedMaps"), coarseMap);
223 else
224 Ptentative->CreateView("stridedMaps", Ptentative->getRangeMap(), coarseMap);
225
226 if (bTransferCoordinates_) {
227 Set(coarseLevel, "Coordinates", coarseCoords);
228 }
229 Set(coarseLevel, "Nullspace", coarseNullspace);
230 Set(coarseLevel, "P", Ptentative);
231
232 if (IsPrint(Statistics2)) {
233 RCP<ParameterList> params = rcp(new ParameterList());
234 params->set("printLoadBalancingInfo", true);
235 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ptentative, "Ptent", params);
236 }
237}
238
239template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
241 BuildPuncoupledBlockCrs(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
242 RCP<const Map> coarsePointMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
243 /* This routine generates a BlockCrs P for a BlockCrs A. There are a few assumptions here, which meet the use cases we care about, but could
244 be generalized later, if we ever need to do so:
245 1) Null space dimension === block size of matrix: So no elasticity right now
246 2) QR is not supported: Under assumption #1, this shouldn't cause problems.
247 3) Maps are "good": Aka the first chunk of the ColMap is the RowMap.
248
249 These assumptions keep our code way simpler and still support the use cases we actually care about.
250 */
251
252 RCP<const Map> rowMap = A->getRowMap();
253 RCP<const Map> rangeMap = A->getRangeMap();
254 RCP<const Map> colMap = A->getColMap();
255 // const size_t numFinePointRows = rangeMap->getLocalNumElements();
256 const size_t numFineBlockRows = rowMap->getLocalNumElements();
257
258 typedef Teuchos::ScalarTraits<SC> STS;
259 // typedef typename STS::magnitudeType Magnitude;
260 const SC zero = STS::zero();
261 const SC one = STS::one();
262 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
263
264 const GO numAggs = aggregates->GetNumAggregates();
265 const size_t NSDim = fineNullspace->getNumVectors();
266 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
267
268 // Need to generate the coarse block map
269 // NOTE: We assume NSDim == block size here
270 // NOTE: We also assume that coarseMap has contiguous GIDs
271 // const size_t numCoarsePointRows = coarsePointMap->getLocalNumElements();
272 const size_t numCoarseBlockRows = coarsePointMap->getLocalNumElements() / NSDim;
273 RCP<const Map> coarseBlockMap = MapFactory::Build(coarsePointMap->lib(),
274 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
275 numCoarseBlockRows,
276 coarsePointMap->getIndexBase(),
277 coarsePointMap->getComm());
278 // Sanity checking
279 const ParameterList& pL = GetParameterList();
280 const bool& doQRStep = pL.get<bool>("tentative: calculate qr");
281 const bool& constantColSums = pL.get<bool>("tentative: constant column sums");
282
283 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums, Exceptions::RuntimeError,
284 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
285
286 // The aggregates use the amalgamated column map, which in this case is what we want
287
288 // Aggregates map is based on the amalgamated column map
289 // We can skip global-to-local conversion if LIDs in row map are
290 // same as LIDs in column map
291 bool goodMap = MueLu::Utilities<SC, LO, GO, NO>::MapsAreNested(*rowMap, *colMap);
292
293 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
294 // aggStart is a pointer into aggToRowMapLO
295 // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
296 // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
297 ArrayRCP<LO> aggStart;
298 ArrayRCP<LO> aggToRowMapLO;
299 ArrayRCP<GO> aggToRowMapGO;
300 if (goodMap) {
301 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
302 GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
303 } else {
304 throw std::runtime_error("TentativePFactory::PuncoupledBlockCrs: Inconsistent maps not currently supported");
305 }
306
307 coarseNullspace = MultiVectorFactory::Build(coarsePointMap, NSDim);
308
309 // Pull out the nullspace vectors so that we can have random access.
310 ArrayRCP<ArrayRCP<const SC> > fineNS(NSDim);
311 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
312 for (size_t i = 0; i < NSDim; i++) {
313 fineNS[i] = fineNullspace->getData(i);
314 if (coarsePointMap->getLocalNumElements() > 0)
315 coarseNS[i] = coarseNullspace->getDataNonConst(i);
316 }
317
318 // BlockCrs requires that we build the (block) graph first, so let's do that...
319 // NOTE: Because we're assuming that the NSDim == BlockSize, we only have one
320 // block non-zero per row in the matrix;
321 RCP<CrsGraph> BlockGraph = CrsGraphFactory::Build(rowMap, coarseBlockMap, 0);
322 ArrayRCP<size_t> iaPtent;
323 ArrayRCP<LO> jaPtent;
324 BlockGraph->allocateAllIndices(numFineBlockRows, iaPtent, jaPtent);
325 ArrayView<size_t> ia = iaPtent();
326 ArrayView<LO> ja = jaPtent();
327
328 for (size_t i = 0; i < numFineBlockRows; i++) {
329 ia[i] = i;
330 ja[i] = INVALID;
331 }
332 ia[numCoarseBlockRows] = numCoarseBlockRows;
333
334 for (GO agg = 0; agg < numAggs; agg++) {
335 LO aggSize = aggStart[agg + 1] - aggStart[agg];
336 Xpetra::global_size_t offset = agg;
337
338 for (LO j = 0; j < aggSize; j++) {
339 // FIXME: Allow for bad maps
340 const LO localRow = aggToRowMapLO[aggStart[agg] + j];
341 const size_t rowStart = ia[localRow];
342 ja[rowStart] = offset;
343 }
344 }
345
346 // Compress storage (remove all INVALID, which happen when we skip zeros)
347 // We do that in-place
348 size_t ia_tmp = 0, nnz = 0;
349 for (size_t i = 0; i < numFineBlockRows; i++) {
350 for (size_t j = ia_tmp; j < ia[i + 1]; j++)
351 if (ja[j] != INVALID) {
352 ja[nnz] = ja[j];
353 nnz++;
354 }
355 ia_tmp = ia[i + 1];
356 ia[i + 1] = nnz;
357 }
358
359 if (rowMap->lib() == Xpetra::UseTpetra) {
360 // - Cannot resize for Epetra, as it checks for same pointers
361 // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
362 // NOTE: these invalidate ja and val views
363 jaPtent.resize(nnz);
364 }
365
366 GetOStream(Runtime1) << "TentativePFactory : generating block graph" << std::endl;
367 BlockGraph->setAllIndices(iaPtent, jaPtent);
368
369 // Managing labels & constants for ESFC
370 {
371 RCP<ParameterList> FCparams;
372 if (pL.isSublist("matrixmatrix: kernel params"))
373 FCparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
374 else
375 FCparams = rcp(new ParameterList);
376 // By default, we don't need global constants for TentativeP, but we do want it for the graph
377 // if we're printing statistics, so let's leave it on for now.
378 FCparams->set("compute global constants", FCparams->get("compute global constants", true));
379 std::string levelIDs = toString(levelID);
380 FCparams->set("Timer Label", std::string("MueLu::TentativeP-") + levelIDs);
381 RCP<const Export> dummy_e;
382 RCP<const Import> dummy_i;
383 BlockGraph->expertStaticFillComplete(coarseBlockMap, rowMap, dummy_i, dummy_e, FCparams);
384 }
385
386 // Now let's make a BlockCrs Matrix
387 // NOTE: Assumes block size== NSDim
388 RCP<Xpetra::CrsMatrix<SC, LO, GO, NO> > P_xpetra = Xpetra::CrsMatrixFactory<SC, LO, GO, NO>::BuildBlock(BlockGraph, coarsePointMap, rangeMap, NSDim);
389 RCP<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO> > P_tpetra = rcp_dynamic_cast<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO> >(P_xpetra);
390 if (P_tpetra.is_null()) throw std::runtime_error("BuildPUncoupled: Matrix factory did not return a Tpetra::BlockCrsMatrix");
391 RCP<CrsMatrixWrap> P_wrap = rcp(new CrsMatrixWrap(P_xpetra));
392
394 // "no-QR" option //
396 // Local Q factor is just the fine nullspace support over the current aggregate.
397 // Local R factor is the identity.
398 // NOTE: We're not going to do a QR here as we're assuming that blocksize == NSDim
399 // NOTE: "goodMap" case only
400 Teuchos::Array<Scalar> block(NSDim * NSDim, zero);
401 Teuchos::Array<LO> bcol(1);
402
403 GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
404 for (LO agg = 0; agg < numAggs; agg++) {
405 bcol[0] = agg;
406 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
407 Xpetra::global_size_t offset = agg * NSDim;
408
409 // Process each row in the local Q factor
410 // NOTE: Blocks are in row-major order
411 for (LO j = 0; j < aggSize; j++) {
412 const LO localBlockRow = aggToRowMapLO[aggStart[agg] + j];
413
414 for (size_t r = 0; r < NSDim; r++) {
415 LO localPointRow = localBlockRow * NSDim + r;
416 for (size_t c = 0; c < NSDim; c++)
417 block[r * NSDim + c] = fineNS[c][localPointRow];
418 }
419 // NOTE: Assumes columns==aggs and are ordered sequentially
420 P_tpetra->replaceLocalValues(localBlockRow, bcol(), block());
421
422 } // end aggSize
423
424 for (size_t j = 0; j < NSDim; j++)
425 coarseNS[j][offset + j] = one;
426
427 } // for (GO agg = 0; agg < numAggs; agg++)
428
429 Ptentative = P_wrap;
430}
431
432template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
434 BuildPcoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
435 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace) const {
436 typedef Teuchos::ScalarTraits<SC> STS;
437 typedef typename STS::magnitudeType Magnitude;
438 const SC zero = STS::zero();
439 const SC one = STS::one();
440
441 // number of aggregates
442 GO numAggs = aggregates->GetNumAggregates();
443
444 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
445 // aggStart is a pointer into aggToRowMap
446 // aggStart[i]..aggStart[i+1] are indices into aggToRowMap
447 // aggToRowMap[aggStart[i]]..aggToRowMap[aggStart[i+1]-1] are the DOFs in aggregate i
448 ArrayRCP<LO> aggStart;
449 ArrayRCP<GO> aggToRowMap;
450 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMap);
451
452 // find size of the largest aggregate
453 LO maxAggSize = 0;
454 for (GO i = 0; i < numAggs; ++i) {
455 LO sizeOfThisAgg = aggStart[i + 1] - aggStart[i];
456 if (sizeOfThisAgg > maxAggSize) maxAggSize = sizeOfThisAgg;
457 }
458
459 // dimension of fine level nullspace
460 const size_t NSDim = fineNullspace->getNumVectors();
461
462 // index base for coarse Dof map (usually 0)
463 GO indexBase = A->getRowMap()->getIndexBase();
464
465 const RCP<const Map> nonUniqueMap = amalgInfo->ComputeUnamalgamatedImportDofMap(*aggregates);
466 const RCP<const Map> uniqueMap = A->getDomainMap();
467 RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
468 RCP<MultiVector> fineNullspaceWithOverlap = MultiVectorFactory::Build(nonUniqueMap, NSDim);
469 fineNullspaceWithOverlap->doImport(*fineNullspace, *importer, Xpetra::INSERT);
470
471 // Pull out the nullspace vectors so that we can have random access.
472 ArrayRCP<ArrayRCP<const SC> > fineNS(NSDim);
473 for (size_t i = 0; i < NSDim; ++i)
474 fineNS[i] = fineNullspaceWithOverlap->getData(i);
475
476 // Allocate storage for the coarse nullspace.
477 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
478
479 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
480 for (size_t i = 0; i < NSDim; ++i)
481 if (coarseMap->getLocalNumElements() > 0) coarseNS[i] = coarseNullspace->getDataNonConst(i);
482
483 // This makes the rowmap of Ptent the same as that of A->
484 // This requires moving some parts of some local Q's to other processors
485 // because aggregates can span processors.
486 RCP<const Map> rowMapForPtent = A->getRowMap();
487 const Map& rowMapForPtentRef = *rowMapForPtent;
488
489 // Set up storage for the rows of the local Qs that belong to other processors.
490 // FIXME This is inefficient and could be done within the main loop below with std::vector's.
491 RCP<const Map> colMap = A->getColMap();
492
493 RCP<const Map> ghostQMap;
494 RCP<MultiVector> ghostQvalues;
495 Array<RCP<Xpetra::Vector<GO, LO, GO, Node> > > ghostQcolumns;
496 RCP<Xpetra::Vector<GO, LO, GO, Node> > ghostQrowNums;
497 ArrayRCP<ArrayRCP<SC> > ghostQvals;
498 ArrayRCP<ArrayRCP<GO> > ghostQcols;
499 ArrayRCP<GO> ghostQrows;
500
501 Array<GO> ghostGIDs;
502 for (LO j = 0; j < numAggs; ++j) {
503 for (LO k = aggStart[j]; k < aggStart[j + 1]; ++k) {
504 if (rowMapForPtentRef.isNodeGlobalElement(aggToRowMap[k]) == false) {
505 ghostGIDs.push_back(aggToRowMap[k]);
506 }
507 }
508 }
509 ghostQMap = MapFactory::Build(A->getRowMap()->lib(),
510 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
511 ghostGIDs,
512 indexBase, A->getRowMap()->getComm()); // JG:Xpetra::global_size_t>?
513 // Vector to hold bits of Q that go to other processors.
514 ghostQvalues = MultiVectorFactory::Build(ghostQMap, NSDim);
515 // Note that Epetra does not support MultiVectors templated on Scalar != double.
516 // So to work around this, we allocate an array of Vectors. This shouldn't be too
517 // expensive, as the number of Vectors is NSDim.
518 ghostQcolumns.resize(NSDim);
519 for (size_t i = 0; i < NSDim; ++i)
520 ghostQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
521 ghostQrowNums = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
522 if (ghostQvalues->getLocalLength() > 0) {
523 ghostQvals.resize(NSDim);
524 ghostQcols.resize(NSDim);
525 for (size_t i = 0; i < NSDim; ++i) {
526 ghostQvals[i] = ghostQvalues->getDataNonConst(i);
527 ghostQcols[i] = ghostQcolumns[i]->getDataNonConst(0);
528 }
529 ghostQrows = ghostQrowNums->getDataNonConst(0);
530 }
531
532 // importer to handle moving Q
533 importer = ImportFactory::Build(ghostQMap, A->getRowMap());
534
535 // Dense QR solver
536 Teuchos::SerialQRDenseSolver<LO, SC> qrSolver;
537
538 // Allocate temporary storage for the tentative prolongator.
539 Array<GO> globalColPtr(maxAggSize * NSDim, 0);
540 Array<LO> localColPtr(maxAggSize * NSDim, 0);
541 Array<SC> valPtr(maxAggSize * NSDim, 0.);
542
543 // Create column map for Ptent, estimate local #nonzeros in Ptent, and create Ptent itself.
544 const Map& coarseMapRef = *coarseMap;
545
546 // For the 3-arrays constructor
547 ArrayRCP<size_t> ptent_rowptr;
548 ArrayRCP<LO> ptent_colind;
549 ArrayRCP<Scalar> ptent_values;
550
551 // Because ArrayRCPs are slow...
552 ArrayView<size_t> rowptr_v;
553 ArrayView<LO> colind_v;
554 ArrayView<Scalar> values_v;
555
556 // For temporary usage
557 Array<size_t> rowptr_temp;
558 Array<LO> colind_temp;
559 Array<Scalar> values_temp;
560
561 RCP<CrsMatrix> PtentCrs;
562
563 RCP<CrsMatrixWrap> PtentCrsWrap = rcp(new CrsMatrixWrap(rowMapForPtent, NSDim));
564 PtentCrs = PtentCrsWrap->getCrsMatrix();
565 Ptentative = PtentCrsWrap;
566
567 //*****************************************************************
568 // Loop over all aggregates and calculate local QR decompositions.
569 //*****************************************************************
570 GO qctr = 0; // for indexing into Ptent data vectors
571 const Map& nonUniqueMapRef = *nonUniqueMap;
572
573 size_t total_nnz_count = 0;
574
575 for (GO agg = 0; agg < numAggs; ++agg) {
576 LO myAggSize = aggStart[agg + 1] - aggStart[agg];
577 // For each aggregate, extract the corresponding piece of the nullspace and put it in the flat array,
578 // "localQR" (in column major format) for the QR routine.
579 Teuchos::SerialDenseMatrix<LO, SC> localQR(myAggSize, NSDim);
580 for (size_t j = 0; j < NSDim; ++j) {
581 bool bIsZeroNSColumn = true;
582 for (LO k = 0; k < myAggSize; ++k) {
583 // aggToRowMap[aggPtr[i]+k] is the kth DOF in the ith aggregate
584 // fineNS[j][n] is the nth entry in the jth NS vector
585 try {
586 SC nsVal = fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])]; // extract information from fine level NS
587 localQR(k, j) = nsVal;
588 if (nsVal != zero) bIsZeroNSColumn = false;
589 } catch (...) {
590 GetOStream(Runtime1, -1) << "length of fine level nsp: " << fineNullspace->getGlobalLength() << std::endl;
591 GetOStream(Runtime1, -1) << "length of fine level nsp w overlap: " << fineNullspaceWithOverlap->getGlobalLength() << std::endl;
592 GetOStream(Runtime1, -1) << "(local?) aggId=" << agg << std::endl;
593 GetOStream(Runtime1, -1) << "aggSize=" << myAggSize << std::endl;
594 GetOStream(Runtime1, -1) << "agg DOF=" << k << std::endl;
595 GetOStream(Runtime1, -1) << "NS vector j=" << j << std::endl;
596 GetOStream(Runtime1, -1) << "j*myAggSize + k = " << j * myAggSize + k << std::endl;
597 GetOStream(Runtime1, -1) << "aggToRowMap[" << agg << "][" << k << "] = " << aggToRowMap[aggStart[agg] + k] << std::endl;
598 GetOStream(Runtime1, -1) << "id aggToRowMap[agg][k]=" << aggToRowMap[aggStart[agg] + k] << " is global element in nonUniqueMap = " << nonUniqueMapRef.isNodeGlobalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
599 GetOStream(Runtime1, -1) << "colMap local id aggToRowMap[agg][k]=" << nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
600 GetOStream(Runtime1, -1) << "fineNS...=" << fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])] << std::endl;
601 GetOStream(Errors, -1) << "caught an error!" << std::endl;
602 }
603 } // for (LO k=0 ...
604 TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError, "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column. Error.");
605 } // for (LO j=0 ...
606
607 Xpetra::global_size_t offset = agg * NSDim;
608
609 if (myAggSize >= Teuchos::as<LocalOrdinal>(NSDim)) {
610 // calculate QR decomposition (standard)
611 // R is stored in localQR (size: myAggSize x NSDim)
612
613 // Householder multiplier
614 SC tau = localQR(0, 0);
615
616 if (NSDim == 1) {
617 // Only one nullspace vector, so normalize by hand
618 Magnitude dtemp = 0;
619 for (size_t k = 0; k < Teuchos::as<size_t>(myAggSize); ++k) {
620 Magnitude tmag = STS::magnitude(localQR(k, 0));
621 dtemp += tmag * tmag;
622 }
623 dtemp = Teuchos::ScalarTraits<Magnitude>::squareroot(dtemp);
624 tau = localQR(0, 0);
625 localQR(0, 0) = dtemp;
626 } else {
627 qrSolver.setMatrix(Teuchos::rcp(&localQR, false));
628 qrSolver.factor();
629 }
630
631 // Extract R, the coarse nullspace. This is stored in upper triangular part of localQR.
632 // Note: coarseNS[i][.] is the ith coarse nullspace vector, which may be counter to your intuition.
633 // This stores the (offset+k)th entry only if it is local according to the coarseMap.
634 for (size_t j = 0; j < NSDim; ++j) {
635 for (size_t k = 0; k <= j; ++k) {
636 try {
637 if (coarseMapRef.isNodeLocalElement(offset + k)) {
638 coarseNS[j][offset + k] = localQR(k, j); // TODO is offset+k the correct local ID?!
639 }
640 } catch (...) {
641 GetOStream(Errors, -1) << "caught error in coarseNS insert, j=" << j << ", offset+k = " << offset + k << std::endl;
642 }
643 }
644 }
645
646 // Calculate Q, the tentative prolongator.
647 // The Lapack GEQRF call only works for myAggsize >= NSDim
648
649 if (NSDim == 1) {
650 // Only one nullspace vector, so calculate Q by hand
651 Magnitude dtemp = Teuchos::ScalarTraits<SC>::magnitude(localQR(0, 0));
652 localQR(0, 0) = tau;
653 dtemp = 1 / dtemp;
654 for (LocalOrdinal i = 0; i < myAggSize; ++i) {
655 localQR(i, 0) *= dtemp;
656 }
657 } else {
658 qrSolver.formQ();
659 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO, SC> > qFactor = qrSolver.getQ();
660 for (size_t j = 0; j < NSDim; j++) {
661 for (size_t i = 0; i < Teuchos::as<size_t>(myAggSize); i++) {
662 localQR(i, j) = (*qFactor)(i, j);
663 }
664 }
665 }
666
667 // end default case (myAggSize >= NSDim)
668 } else { // special handling for myAggSize < NSDim (i.e. 1pt nodes)
669 // See comments for the uncoupled case
670
671 // R = extended (by adding identity rows) localQR
672 for (size_t j = 0; j < NSDim; j++)
673 for (size_t k = 0; k < NSDim; k++) {
674 TEUCHOS_TEST_FOR_EXCEPTION(!coarseMapRef.isNodeLocalElement(offset + k), Exceptions::RuntimeError,
675 "Caught error in coarseNS insert, j=" << j << ", offset+k = " << offset + k);
676
677 if (k < as<size_t>(myAggSize))
678 coarseNS[j][offset + k] = localQR(k, j);
679 else
680 coarseNS[j][offset + k] = (k == j ? one : zero);
681 }
682
683 // Q = I (rectangular)
684 for (size_t i = 0; i < as<size_t>(myAggSize); i++)
685 for (size_t j = 0; j < NSDim; j++)
686 localQR(i, j) = (j == i ? one : zero);
687 } // end else (special handling for 1pt aggregates)
688
689 // Process each row in the local Q factor. If the row is local to the current processor
690 // according to the rowmap, insert it into Ptentative. Otherwise, save it in ghostQ
691 // to be communicated later to the owning processor.
692 // FIXME -- what happens if maps are blocked?
693 for (GO j = 0; j < myAggSize; ++j) {
694 // This loop checks whether row associated with current DOF is local, according to rowMapForPtent.
695 // If it is, the row is inserted. If not, the row number, columns, and values are saved in
696 // MultiVectors that will be sent to other processors.
697 GO globalRow = aggToRowMap[aggStart[agg] + j];
698
699 // TODO is the use of Xpetra::global_size_t below correct?
700 if (rowMapForPtentRef.isNodeGlobalElement(globalRow) == false) {
701 ghostQrows[qctr] = globalRow;
702 for (size_t k = 0; k < NSDim; ++k) {
703 ghostQcols[k][qctr] = coarseMapRef.getGlobalElement(agg * NSDim + k);
704 ghostQvals[k][qctr] = localQR(j, k);
705 }
706 ++qctr;
707 } else {
708 size_t nnz = 0;
709 for (size_t k = 0; k < NSDim; ++k) {
710 try {
711 if (localQR(j, k) != Teuchos::ScalarTraits<SC>::zero()) {
712 localColPtr[nnz] = agg * NSDim + k;
713 globalColPtr[nnz] = coarseMapRef.getGlobalElement(localColPtr[nnz]);
714 valPtr[nnz] = localQR(j, k);
715 ++total_nnz_count;
716 ++nnz;
717 }
718 } catch (...) {
719 GetOStream(Errors, -1) << "caught error in colPtr/valPtr insert, current index=" << nnz << std::endl;
720 }
721 } // for (size_t k=0; k<NSDim; ++k)
722
723 try {
724 Ptentative->insertGlobalValues(globalRow, globalColPtr.view(0, nnz), valPtr.view(0, nnz));
725 } catch (...) {
726 GetOStream(Errors, -1) << "pid " << A->getRowMap()->getComm()->getRank()
727 << "caught error during Ptent row insertion, global row "
728 << globalRow << std::endl;
729 }
730 }
731 } // for (GO j=0; j<myAggSize; ++j)
732
733 } // for (LO agg=0; agg<numAggs; ++agg)
734
735 // ***********************************************************
736 // ************* end of aggregate-wise QR ********************
737 // ***********************************************************
738 GetOStream(Runtime1) << "TentativePFactory : aggregates may cross process boundaries" << std::endl;
739 // Import ghost parts of Q factors and insert into Ptentative.
740 // First import just the global row numbers.
741 RCP<Xpetra::Vector<GO, LO, GO, Node> > targetQrowNums = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(rowMapForPtent);
742 targetQrowNums->putScalar(-1);
743 targetQrowNums->doImport(*ghostQrowNums, *importer, Xpetra::INSERT);
744 ArrayRCP<GO> targetQrows = targetQrowNums->getDataNonConst(0);
745
746 // Now create map based on just the row numbers imported.
747 Array<GO> gidsToImport;
748 gidsToImport.reserve(targetQrows.size());
749 for (typename ArrayRCP<GO>::iterator r = targetQrows.begin(); r != targetQrows.end(); ++r) {
750 if (*r > -1) {
751 gidsToImport.push_back(*r);
752 }
753 }
754 RCP<const Map> reducedMap = MapFactory::Build(A->getRowMap()->lib(),
755 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
756 gidsToImport, indexBase, A->getRowMap()->getComm());
757
758 // Import using the row numbers that this processor will receive.
759 importer = ImportFactory::Build(ghostQMap, reducedMap);
760
761 Array<RCP<Xpetra::Vector<GO, LO, GO, Node> > > targetQcolumns(NSDim);
762 for (size_t i = 0; i < NSDim; ++i) {
763 targetQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(reducedMap);
764 targetQcolumns[i]->doImport(*(ghostQcolumns[i]), *importer, Xpetra::INSERT);
765 }
766 RCP<MultiVector> targetQvalues = MultiVectorFactory::Build(reducedMap, NSDim);
767 targetQvalues->doImport(*ghostQvalues, *importer, Xpetra::INSERT);
768
769 ArrayRCP<ArrayRCP<SC> > targetQvals;
770 ArrayRCP<ArrayRCP<GO> > targetQcols;
771 if (targetQvalues->getLocalLength() > 0) {
772 targetQvals.resize(NSDim);
773 targetQcols.resize(NSDim);
774 for (size_t i = 0; i < NSDim; ++i) {
775 targetQvals[i] = targetQvalues->getDataNonConst(i);
776 targetQcols[i] = targetQcolumns[i]->getDataNonConst(0);
777 }
778 }
779
780 valPtr = Array<SC>(NSDim, 0.);
781 globalColPtr = Array<GO>(NSDim, 0);
782 for (typename Array<GO>::iterator r = gidsToImport.begin(); r != gidsToImport.end(); ++r) {
783 if (targetQvalues->getLocalLength() > 0) {
784 for (size_t j = 0; j < NSDim; ++j) {
785 valPtr[j] = targetQvals[j][reducedMap->getLocalElement(*r)];
786 globalColPtr[j] = targetQcols[j][reducedMap->getLocalElement(*r)];
787 }
788 Ptentative->insertGlobalValues(*r, globalColPtr.view(0, NSDim), valPtr.view(0, NSDim));
789 } // if (targetQvalues->getLocalLength() > 0)
790 }
791
792 Ptentative->fillComplete(coarseMap, A->getDomainMap());
793}
794
795template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
797 BuildPuncoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
798 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
799 RCP<const Map> rowMap = A->getRowMap();
800 RCP<const Map> colMap = A->getColMap();
801 const size_t numRows = rowMap->getLocalNumElements();
802
803 typedef Teuchos::ScalarTraits<SC> STS;
804 typedef typename STS::magnitudeType Magnitude;
805 const SC zero = STS::zero();
806 const SC one = STS::one();
807 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
808
809 const GO numAggs = aggregates->GetNumAggregates();
810 const size_t NSDim = fineNullspace->getNumVectors();
811 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
812
813 // Sanity checking
814 const ParameterList& pL = GetParameterList();
815 const bool& doQRStep = pL.get<bool>("tentative: calculate qr");
816 const bool& constantColSums = pL.get<bool>("tentative: constant column sums");
817
818 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums, Exceptions::RuntimeError,
819 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
820
821 // Aggregates map is based on the amalgamated column map
822 // We can skip global-to-local conversion if LIDs in row map are
823 // same as LIDs in column map
824 bool goodMap = MueLu::Utilities<SC, LO, GO, NO>::MapsAreNested(*rowMap, *colMap);
825
826 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
827 // aggStart is a pointer into aggToRowMapLO
828 // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
829 // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
830 ArrayRCP<LO> aggStart;
831 ArrayRCP<LO> aggToRowMapLO;
832 ArrayRCP<GO> aggToRowMapGO;
833 if (goodMap) {
834 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
835 GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
836
837 } else {
838 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMapGO);
839 GetOStream(Warnings0) << "Column map is not consistent with the row map\n"
840 << "using GO->LO conversion with performance penalty" << std::endl;
841 }
842 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
843
844 // Pull out the nullspace vectors so that we can have random access.
845 ArrayRCP<ArrayRCP<const SC> > fineNS(NSDim);
846 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
847 for (size_t i = 0; i < NSDim; i++) {
848 fineNS[i] = fineNullspace->getData(i);
849 if (coarseMap->getLocalNumElements() > 0)
850 coarseNS[i] = coarseNullspace->getDataNonConst(i);
851 }
852
853 size_t nnzEstimate = numRows * NSDim;
854
855 // Time to construct the matrix and fill in the values
856 Ptentative = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
857 RCP<CrsMatrix> PtentCrs = toCrsMatrix(Ptentative);
858
859 ArrayRCP<size_t> iaPtent;
860 ArrayRCP<LO> jaPtent;
861 ArrayRCP<SC> valPtent;
862
863 PtentCrs->allocateAllValues(nnzEstimate, iaPtent, jaPtent, valPtent);
864
865 ArrayView<size_t> ia = iaPtent();
866 ArrayView<LO> ja = jaPtent();
867 ArrayView<SC> val = valPtent();
868
869 ia[0] = 0;
870 for (size_t i = 1; i <= numRows; i++)
871 ia[i] = ia[i - 1] + NSDim;
872
873 for (size_t j = 0; j < nnzEstimate; j++) {
874 ja[j] = INVALID;
875 val[j] = zero;
876 }
877
878 if (doQRStep) {
880 // Standard aggregate-wise QR //
882 for (GO agg = 0; agg < numAggs; agg++) {
883 LO aggSize = aggStart[agg + 1] - aggStart[agg];
884
885 Xpetra::global_size_t offset = agg * NSDim;
886
887 // Extract the piece of the nullspace corresponding to the aggregate, and
888 // put it in the flat array, "localQR" (in column major format) for the
889 // QR routine.
890 Teuchos::SerialDenseMatrix<LO, SC> localQR(aggSize, NSDim);
891 if (goodMap) {
892 for (size_t j = 0; j < NSDim; j++)
893 for (LO k = 0; k < aggSize; k++)
894 localQR(k, j) = fineNS[j][aggToRowMapLO[aggStart[agg] + k]];
895 } else {
896 for (size_t j = 0; j < NSDim; j++)
897 for (LO k = 0; k < aggSize; k++)
898 localQR(k, j) = fineNS[j][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + k])];
899 }
900
901 // Test for zero columns
902 for (size_t j = 0; j < NSDim; j++) {
903 bool bIsZeroNSColumn = true;
904
905 for (LO k = 0; k < aggSize; k++)
906 if (localQR(k, j) != zero)
907 bIsZeroNSColumn = false;
908
909 TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError,
910 "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column in NS column " << j);
911 }
912
913 // Calculate QR decomposition (standard)
914 // NOTE: Q is stored in localQR and R is stored in coarseNS
915 if (aggSize >= Teuchos::as<LO>(NSDim)) {
916 if (NSDim == 1) {
917 // Only one nullspace vector, calculate Q and R by hand
918 Magnitude norm = STS::magnitude(zero);
919 for (size_t k = 0; k < Teuchos::as<size_t>(aggSize); k++)
920 norm += STS::magnitude(localQR(k, 0) * localQR(k, 0));
921 norm = Teuchos::ScalarTraits<Magnitude>::squareroot(norm);
922
923 // R = norm
924 coarseNS[0][offset] = norm;
925
926 // Q = localQR(:,0)/norm
927 for (LO i = 0; i < aggSize; i++)
928 localQR(i, 0) /= norm;
929
930 } else {
931 Teuchos::SerialQRDenseSolver<LO, SC> qrSolver;
932 qrSolver.setMatrix(Teuchos::rcp(&localQR, false));
933 qrSolver.factor();
934
935 // R = upper triangular part of localQR
936 for (size_t j = 0; j < NSDim; j++)
937 for (size_t k = 0; k <= j; k++)
938 coarseNS[j][offset + k] = localQR(k, j); // TODO is offset+k the correct local ID?!
939
940 // Calculate Q, the tentative prolongator.
941 // The Lapack GEQRF call only works for myAggsize >= NSDim
942 qrSolver.formQ();
943 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO, SC> > qFactor = qrSolver.getQ();
944 for (size_t j = 0; j < NSDim; j++)
945 for (size_t i = 0; i < Teuchos::as<size_t>(aggSize); i++)
946 localQR(i, j) = (*qFactor)(i, j);
947 }
948
949 } else {
950 // Special handling for aggSize < NSDim (i.e. single node aggregates in structural mechanics)
951
952 // The local QR decomposition is not possible in the "overconstrained"
953 // case (i.e. number of columns in localQR > number of rows), which
954 // corresponds to #DOFs in Aggregate < NSDim. For usual problems this
955 // is only possible for single node aggregates in structural mechanics.
956 // (Similar problems may arise in discontinuous Galerkin problems...)
957 // We bypass the QR decomposition and use an identity block in the
958 // tentative prolongator for the single node aggregate and transfer the
959 // corresponding fine level null space information 1-to-1 to the coarse
960 // level null space part.
961
962 // NOTE: The resulting tentative prolongation operator has
963 // (aggSize*DofsPerNode-NSDim) zero columns leading to a singular
964 // coarse level operator A. To deal with that one has the following
965 // options:
966 // - Use the "RepairMainDiagonal" flag in the RAPFactory (default:
967 // false) to add some identity block to the diagonal of the zero rows
968 // in the coarse level operator A, such that standard level smoothers
969 // can be used again.
970 // - Use special (projection-based) level smoothers, which can deal
971 // with singular matrices (very application specific)
972 // - Adapt the code below to avoid zero columns. However, we do not
973 // support a variable number of DOFs per node in MueLu/Xpetra which
974 // makes the implementation really hard.
975
976 // R = extended (by adding identity rows) localQR
977 for (size_t j = 0; j < NSDim; j++)
978 for (size_t k = 0; k < NSDim; k++)
979 if (k < as<size_t>(aggSize))
980 coarseNS[j][offset + k] = localQR(k, j);
981 else
982 coarseNS[j][offset + k] = (k == j ? one : zero);
983
984 // Q = I (rectangular)
985 for (size_t i = 0; i < as<size_t>(aggSize); i++)
986 for (size_t j = 0; j < NSDim; j++)
987 localQR(i, j) = (j == i ? one : zero);
988 }
989
990 // Process each row in the local Q factor
991 // FIXME: What happens if maps are blocked?
992 for (LO j = 0; j < aggSize; j++) {
993 LO localRow = (goodMap ? aggToRowMapLO[aggStart[agg] + j] : rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]));
994
995 size_t rowStart = ia[localRow];
996 for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
997 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
998 if (localQR(j, k) != zero) {
999 ja[rowStart + lnnz] = offset + k;
1000 val[rowStart + lnnz] = localQR(j, k);
1001 lnnz++;
1002 }
1003 }
1004 }
1005 }
1006
1007 } else {
1008 GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
1009 if (NSDim > 1)
1010 GetOStream(Warnings0) << "TentativePFactory : for nontrivial nullspace, this may degrade performance" << std::endl;
1012 // "no-QR" option //
1014 // Local Q factor is just the fine nullspace support over the current aggregate.
1015 // Local R factor is the identity.
1016 // TODO I have not implemented any special handling for aggregates that are too
1017 // TODO small to locally support the nullspace, as is done in the standard QR
1018 // TODO case above.
1019 if (goodMap) {
1020 for (GO agg = 0; agg < numAggs; agg++) {
1021 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1022 Xpetra::global_size_t offset = agg * NSDim;
1023
1024 // Process each row in the local Q factor
1025 // FIXME: What happens if maps are blocked?
1026 for (LO j = 0; j < aggSize; j++) {
1027 // TODO Here I do not check for a zero nullspace column on the aggregate.
1028 // as is done in the standard QR case.
1029
1030 const LO localRow = aggToRowMapLO[aggStart[agg] + j];
1031
1032 const size_t rowStart = ia[localRow];
1033
1034 for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
1035 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1036 SC qr_jk = fineNS[k][aggToRowMapLO[aggStart[agg] + j]];
1037 if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1038 if (qr_jk != zero) {
1039 ja[rowStart + lnnz] = offset + k;
1040 val[rowStart + lnnz] = qr_jk;
1041 lnnz++;
1042 }
1043 }
1044 }
1045 for (size_t j = 0; j < NSDim; j++)
1046 coarseNS[j][offset + j] = one;
1047 } // for (GO agg = 0; agg < numAggs; agg++)
1048
1049 } else {
1050 for (GO agg = 0; agg < numAggs; agg++) {
1051 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1052 Xpetra::global_size_t offset = agg * NSDim;
1053 for (LO j = 0; j < aggSize; j++) {
1054 const LO localRow = rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]);
1055
1056 const size_t rowStart = ia[localRow];
1057
1058 for (size_t k = 0, lnnz = 0; k < NSDim; ++k) {
1059 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1060 SC qr_jk = fineNS[k][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j])];
1061 if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1062 if (qr_jk != zero) {
1063 ja[rowStart + lnnz] = offset + k;
1064 val[rowStart + lnnz] = qr_jk;
1065 lnnz++;
1066 }
1067 }
1068 }
1069 for (size_t j = 0; j < NSDim; j++)
1070 coarseNS[j][offset + j] = one;
1071 } // for (GO agg = 0; agg < numAggs; agg++)
1072
1073 } // if (goodmap) else ...
1074
1075 } // if doQRStep ... else
1076
1077 // Compress storage (remove all INVALID, which happen when we skip zeros)
1078 // We do that in-place
1079 size_t ia_tmp = 0, nnz = 0;
1080 for (size_t i = 0; i < numRows; i++) {
1081 for (size_t j = ia_tmp; j < ia[i + 1]; j++)
1082 if (ja[j] != INVALID) {
1083 ja[nnz] = ja[j];
1084 val[nnz] = val[j];
1085 nnz++;
1086 }
1087 ia_tmp = ia[i + 1];
1088 ia[i + 1] = nnz;
1089 }
1090 if (rowMap->lib() == Xpetra::UseTpetra) {
1091 // - Cannot resize for Epetra, as it checks for same pointers
1092 // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
1093 // NOTE: these invalidate ja and val views
1094 jaPtent.resize(nnz);
1095 valPtent.resize(nnz);
1096 }
1097
1098 GetOStream(Runtime1) << "TentativePFactory : aggregates do not cross process boundaries" << std::endl;
1099
1100 PtentCrs->setAllValues(iaPtent, jaPtent, valPtent);
1101
1102 // Managing labels & constants for ESFC
1103 RCP<ParameterList> FCparams;
1104 if (pL.isSublist("matrixmatrix: kernel params"))
1105 FCparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
1106 else
1107 FCparams = rcp(new ParameterList);
1108 // By default, we don't need global constants for TentativeP
1109 FCparams->set("compute global constants", FCparams->get("compute global constants", false));
1110 std::string levelIDs = toString(levelID);
1111 FCparams->set("Timer Label", std::string("MueLu::TentativeP-") + levelIDs);
1112 RCP<const Export> dummy_e;
1113 RCP<const Import> dummy_i;
1114
1115 PtentCrs->expertStaticFillComplete(coarseMap, A->getDomainMap(), dummy_i, dummy_e, FCparams);
1116}
1117
1118} // namespace MueLu
1119
1120// TODO ReUse: If only P or Nullspace is missing, TentativePFactory can be smart and skip part of the computation.
1121
1122#define MUELU_TENTATIVEPFACTORY_SHORT
1123#endif // MUELU_TENTATIVEPFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
int GetLevelID() const
Return level number.
static const NoFactory * get()
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void BuildPuncoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void BuildPuncoupledBlockCrs(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void BuildPcoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace) const
static bool MapsAreNested(const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &rowMap, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &colMap)
Namespace for MueLu classes and methods.
@ Warnings0
Important warning messages (one line)
@ Statistics2
Print even more statistics.
@ Runtime1
Description of what is happening (more verbose)
std::string toString(const T &what)
Little helper function to convert non-string types to strings.