102 typedef typename Teuchos::ScalarTraits<Scalar>::coordinateType coordinate_type;
103 typedef Xpetra::MultiVector<coordinate_type, LO, GO, NO> RealValuedMultiVector;
104 typedef Xpetra::MultiVectorFactory<coordinate_type, LO, GO, NO> RealValuedMultiVectorFactory;
106 const ParameterList& pL = GetParameterList();
107 std::string nspName =
"Nullspace";
108 if (pL.isParameter(
"Nullspace name")) nspName = pL.get<std::string>(
"Nullspace name");
110 RCP<Matrix> Ptentative;
111 RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel,
"A");
112 RCP<Aggregates> aggregates = Get<RCP<Aggregates> >(fineLevel,
"Aggregates");
115 if (aggregates->GetNumGlobalAggregatesComputeIfNeeded() == 0) {
116 Ptentative = Teuchos::null;
117 Set(coarseLevel,
"P", Ptentative);
121 RCP<AmalgamationInfo> amalgInfo = Get<RCP<AmalgamationInfo> >(fineLevel,
"UnAmalgamationInfo");
122 RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, nspName);
123 RCP<const Map> coarseMap = Get<RCP<const Map> >(fineLevel,
"CoarseMap");
124 RCP<RealValuedMultiVector> fineCoords;
125 if (bTransferCoordinates_) {
126 fineCoords = Get<RCP<RealValuedMultiVector> >(fineLevel,
"Coordinates");
131 RCP<const Teuchos::Comm<int> > nodeComm = Get<RCP<const Teuchos::Comm<int> > >(fineLevel,
"Node Comm");
132 Set<RCP<const Teuchos::Comm<int> > >(coarseLevel,
"Node Comm", nodeComm);
136 TEUCHOS_TEST_FOR_EXCEPTION(A->getDomainMap()->getLocalNumElements() != fineNullspace->getMap()->getLocalNumElements(),
139 RCP<MultiVector> coarseNullspace;
140 RCP<RealValuedMultiVector> coarseCoords;
142 if (bTransferCoordinates_) {
145 ArrayView<const GO> elementAList = coarseMap->getLocalElementList();
147 if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null) {
148 blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize();
150 GO indexBase = coarseMap->getIndexBase();
151 LO numCoarseNodes = Teuchos::as<LO>(elementAList.size() / blkSize);
152 Array<GO> nodeList(numCoarseNodes);
153 const int numDimensions = fineCoords->getNumVectors();
155 for (LO i = 0; i < numCoarseNodes; i++) {
156 nodeList[i] = (elementAList[i * blkSize] - indexBase) / blkSize + indexBase;
158 Teuchos::RCP<Teuchos::ParameterList> params = Teuchos::rcp(
new Teuchos::ParameterList());
159 params->set(
"compute global constants",
false);
160 RCP<const Map> coarseCoordsMap = MapFactory::Build(fineCoords->getMap()->lib(),
161 blkSize * coarseMap->getGlobalNumElements(),
164 fineCoords->getMap()->getComm(),
166 coarseCoords = RealValuedMultiVectorFactory::Build(coarseCoordsMap, numDimensions);
169 RCP<RealValuedMultiVector> ghostedCoords;
170 if (aggregates->AggregatesCrossProcessors()) {
171 RCP<const Map> aggMap = aggregates->GetMap();
172 RCP<const Import> importer = ImportFactory::Build(fineCoords->getMap(), aggMap);
174 ghostedCoords = RealValuedMultiVectorFactory::Build(aggMap, numDimensions);
175 ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);
177 ghostedCoords = fineCoords;
181 int myPID = coarseCoordsMap->getComm()->getRank();
182 LO numAggs = aggregates->GetNumAggregates();
183 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
184 const ArrayRCP<const LO> vertex2AggID = aggregates->GetVertex2AggId()->getData(0);
185 const ArrayRCP<const LO> procWinner = aggregates->GetProcWinner()->getData(0);
188 for (
int dim = 0; dim < numDimensions; ++dim) {
189 ArrayRCP<const coordinate_type> fineCoordsData = ghostedCoords->getData(dim);
190 ArrayRCP<coordinate_type> coarseCoordsData = coarseCoords->getDataNonConst(dim);
192 for (LO lnode = 0; lnode < Teuchos::as<LO>(vertex2AggID.size()); lnode++) {
193 if (procWinner[lnode] == myPID &&
194 lnode < fineCoordsData.size() &&
195 vertex2AggID[lnode] < coarseCoordsData.size() &&
196 Teuchos::ScalarTraits<coordinate_type>::isnaninf(fineCoordsData[lnode]) ==
false) {
197 coarseCoordsData[vertex2AggID[lnode]] += fineCoordsData[lnode];
200 for (LO agg = 0; agg < numAggs; agg++) {
201 coarseCoordsData[agg] /= aggSizes[agg];
206 if (!aggregates->AggregatesCrossProcessors()) {
207 if (Xpetra::Helpers<SC, LO, GO, NO>::isTpetraBlockCrs(A)) {
208 BuildPuncoupledBlockCrs(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.
GetLevelID());
210 BuildPuncoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.
GetLevelID());
213 BuildPcoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
223 if (A->IsView(
"stridedMaps") ==
true)
224 Ptentative->CreateView(
"stridedMaps", A->getRowMap(
"stridedMaps"), coarseMap);
226 if (bTransferCoordinates_) {
227 Set(coarseLevel,
"Coordinates", coarseCoords);
229 Set(coarseLevel,
"Nullspace", coarseNullspace);
230 Set(coarseLevel,
"P", Ptentative);
232 if (pL.get<
bool>(
"sa: keep tentative prolongator")) {
238 RCP<ParameterList> params = rcp(
new ParameterList());
239 params->set(
"printLoadBalancingInfo",
true);
246 BuildPuncoupledBlockCrs(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
247 RCP<const Map> coarsePointMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace,
const int levelID)
const {
257 RCP<const Map> rowMap = A->getRowMap();
258 RCP<const Map> rangeMap = A->getRangeMap();
259 RCP<const Map> colMap = A->getColMap();
261 const size_t numFineBlockRows = rowMap->getLocalNumElements();
263 typedef Teuchos::ScalarTraits<SC> STS;
265 const SC zero = STS::zero();
266 const SC one = STS::one();
267 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
269 const GO numAggs = aggregates->GetNumAggregates();
270 const size_t NSDim = fineNullspace->getNumVectors();
271 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
277 const size_t numCoarseBlockRows = coarsePointMap->getLocalNumElements() / NSDim;
278 RCP<const Map> coarseBlockMap = MapFactory::Build(coarsePointMap->lib(),
279 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
281 coarsePointMap->getIndexBase(),
282 coarsePointMap->getComm());
284 const ParameterList& pL = GetParameterList();
285 const bool&
doQRStep = pL.get<
bool>(
"tentative: calculate qr");
286 const bool& constantColSums = pL.get<
bool>(
"tentative: constant column sums");
289 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
302 ArrayRCP<LO> aggStart;
303 ArrayRCP<LO> aggToRowMapLO;
304 ArrayRCP<GO> aggToRowMapGO;
306 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
307 GetOStream(
Runtime1) <<
"Column map is consistent with the row map, good." << std::endl;
309 throw std::runtime_error(
"TentativePFactory::PuncoupledBlockCrs: Inconsistent maps not currently supported");
312 coarseNullspace = MultiVectorFactory::Build(coarsePointMap, NSDim);
315 ArrayRCP<ArrayRCP<const SC> >
fineNS(NSDim);
316 ArrayRCP<ArrayRCP<SC> >
coarseNS(NSDim);
317 for (
size_t i = 0; i < NSDim; i++) {
318 fineNS[i] = fineNullspace->getData(i);
319 if (coarsePointMap->getLocalNumElements() > 0)
320 coarseNS[i] = coarseNullspace->getDataNonConst(i);
326 RCP<CrsGraph> BlockGraph = CrsGraphFactory::Build(rowMap, coarseBlockMap, 0);
327 ArrayRCP<size_t> iaPtent;
328 ArrayRCP<LO> jaPtent;
329 BlockGraph->allocateAllIndices(numFineBlockRows, iaPtent, jaPtent);
330 ArrayView<size_t> ia = iaPtent();
331 ArrayView<LO> ja = jaPtent();
333 for (
size_t i = 0; i < numFineBlockRows; i++) {
337 ia[numCoarseBlockRows] = numCoarseBlockRows;
339 for (GO agg = 0; agg < numAggs; agg++) {
340 LO aggSize = aggStart[agg + 1] - aggStart[agg];
341 Xpetra::global_size_t offset = agg;
343 for (LO j = 0; j < aggSize; j++) {
345 const LO localRow = aggToRowMapLO[aggStart[agg] + j];
346 const size_t rowStart = ia[localRow];
347 ja[rowStart] = offset;
353 size_t ia_tmp = 0, nnz = 0;
354 for (
size_t i = 0; i < numFineBlockRows; i++) {
355 for (
size_t j = ia_tmp; j < ia[i + 1]; j++)
356 if (ja[j] != INVALID) {
364 if (rowMap->lib() == Xpetra::UseTpetra) {
371 GetOStream(
Runtime1) <<
"TentativePFactory : generating block graph" << std::endl;
372 BlockGraph->setAllIndices(iaPtent, jaPtent);
376 RCP<ParameterList> FCparams;
377 if (pL.isSublist(
"matrixmatrix: kernel params"))
378 FCparams = rcp(
new ParameterList(pL.sublist(
"matrixmatrix: kernel params")));
380 FCparams = rcp(
new ParameterList);
383 FCparams->set(
"compute global constants", FCparams->get(
"compute global constants",
true));
384 std::string levelIDs =
toString(levelID);
385 FCparams->set(
"Timer Label", std::string(
"MueLu::TentativeP-") + levelIDs);
386 RCP<const Export> dummy_e;
387 RCP<const Import> dummy_i;
388 BlockGraph->expertStaticFillComplete(coarseBlockMap, rowMap, dummy_i, dummy_e, FCparams);
393 RCP<Xpetra::CrsMatrix<SC, LO, GO, NO> > P_xpetra = Xpetra::CrsMatrixFactory<SC, LO, GO, NO>::BuildBlock(BlockGraph, coarsePointMap, rangeMap, NSDim);
394 RCP<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO> > P_tpetra = rcp_dynamic_cast<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO> >(P_xpetra);
395 if (P_tpetra.is_null())
throw std::runtime_error(
"BuildPUncoupled: Matrix factory did not return a Tpetra::BlockCrsMatrix");
396 RCP<CrsMatrixWrap> P_wrap = rcp(
new CrsMatrixWrap(P_xpetra));
405 Teuchos::Array<Scalar> block(NSDim * NSDim, zero);
406 Teuchos::Array<LO> bcol(1);
408 GetOStream(
Runtime1) <<
"TentativePFactory : bypassing local QR phase" << std::endl;
409 for (LO agg = 0; agg < numAggs; agg++) {
411 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
412 Xpetra::global_size_t offset = agg * NSDim;
416 for (LO j = 0; j < aggSize; j++) {
417 const LO localBlockRow = aggToRowMapLO[aggStart[agg] + j];
419 for (
size_t r = 0; r < NSDim; r++) {
420 LO localPointRow = localBlockRow * NSDim + r;
421 for (
size_t c = 0; c < NSDim; c++)
422 block[r * NSDim + c] =
fineNS[c][localPointRow];
425 P_tpetra->replaceLocalValues(localBlockRow, bcol(), block());
429 for (
size_t j = 0; j < NSDim; j++)
439 BuildPcoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
440 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace)
const {
441 typedef Teuchos::ScalarTraits<SC> STS;
442 typedef typename STS::magnitudeType Magnitude;
443 const SC zero = STS::zero();
444 const SC one = STS::one();
447 GO numAggs = aggregates->GetNumAggregates();
453 ArrayRCP<LO> aggStart;
454 ArrayRCP<GO> aggToRowMap;
455 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMap);
459 for (GO i = 0; i < numAggs; ++i) {
460 LO sizeOfThisAgg = aggStart[i + 1] - aggStart[i];
461 if (sizeOfThisAgg > maxAggSize) maxAggSize = sizeOfThisAgg;
465 const size_t NSDim = fineNullspace->getNumVectors();
468 GO indexBase = A->getRowMap()->getIndexBase();
470 const RCP<const Map> nonUniqueMap = amalgInfo->ComputeUnamalgamatedImportDofMap(*aggregates);
471 const RCP<const Map> uniqueMap = A->getDomainMap();
472 RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
473 RCP<MultiVector> fineNullspaceWithOverlap = MultiVectorFactory::Build(nonUniqueMap, NSDim);
474 fineNullspaceWithOverlap->doImport(*fineNullspace, *importer, Xpetra::INSERT);
477 ArrayRCP<ArrayRCP<const SC> >
fineNS(NSDim);
478 for (
size_t i = 0; i < NSDim; ++i)
479 fineNS[i] = fineNullspaceWithOverlap->getData(i);
482 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
484 ArrayRCP<ArrayRCP<SC> >
coarseNS(NSDim);
485 for (
size_t i = 0; i < NSDim; ++i)
486 if (coarseMap->getLocalNumElements() > 0)
coarseNS[i] = coarseNullspace->getDataNonConst(i);
491 RCP<const Map> rowMapForPtent = A->getRowMap();
492 const Map& rowMapForPtentRef = *rowMapForPtent;
496 RCP<const Map> colMap = A->getColMap();
498 RCP<const Map> ghostQMap;
499 RCP<MultiVector> ghostQvalues;
500 Array<RCP<Xpetra::Vector<GO, LO, GO, Node> > > ghostQcolumns;
501 RCP<Xpetra::Vector<GO, LO, GO, Node> > ghostQrowNums;
502 ArrayRCP<ArrayRCP<SC> > ghostQvals;
503 ArrayRCP<ArrayRCP<GO> > ghostQcols;
504 ArrayRCP<GO> ghostQrows;
507 for (LO j = 0; j < numAggs; ++j) {
508 for (LO k = aggStart[j]; k < aggStart[j + 1]; ++k) {
509 if (rowMapForPtentRef.isNodeGlobalElement(aggToRowMap[k]) ==
false) {
510 ghostGIDs.push_back(aggToRowMap[k]);
514 ghostQMap = MapFactory::Build(A->getRowMap()->lib(),
515 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
517 indexBase, A->getRowMap()->getComm());
519 ghostQvalues = MultiVectorFactory::Build(ghostQMap, NSDim);
523 ghostQcolumns.resize(NSDim);
524 for (
size_t i = 0; i < NSDim; ++i)
525 ghostQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
526 ghostQrowNums = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
527 if (ghostQvalues->getLocalLength() > 0) {
528 ghostQvals.resize(NSDim);
529 ghostQcols.resize(NSDim);
530 for (
size_t i = 0; i < NSDim; ++i) {
531 ghostQvals[i] = ghostQvalues->getDataNonConst(i);
532 ghostQcols[i] = ghostQcolumns[i]->getDataNonConst(0);
534 ghostQrows = ghostQrowNums->getDataNonConst(0);
538 importer = ImportFactory::Build(ghostQMap, A->getRowMap());
541 Teuchos::SerialQRDenseSolver<LO, SC> qrSolver;
544 Array<GO> globalColPtr(maxAggSize * NSDim, 0);
545 Array<LO> localColPtr(maxAggSize * NSDim, 0);
546 Array<SC> valPtr(maxAggSize * NSDim, 0.);
549 const Map& coarseMapRef = *coarseMap;
552 ArrayRCP<size_t> ptent_rowptr;
553 ArrayRCP<LO> ptent_colind;
554 ArrayRCP<Scalar> ptent_values;
557 ArrayView<size_t> rowptr_v;
558 ArrayView<LO> colind_v;
559 ArrayView<Scalar> values_v;
562 Array<size_t> rowptr_temp;
563 Array<LO> colind_temp;
564 Array<Scalar> values_temp;
566 RCP<CrsMatrix> PtentCrs;
568 RCP<CrsMatrixWrap> PtentCrsWrap = rcp(
new CrsMatrixWrap(rowMapForPtent, NSDim));
569 PtentCrs = PtentCrsWrap->getCrsMatrix();
570 Ptentative = PtentCrsWrap;
576 const Map& nonUniqueMapRef = *nonUniqueMap;
578 size_t total_nnz_count = 0;
580 for (GO agg = 0; agg < numAggs; ++agg) {
581 LO myAggSize = aggStart[agg + 1] - aggStart[agg];
584 Teuchos::SerialDenseMatrix<LO, SC> localQR(myAggSize, NSDim);
585 for (
size_t j = 0; j < NSDim; ++j) {
586 bool bIsZeroNSColumn =
true;
587 for (LO k = 0; k < myAggSize; ++k) {
591 SC nsVal =
fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])];
592 localQR(k, j) = nsVal;
593 if (nsVal != zero) bIsZeroNSColumn =
false;
595 GetOStream(
Runtime1, -1) <<
"length of fine level nsp: " << fineNullspace->getGlobalLength() << std::endl;
596 GetOStream(
Runtime1, -1) <<
"length of fine level nsp w overlap: " << fineNullspaceWithOverlap->getGlobalLength() << std::endl;
597 GetOStream(
Runtime1, -1) <<
"(local?) aggId=" << agg << std::endl;
598 GetOStream(
Runtime1, -1) <<
"aggSize=" << myAggSize << std::endl;
599 GetOStream(
Runtime1, -1) <<
"agg DOF=" << k << std::endl;
600 GetOStream(
Runtime1, -1) <<
"NS vector j=" << j << std::endl;
601 GetOStream(
Runtime1, -1) <<
"j*myAggSize + k = " << j * myAggSize + k << std::endl;
602 GetOStream(
Runtime1, -1) <<
"aggToRowMap[" << agg <<
"][" << k <<
"] = " << aggToRowMap[aggStart[agg] + k] << std::endl;
603 GetOStream(
Runtime1, -1) <<
"id aggToRowMap[agg][k]=" << aggToRowMap[aggStart[agg] + k] <<
" is global element in nonUniqueMap = " << nonUniqueMapRef.isNodeGlobalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
604 GetOStream(
Runtime1, -1) <<
"colMap local id aggToRowMap[agg][k]=" << nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
605 GetOStream(
Runtime1, -1) <<
"fineNS...=" <<
fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])] << std::endl;
606 GetOStream(
Errors, -1) <<
"caught an error!" << std::endl;
609 TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn ==
true,
Exceptions::RuntimeError,
"MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column. Error.");
612 Xpetra::global_size_t offset = agg * NSDim;
614 if (myAggSize >= Teuchos::as<LocalOrdinal>(NSDim)) {
619 SC tau = localQR(0, 0);
624 for (
size_t k = 0; k < Teuchos::as<size_t>(myAggSize); ++k) {
625 Magnitude tmag = STS::magnitude(localQR(k, 0));
626 dtemp += tmag * tmag;
628 dtemp = Teuchos::ScalarTraits<Magnitude>::squareroot(dtemp);
630 localQR(0, 0) = dtemp;
632 qrSolver.setMatrix(Teuchos::rcp(&localQR,
false));
639 for (
size_t j = 0; j < NSDim; ++j) {
640 for (
size_t k = 0; k <= j; ++k) {
642 if (coarseMapRef.isNodeLocalElement(offset + k)) {
643 coarseNS[j][offset + k] = localQR(k, j);
646 GetOStream(
Errors, -1) <<
"caught error in coarseNS insert, j=" << j <<
", offset+k = " << offset + k << std::endl;
656 Magnitude dtemp = Teuchos::ScalarTraits<SC>::magnitude(localQR(0, 0));
660 localQR(i, 0) *= dtemp;
664 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO, SC> > qFactor = qrSolver.getQ();
665 for (
size_t j = 0; j < NSDim; j++) {
666 for (
size_t i = 0; i < Teuchos::as<size_t>(myAggSize); i++) {
667 localQR(i, j) = (*qFactor)(i, j);
677 for (
size_t j = 0; j < NSDim; j++)
678 for (
size_t k = 0; k < NSDim; k++) {
680 "Caught error in coarseNS insert, j=" << j <<
", offset+k = " << offset + k);
682 if (k < as<size_t>(myAggSize))
683 coarseNS[j][offset + k] = localQR(k, j);
685 coarseNS[j][offset + k] = (k == j ? one : zero);
689 for (
size_t i = 0; i < as<size_t>(myAggSize); i++)
690 for (
size_t j = 0; j < NSDim; j++)
691 localQR(i, j) = (j == i ? one : zero);
698 for (GO j = 0; j < myAggSize; ++j) {
702 GO globalRow = aggToRowMap[aggStart[agg] + j];
705 if (rowMapForPtentRef.isNodeGlobalElement(globalRow) ==
false) {
706 ghostQrows[qctr] = globalRow;
707 for (
size_t k = 0; k < NSDim; ++k) {
708 ghostQcols[k][qctr] = coarseMapRef.getGlobalElement(agg * NSDim + k);
709 ghostQvals[k][qctr] = localQR(j, k);
714 for (
size_t k = 0; k < NSDim; ++k) {
716 if (localQR(j, k) != Teuchos::ScalarTraits<SC>::zero()) {
717 localColPtr[nnz] = agg * NSDim + k;
718 globalColPtr[nnz] = coarseMapRef.getGlobalElement(localColPtr[nnz]);
719 valPtr[nnz] = localQR(j, k);
724 GetOStream(
Errors, -1) <<
"caught error in colPtr/valPtr insert, current index=" << nnz << std::endl;
729 Ptentative->insertGlobalValues(globalRow, globalColPtr.view(0, nnz), valPtr.view(0, nnz));
731 GetOStream(
Errors, -1) <<
"pid " << A->getRowMap()->getComm()->getRank()
732 <<
"caught error during Ptent row insertion, global row "
733 << globalRow << std::endl;
743 GetOStream(
Runtime1) <<
"TentativePFactory : aggregates may cross process boundaries" << std::endl;
746 RCP<Xpetra::Vector<GO, LO, GO, Node> > targetQrowNums = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(rowMapForPtent);
747 targetQrowNums->putScalar(-1);
748 targetQrowNums->doImport(*ghostQrowNums, *importer, Xpetra::INSERT);
749 ArrayRCP<GO> targetQrows = targetQrowNums->getDataNonConst(0);
752 Array<GO> gidsToImport;
753 gidsToImport.reserve(targetQrows.size());
754 for (
typename ArrayRCP<GO>::iterator r = targetQrows.begin(); r != targetQrows.end(); ++r) {
756 gidsToImport.push_back(*r);
759 RCP<const Map> reducedMap = MapFactory::Build(A->getRowMap()->lib(),
760 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
761 gidsToImport, indexBase, A->getRowMap()->getComm());
764 importer = ImportFactory::Build(ghostQMap, reducedMap);
766 Array<RCP<Xpetra::Vector<GO, LO, GO, Node> > > targetQcolumns(NSDim);
767 for (
size_t i = 0; i < NSDim; ++i) {
768 targetQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(reducedMap);
769 targetQcolumns[i]->doImport(*(ghostQcolumns[i]), *importer, Xpetra::INSERT);
771 RCP<MultiVector> targetQvalues = MultiVectorFactory::Build(reducedMap, NSDim);
772 targetQvalues->doImport(*ghostQvalues, *importer, Xpetra::INSERT);
774 ArrayRCP<ArrayRCP<SC> > targetQvals;
775 ArrayRCP<ArrayRCP<GO> > targetQcols;
776 if (targetQvalues->getLocalLength() > 0) {
777 targetQvals.resize(NSDim);
778 targetQcols.resize(NSDim);
779 for (
size_t i = 0; i < NSDim; ++i) {
780 targetQvals[i] = targetQvalues->getDataNonConst(i);
781 targetQcols[i] = targetQcolumns[i]->getDataNonConst(0);
785 valPtr = Array<SC>(NSDim, 0.);
786 globalColPtr = Array<GO>(NSDim, 0);
787 for (
typename Array<GO>::iterator r = gidsToImport.begin(); r != gidsToImport.end(); ++r) {
788 if (targetQvalues->getLocalLength() > 0) {
789 for (
size_t j = 0; j < NSDim; ++j) {
790 valPtr[j] = targetQvals[j][reducedMap->getLocalElement(*r)];
791 globalColPtr[j] = targetQcols[j][reducedMap->getLocalElement(*r)];
793 Ptentative->insertGlobalValues(*r, globalColPtr.view(0, NSDim), valPtr.view(0, NSDim));
797 Ptentative->fillComplete(coarseMap, A->getDomainMap());
802 BuildPuncoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
803 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace,
const int levelID)
const {
804 RCP<const Map> rowMap = A->getRowMap();
805 RCP<const Map> colMap = A->getColMap();
806 const size_t numRows = rowMap->getLocalNumElements();
808 typedef Teuchos::ScalarTraits<SC> STS;
809 typedef typename STS::magnitudeType Magnitude;
810 const SC zero = STS::zero();
811 const SC one = STS::one();
812 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
814 const GO numAggs = aggregates->GetNumAggregates();
815 const size_t NSDim = fineNullspace->getNumVectors();
816 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
819 const ParameterList& pL = GetParameterList();
820 const bool&
doQRStep = pL.get<
bool>(
"tentative: calculate qr");
821 const bool& constantColSums = pL.get<
bool>(
"tentative: constant column sums");
824 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
835 ArrayRCP<LO> aggStart;
836 ArrayRCP<LO> aggToRowMapLO;
837 ArrayRCP<GO> aggToRowMapGO;
839 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
840 GetOStream(
Runtime1) <<
"Column map is consistent with the row map, good." << std::endl;
843 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMapGO);
844 GetOStream(
Warnings0) <<
"Column map is not consistent with the row map\n"
845 <<
"using GO->LO conversion with performance penalty" << std::endl;
847 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
850 ArrayRCP<ArrayRCP<const SC> >
fineNS(NSDim);
851 ArrayRCP<ArrayRCP<SC> >
coarseNS(NSDim);
852 for (
size_t i = 0; i < NSDim; i++) {
853 fineNS[i] = fineNullspace->getData(i);
854 if (coarseMap->getLocalNumElements() > 0)
855 coarseNS[i] = coarseNullspace->getDataNonConst(i);
858 size_t nnzEstimate = numRows * NSDim;
861 Ptentative = rcp(
new CrsMatrixWrap(rowMap, coarseMap, 0));
862 RCP<CrsMatrix> PtentCrs = toCrsMatrix(Ptentative);
864 ArrayRCP<size_t> iaPtent;
865 ArrayRCP<LO> jaPtent;
866 ArrayRCP<SC> valPtent;
868 PtentCrs->allocateAllValues(nnzEstimate, iaPtent, jaPtent, valPtent);
870 ArrayView<size_t> ia = iaPtent();
871 ArrayView<LO> ja = jaPtent();
872 ArrayView<SC> val = valPtent();
875 for (
size_t i = 1; i <= numRows; i++)
876 ia[i] = ia[i - 1] + NSDim;
878 for (
size_t j = 0; j < nnzEstimate; j++) {
887 for (GO agg = 0; agg < numAggs; agg++) {
888 LO aggSize = aggStart[agg + 1] - aggStart[agg];
890 Xpetra::global_size_t offset = agg * NSDim;
895 Teuchos::SerialDenseMatrix<LO, SC> localQR(aggSize, NSDim);
897 for (
size_t j = 0; j < NSDim; j++)
898 for (LO k = 0; k < aggSize; k++)
899 localQR(k, j) =
fineNS[j][aggToRowMapLO[aggStart[agg] + k]];
901 for (
size_t j = 0; j < NSDim; j++)
902 for (LO k = 0; k < aggSize; k++)
903 localQR(k, j) =
fineNS[j][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + k])];
907 for (
size_t j = 0; j < NSDim; j++) {
908 bool bIsZeroNSColumn =
true;
910 for (LO k = 0; k < aggSize; k++)
911 if (localQR(k, j) != zero)
912 bIsZeroNSColumn =
false;
915 "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column in NS column " << j);
920 if (aggSize >= Teuchos::as<LO>(NSDim)) {
923 Magnitude norm = STS::magnitude(zero);
924 for (
size_t k = 0; k < Teuchos::as<size_t>(aggSize); k++)
925 norm += STS::magnitude(localQR(k, 0) * localQR(k, 0));
926 norm = Teuchos::ScalarTraits<Magnitude>::squareroot(norm);
932 for (LO i = 0; i < aggSize; i++)
933 localQR(i, 0) /= norm;
936 Teuchos::SerialQRDenseSolver<LO, SC> qrSolver;
937 qrSolver.setMatrix(Teuchos::rcp(&localQR,
false));
941 for (
size_t j = 0; j < NSDim; j++)
942 for (
size_t k = 0; k <= j; k++)
943 coarseNS[j][offset + k] = localQR(k, j);
948 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO, SC> > qFactor = qrSolver.getQ();
949 for (
size_t j = 0; j < NSDim; j++)
950 for (
size_t i = 0; i < Teuchos::as<size_t>(aggSize); i++)
951 localQR(i, j) = (*qFactor)(i, j);
982 for (
size_t j = 0; j < NSDim; j++)
983 for (
size_t k = 0; k < NSDim; k++)
984 if (k < as<size_t>(aggSize))
985 coarseNS[j][offset + k] = localQR(k, j);
987 coarseNS[j][offset + k] = (k == j ? one : zero);
990 for (
size_t i = 0; i < as<size_t>(aggSize); i++)
991 for (
size_t j = 0; j < NSDim; j++)
992 localQR(i, j) = (j == i ? one : zero);
997 for (LO j = 0; j < aggSize; j++) {
998 LO localRow = (goodMap ? aggToRowMapLO[aggStart[agg] + j] : rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]));
1000 size_t rowStart = ia[localRow];
1001 for (
size_t k = 0, lnnz = 0; k < NSDim; k++) {
1003 if (localQR(j, k) != zero) {
1004 ja[rowStart + lnnz] = offset + k;
1005 val[rowStart + lnnz] = localQR(j, k);
1013 GetOStream(
Runtime1) <<
"TentativePFactory : bypassing local QR phase" << std::endl;
1015 GetOStream(
Warnings0) <<
"TentativePFactory : for nontrivial nullspace, this may degrade performance" << std::endl;
1025 for (GO agg = 0; agg < numAggs; agg++) {
1026 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1027 Xpetra::global_size_t offset = agg * NSDim;
1031 for (LO j = 0; j < aggSize; j++) {
1035 const LO localRow = aggToRowMapLO[aggStart[agg] + j];
1037 const size_t rowStart = ia[localRow];
1039 for (
size_t k = 0, lnnz = 0; k < NSDim; k++) {
1041 SC qr_jk =
fineNS[k][aggToRowMapLO[aggStart[agg] + j]];
1042 if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1043 if (qr_jk != zero) {
1044 ja[rowStart + lnnz] = offset + k;
1045 val[rowStart + lnnz] = qr_jk;
1050 for (
size_t j = 0; j < NSDim; j++)
1055 for (GO agg = 0; agg < numAggs; agg++) {
1056 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1057 Xpetra::global_size_t offset = agg * NSDim;
1058 for (LO j = 0; j < aggSize; j++) {
1059 const LO localRow = rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]);
1061 const size_t rowStart = ia[localRow];
1063 for (
size_t k = 0, lnnz = 0; k < NSDim; ++k) {
1065 SC qr_jk =
fineNS[k][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j])];
1066 if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1067 if (qr_jk != zero) {
1068 ja[rowStart + lnnz] = offset + k;
1069 val[rowStart + lnnz] = qr_jk;
1074 for (
size_t j = 0; j < NSDim; j++)
1084 size_t ia_tmp = 0, nnz = 0;
1085 for (
size_t i = 0; i < numRows; i++) {
1086 for (
size_t j = ia_tmp; j < ia[i + 1]; j++)
1087 if (ja[j] != INVALID) {
1095 if (rowMap->lib() == Xpetra::UseTpetra) {
1099 jaPtent.resize(nnz);
1100 valPtent.resize(nnz);
1103 GetOStream(
Runtime1) <<
"TentativePFactory : aggregates do not cross process boundaries" << std::endl;
1105 PtentCrs->setAllValues(iaPtent, jaPtent, valPtent);
1108 RCP<ParameterList> FCparams;
1109 if (pL.isSublist(
"matrixmatrix: kernel params"))
1110 FCparams = rcp(
new ParameterList(pL.sublist(
"matrixmatrix: kernel params")));
1112 FCparams = rcp(
new ParameterList);
1114 FCparams->set(
"compute global constants", FCparams->get(
"compute global constants",
false));
1115 std::string levelIDs =
toString(levelID);
1116 FCparams->set(
"Timer Label", std::string(
"MueLu::TentativeP-") + levelIDs);
1117 RCP<const Export> dummy_e;
1118 RCP<const Import> dummy_i;
1120 PtentCrs->expertStaticFillComplete(coarseMap, A->getDomainMap(), dummy_i, dummy_e, FCparams);