100 typedef typename Teuchos::ScalarTraits<Scalar>::coordinateType coordinate_type;
101 typedef Xpetra::MultiVector<coordinate_type, LO, GO, NO> RealValuedMultiVector;
102 typedef Xpetra::MultiVectorFactory<coordinate_type, LO, GO, NO> RealValuedMultiVectorFactory;
104 const ParameterList& pL = GetParameterList();
105 std::string nspName =
"Nullspace";
106 if (pL.isParameter(
"Nullspace name")) nspName = pL.get<std::string>(
"Nullspace name");
108 RCP<Matrix> Ptentative;
109 RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel,
"A");
110 RCP<Aggregates> aggregates = Get<RCP<Aggregates> >(fineLevel,
"Aggregates");
113 if (aggregates->GetNumGlobalAggregatesComputeIfNeeded() == 0) {
114 Ptentative = Teuchos::null;
115 Set(coarseLevel,
"P", Ptentative);
119 RCP<AmalgamationInfo> amalgInfo = Get<RCP<AmalgamationInfo> >(fineLevel,
"UnAmalgamationInfo");
120 RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, nspName);
121 RCP<const Map> coarseMap = Get<RCP<const Map> >(fineLevel,
"CoarseMap");
122 RCP<RealValuedMultiVector> fineCoords;
123 if (bTransferCoordinates_) {
124 fineCoords = Get<RCP<RealValuedMultiVector> >(fineLevel,
"Coordinates");
129 RCP<const Teuchos::Comm<int> > nodeComm = Get<RCP<const Teuchos::Comm<int> > >(fineLevel,
"Node Comm");
130 Set<RCP<const Teuchos::Comm<int> > >(coarseLevel,
"Node Comm", nodeComm);
134 TEUCHOS_TEST_FOR_EXCEPTION(A->getDomainMap()->getLocalNumElements() != fineNullspace->getMap()->getLocalNumElements(),
137 RCP<MultiVector> coarseNullspace;
138 RCP<RealValuedMultiVector> coarseCoords;
140 if (bTransferCoordinates_) {
143 ArrayView<const GO> elementAList = coarseMap->getLocalElementList();
145 if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null) {
146 blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize();
148 GO indexBase = coarseMap->getIndexBase();
149 LO numCoarseNodes = Teuchos::as<LO>(elementAList.size() / blkSize);
150 Array<GO> nodeList(numCoarseNodes);
151 const int numDimensions = fineCoords->getNumVectors();
153 for (LO i = 0; i < numCoarseNodes; i++) {
154 nodeList[i] = (elementAList[i * blkSize] - indexBase) / blkSize + indexBase;
156 RCP<const Map> coarseCoordsMap = MapFactory::Build(fineCoords->getMap()->lib(),
157 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
160 fineCoords->getMap()->getComm());
161 coarseCoords = RealValuedMultiVectorFactory::Build(coarseCoordsMap, numDimensions);
164 RCP<RealValuedMultiVector> ghostedCoords;
165 if (aggregates->AggregatesCrossProcessors()) {
166 RCP<const Map> aggMap = aggregates->GetMap();
167 RCP<const Import> importer = ImportFactory::Build(fineCoords->getMap(), aggMap);
169 ghostedCoords = RealValuedMultiVectorFactory::Build(aggMap, numDimensions);
170 ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);
172 ghostedCoords = fineCoords;
176 int myPID = coarseCoordsMap->getComm()->getRank();
177 LO numAggs = aggregates->GetNumAggregates();
178 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
179 const ArrayRCP<const LO> vertex2AggID = aggregates->GetVertex2AggId()->getData(0);
180 const ArrayRCP<const LO> procWinner = aggregates->GetProcWinner()->getData(0);
183 for (
int dim = 0; dim < numDimensions; ++dim) {
184 ArrayRCP<const coordinate_type> fineCoordsData = ghostedCoords->getData(dim);
185 ArrayRCP<coordinate_type> coarseCoordsData = coarseCoords->getDataNonConst(dim);
187 for (LO lnode = 0; lnode < Teuchos::as<LO>(vertex2AggID.size()); lnode++) {
188 if (procWinner[lnode] == myPID &&
189 lnode < fineCoordsData.size() &&
190 vertex2AggID[lnode] < coarseCoordsData.size() &&
191 Teuchos::ScalarTraits<coordinate_type>::isnaninf(fineCoordsData[lnode]) ==
false) {
192 coarseCoordsData[vertex2AggID[lnode]] += fineCoordsData[lnode];
195 for (LO agg = 0; agg < numAggs; agg++) {
196 coarseCoordsData[agg] /= aggSizes[agg];
201 if (!aggregates->AggregatesCrossProcessors()) {
202 if (Xpetra::Helpers<SC, LO, GO, NO>::isTpetraBlockCrs(A)) {
203 BuildPuncoupledBlockCrs(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.
GetLevelID());
205 BuildPuncoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.
GetLevelID());
208 BuildPcoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
218 if (A->IsView(
"stridedMaps") ==
true)
219 Ptentative->CreateView(
"stridedMaps", A->getRowMap(
"stridedMaps"), coarseMap);
221 Ptentative->CreateView(
"stridedMaps", Ptentative->getRangeMap(), coarseMap);
223 if (bTransferCoordinates_) {
224 Set(coarseLevel,
"Coordinates", coarseCoords);
226 Set(coarseLevel,
"Nullspace", coarseNullspace);
227 Set(coarseLevel,
"P", Ptentative);
230 RCP<ParameterList> params = rcp(
new ParameterList());
231 params->set(
"printLoadBalancingInfo",
true);
238 BuildPuncoupledBlockCrs(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
239 RCP<const Map> coarsePointMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace,
const int levelID)
const {
249 RCP<const Map> rowMap = A->getRowMap();
250 RCP<const Map> rangeMap = A->getRangeMap();
251 RCP<const Map> colMap = A->getColMap();
253 const size_t numFineBlockRows = rowMap->getLocalNumElements();
255 typedef Teuchos::ScalarTraits<SC> STS;
257 const SC zero = STS::zero();
258 const SC one = STS::one();
259 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
261 const GO numAggs = aggregates->GetNumAggregates();
262 const size_t NSDim = fineNullspace->getNumVectors();
263 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
269 const size_t numCoarseBlockRows = coarsePointMap->getLocalNumElements() / NSDim;
270 RCP<const Map> coarseBlockMap = MapFactory::Build(coarsePointMap->lib(),
271 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
273 coarsePointMap->getIndexBase(),
274 coarsePointMap->getComm());
276 const ParameterList& pL = GetParameterList();
277 const bool&
doQRStep = pL.get<
bool>(
"tentative: calculate qr");
278 const bool& constantColSums = pL.get<
bool>(
"tentative: constant column sums");
281 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
294 ArrayRCP<LO> aggStart;
295 ArrayRCP<LO> aggToRowMapLO;
296 ArrayRCP<GO> aggToRowMapGO;
298 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
299 GetOStream(
Runtime1) <<
"Column map is consistent with the row map, good." << std::endl;
301 throw std::runtime_error(
"TentativePFactory::PuncoupledBlockCrs: Inconsistent maps not currently supported");
304 coarseNullspace = MultiVectorFactory::Build(coarsePointMap, NSDim);
307 ArrayRCP<ArrayRCP<const SC> >
fineNS(NSDim);
308 ArrayRCP<ArrayRCP<SC> >
coarseNS(NSDim);
309 for (
size_t i = 0; i < NSDim; i++) {
310 fineNS[i] = fineNullspace->getData(i);
311 if (coarsePointMap->getLocalNumElements() > 0)
312 coarseNS[i] = coarseNullspace->getDataNonConst(i);
318 RCP<CrsGraph> BlockGraph = CrsGraphFactory::Build(rowMap, coarseBlockMap, 0);
319 ArrayRCP<size_t> iaPtent;
320 ArrayRCP<LO> jaPtent;
321 BlockGraph->allocateAllIndices(numFineBlockRows, iaPtent, jaPtent);
322 ArrayView<size_t> ia = iaPtent();
323 ArrayView<LO> ja = jaPtent();
325 for (
size_t i = 0; i < numFineBlockRows; i++) {
329 ia[numCoarseBlockRows] = numCoarseBlockRows;
331 for (GO agg = 0; agg < numAggs; agg++) {
332 LO aggSize = aggStart[agg + 1] - aggStart[agg];
333 Xpetra::global_size_t offset = agg;
335 for (LO j = 0; j < aggSize; j++) {
337 const LO localRow = aggToRowMapLO[aggStart[agg] + j];
338 const size_t rowStart = ia[localRow];
339 ja[rowStart] = offset;
345 size_t ia_tmp = 0, nnz = 0;
346 for (
size_t i = 0; i < numFineBlockRows; i++) {
347 for (
size_t j = ia_tmp; j < ia[i + 1]; j++)
348 if (ja[j] != INVALID) {
356 if (rowMap->lib() == Xpetra::UseTpetra) {
363 GetOStream(
Runtime1) <<
"TentativePFactory : generating block graph" << std::endl;
364 BlockGraph->setAllIndices(iaPtent, jaPtent);
368 RCP<ParameterList> FCparams;
369 if (pL.isSublist(
"matrixmatrix: kernel params"))
370 FCparams = rcp(
new ParameterList(pL.sublist(
"matrixmatrix: kernel params")));
372 FCparams = rcp(
new ParameterList);
375 FCparams->set(
"compute global constants", FCparams->get(
"compute global constants",
true));
376 std::string levelIDs =
toString(levelID);
377 FCparams->set(
"Timer Label", std::string(
"MueLu::TentativeP-") + levelIDs);
378 RCP<const Export> dummy_e;
379 RCP<const Import> dummy_i;
380 BlockGraph->expertStaticFillComplete(coarseBlockMap, rowMap, dummy_i, dummy_e, FCparams);
385 RCP<Xpetra::CrsMatrix<SC, LO, GO, NO> > P_xpetra = Xpetra::CrsMatrixFactory<SC, LO, GO, NO>::BuildBlock(BlockGraph, coarsePointMap, rangeMap, NSDim);
386 RCP<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO> > P_tpetra = rcp_dynamic_cast<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO> >(P_xpetra);
387 if (P_tpetra.is_null())
throw std::runtime_error(
"BuildPUncoupled: Matrix factory did not return a Tpetra::BlockCrsMatrix");
388 RCP<CrsMatrixWrap> P_wrap = rcp(
new CrsMatrixWrap(P_xpetra));
397 Teuchos::Array<Scalar> block(NSDim * NSDim, zero);
398 Teuchos::Array<LO> bcol(1);
400 GetOStream(
Runtime1) <<
"TentativePFactory : bypassing local QR phase" << std::endl;
401 for (LO agg = 0; agg < numAggs; agg++) {
403 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
404 Xpetra::global_size_t offset = agg * NSDim;
408 for (LO j = 0; j < aggSize; j++) {
409 const LO localBlockRow = aggToRowMapLO[aggStart[agg] + j];
411 for (
size_t r = 0; r < NSDim; r++) {
412 LO localPointRow = localBlockRow * NSDim + r;
413 for (
size_t c = 0; c < NSDim; c++)
414 block[r * NSDim + c] =
fineNS[c][localPointRow];
417 P_tpetra->replaceLocalValues(localBlockRow, bcol(), block());
421 for (
size_t j = 0; j < NSDim; j++)
431 BuildPcoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
432 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace)
const {
433 typedef Teuchos::ScalarTraits<SC> STS;
434 typedef typename STS::magnitudeType Magnitude;
435 const SC zero = STS::zero();
436 const SC one = STS::one();
439 GO numAggs = aggregates->GetNumAggregates();
445 ArrayRCP<LO> aggStart;
446 ArrayRCP<GO> aggToRowMap;
447 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMap);
451 for (GO i = 0; i < numAggs; ++i) {
452 LO sizeOfThisAgg = aggStart[i + 1] - aggStart[i];
453 if (sizeOfThisAgg > maxAggSize) maxAggSize = sizeOfThisAgg;
457 const size_t NSDim = fineNullspace->getNumVectors();
460 GO indexBase = A->getRowMap()->getIndexBase();
462 const RCP<const Map> nonUniqueMap = amalgInfo->ComputeUnamalgamatedImportDofMap(*aggregates);
463 const RCP<const Map> uniqueMap = A->getDomainMap();
464 RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
465 RCP<MultiVector> fineNullspaceWithOverlap = MultiVectorFactory::Build(nonUniqueMap, NSDim);
466 fineNullspaceWithOverlap->doImport(*fineNullspace, *importer, Xpetra::INSERT);
469 ArrayRCP<ArrayRCP<const SC> >
fineNS(NSDim);
470 for (
size_t i = 0; i < NSDim; ++i)
471 fineNS[i] = fineNullspaceWithOverlap->getData(i);
474 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
476 ArrayRCP<ArrayRCP<SC> >
coarseNS(NSDim);
477 for (
size_t i = 0; i < NSDim; ++i)
478 if (coarseMap->getLocalNumElements() > 0)
coarseNS[i] = coarseNullspace->getDataNonConst(i);
483 RCP<const Map> rowMapForPtent = A->getRowMap();
484 const Map& rowMapForPtentRef = *rowMapForPtent;
488 RCP<const Map> colMap = A->getColMap();
490 RCP<const Map> ghostQMap;
491 RCP<MultiVector> ghostQvalues;
492 Array<RCP<Xpetra::Vector<GO, LO, GO, Node> > > ghostQcolumns;
493 RCP<Xpetra::Vector<GO, LO, GO, Node> > ghostQrowNums;
494 ArrayRCP<ArrayRCP<SC> > ghostQvals;
495 ArrayRCP<ArrayRCP<GO> > ghostQcols;
496 ArrayRCP<GO> ghostQrows;
499 for (LO j = 0; j < numAggs; ++j) {
500 for (LO k = aggStart[j]; k < aggStart[j + 1]; ++k) {
501 if (rowMapForPtentRef.isNodeGlobalElement(aggToRowMap[k]) ==
false) {
502 ghostGIDs.push_back(aggToRowMap[k]);
506 ghostQMap = MapFactory::Build(A->getRowMap()->lib(),
507 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
509 indexBase, A->getRowMap()->getComm());
511 ghostQvalues = MultiVectorFactory::Build(ghostQMap, NSDim);
515 ghostQcolumns.resize(NSDim);
516 for (
size_t i = 0; i < NSDim; ++i)
517 ghostQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
518 ghostQrowNums = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
519 if (ghostQvalues->getLocalLength() > 0) {
520 ghostQvals.resize(NSDim);
521 ghostQcols.resize(NSDim);
522 for (
size_t i = 0; i < NSDim; ++i) {
523 ghostQvals[i] = ghostQvalues->getDataNonConst(i);
524 ghostQcols[i] = ghostQcolumns[i]->getDataNonConst(0);
526 ghostQrows = ghostQrowNums->getDataNonConst(0);
530 importer = ImportFactory::Build(ghostQMap, A->getRowMap());
533 Teuchos::SerialQRDenseSolver<LO, SC> qrSolver;
536 Array<GO> globalColPtr(maxAggSize * NSDim, 0);
537 Array<LO> localColPtr(maxAggSize * NSDim, 0);
538 Array<SC> valPtr(maxAggSize * NSDim, 0.);
541 const Map& coarseMapRef = *coarseMap;
544 ArrayRCP<size_t> ptent_rowptr;
545 ArrayRCP<LO> ptent_colind;
546 ArrayRCP<Scalar> ptent_values;
549 ArrayView<size_t> rowptr_v;
550 ArrayView<LO> colind_v;
551 ArrayView<Scalar> values_v;
554 Array<size_t> rowptr_temp;
555 Array<LO> colind_temp;
556 Array<Scalar> values_temp;
558 RCP<CrsMatrix> PtentCrs;
560 RCP<CrsMatrixWrap> PtentCrsWrap = rcp(
new CrsMatrixWrap(rowMapForPtent, NSDim));
561 PtentCrs = PtentCrsWrap->getCrsMatrix();
562 Ptentative = PtentCrsWrap;
568 const Map& nonUniqueMapRef = *nonUniqueMap;
570 size_t total_nnz_count = 0;
572 for (GO agg = 0; agg < numAggs; ++agg) {
573 LO myAggSize = aggStart[agg + 1] - aggStart[agg];
576 Teuchos::SerialDenseMatrix<LO, SC> localQR(myAggSize, NSDim);
577 for (
size_t j = 0; j < NSDim; ++j) {
578 bool bIsZeroNSColumn =
true;
579 for (LO k = 0; k < myAggSize; ++k) {
583 SC nsVal =
fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])];
584 localQR(k, j) = nsVal;
585 if (nsVal != zero) bIsZeroNSColumn =
false;
587 GetOStream(
Runtime1, -1) <<
"length of fine level nsp: " << fineNullspace->getGlobalLength() << std::endl;
588 GetOStream(
Runtime1, -1) <<
"length of fine level nsp w overlap: " << fineNullspaceWithOverlap->getGlobalLength() << std::endl;
589 GetOStream(
Runtime1, -1) <<
"(local?) aggId=" << agg << std::endl;
590 GetOStream(
Runtime1, -1) <<
"aggSize=" << myAggSize << std::endl;
591 GetOStream(
Runtime1, -1) <<
"agg DOF=" << k << std::endl;
592 GetOStream(
Runtime1, -1) <<
"NS vector j=" << j << std::endl;
593 GetOStream(
Runtime1, -1) <<
"j*myAggSize + k = " << j * myAggSize + k << std::endl;
594 GetOStream(
Runtime1, -1) <<
"aggToRowMap[" << agg <<
"][" << k <<
"] = " << aggToRowMap[aggStart[agg] + k] << std::endl;
595 GetOStream(
Runtime1, -1) <<
"id aggToRowMap[agg][k]=" << aggToRowMap[aggStart[agg] + k] <<
" is global element in nonUniqueMap = " << nonUniqueMapRef.isNodeGlobalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
596 GetOStream(
Runtime1, -1) <<
"colMap local id aggToRowMap[agg][k]=" << nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
597 GetOStream(
Runtime1, -1) <<
"fineNS...=" <<
fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])] << std::endl;
598 GetOStream(
Errors, -1) <<
"caught an error!" << std::endl;
601 TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn ==
true,
Exceptions::RuntimeError,
"MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column. Error.");
604 Xpetra::global_size_t offset = agg * NSDim;
606 if (myAggSize >= Teuchos::as<LocalOrdinal>(NSDim)) {
611 SC tau = localQR(0, 0);
616 for (
size_t k = 0; k < Teuchos::as<size_t>(myAggSize); ++k) {
617 Magnitude tmag = STS::magnitude(localQR(k, 0));
618 dtemp += tmag * tmag;
620 dtemp = Teuchos::ScalarTraits<Magnitude>::squareroot(dtemp);
622 localQR(0, 0) = dtemp;
624 qrSolver.setMatrix(Teuchos::rcp(&localQR,
false));
631 for (
size_t j = 0; j < NSDim; ++j) {
632 for (
size_t k = 0; k <= j; ++k) {
634 if (coarseMapRef.isNodeLocalElement(offset + k)) {
635 coarseNS[j][offset + k] = localQR(k, j);
638 GetOStream(
Errors, -1) <<
"caught error in coarseNS insert, j=" << j <<
", offset+k = " << offset + k << std::endl;
648 Magnitude dtemp = Teuchos::ScalarTraits<SC>::magnitude(localQR(0, 0));
652 localQR(i, 0) *= dtemp;
656 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO, SC> > qFactor = qrSolver.getQ();
657 for (
size_t j = 0; j < NSDim; j++) {
658 for (
size_t i = 0; i < Teuchos::as<size_t>(myAggSize); i++) {
659 localQR(i, j) = (*qFactor)(i, j);
669 for (
size_t j = 0; j < NSDim; j++)
670 for (
size_t k = 0; k < NSDim; k++) {
672 "Caught error in coarseNS insert, j=" << j <<
", offset+k = " << offset + k);
674 if (k < as<size_t>(myAggSize))
675 coarseNS[j][offset + k] = localQR(k, j);
677 coarseNS[j][offset + k] = (k == j ? one : zero);
681 for (
size_t i = 0; i < as<size_t>(myAggSize); i++)
682 for (
size_t j = 0; j < NSDim; j++)
683 localQR(i, j) = (j == i ? one : zero);
690 for (GO j = 0; j < myAggSize; ++j) {
694 GO globalRow = aggToRowMap[aggStart[agg] + j];
697 if (rowMapForPtentRef.isNodeGlobalElement(globalRow) ==
false) {
698 ghostQrows[qctr] = globalRow;
699 for (
size_t k = 0; k < NSDim; ++k) {
700 ghostQcols[k][qctr] = coarseMapRef.getGlobalElement(agg * NSDim + k);
701 ghostQvals[k][qctr] = localQR(j, k);
706 for (
size_t k = 0; k < NSDim; ++k) {
708 if (localQR(j, k) != Teuchos::ScalarTraits<SC>::zero()) {
709 localColPtr[nnz] = agg * NSDim + k;
710 globalColPtr[nnz] = coarseMapRef.getGlobalElement(localColPtr[nnz]);
711 valPtr[nnz] = localQR(j, k);
716 GetOStream(
Errors, -1) <<
"caught error in colPtr/valPtr insert, current index=" << nnz << std::endl;
721 Ptentative->insertGlobalValues(globalRow, globalColPtr.view(0, nnz), valPtr.view(0, nnz));
723 GetOStream(
Errors, -1) <<
"pid " << A->getRowMap()->getComm()->getRank()
724 <<
"caught error during Ptent row insertion, global row "
725 << globalRow << std::endl;
735 GetOStream(
Runtime1) <<
"TentativePFactory : aggregates may cross process boundaries" << std::endl;
738 RCP<Xpetra::Vector<GO, LO, GO, Node> > targetQrowNums = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(rowMapForPtent);
739 targetQrowNums->putScalar(-1);
740 targetQrowNums->doImport(*ghostQrowNums, *importer, Xpetra::INSERT);
741 ArrayRCP<GO> targetQrows = targetQrowNums->getDataNonConst(0);
744 Array<GO> gidsToImport;
745 gidsToImport.reserve(targetQrows.size());
746 for (
typename ArrayRCP<GO>::iterator r = targetQrows.begin(); r != targetQrows.end(); ++r) {
748 gidsToImport.push_back(*r);
751 RCP<const Map> reducedMap = MapFactory::Build(A->getRowMap()->lib(),
752 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
753 gidsToImport, indexBase, A->getRowMap()->getComm());
756 importer = ImportFactory::Build(ghostQMap, reducedMap);
758 Array<RCP<Xpetra::Vector<GO, LO, GO, Node> > > targetQcolumns(NSDim);
759 for (
size_t i = 0; i < NSDim; ++i) {
760 targetQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(reducedMap);
761 targetQcolumns[i]->doImport(*(ghostQcolumns[i]), *importer, Xpetra::INSERT);
763 RCP<MultiVector> targetQvalues = MultiVectorFactory::Build(reducedMap, NSDim);
764 targetQvalues->doImport(*ghostQvalues, *importer, Xpetra::INSERT);
766 ArrayRCP<ArrayRCP<SC> > targetQvals;
767 ArrayRCP<ArrayRCP<GO> > targetQcols;
768 if (targetQvalues->getLocalLength() > 0) {
769 targetQvals.resize(NSDim);
770 targetQcols.resize(NSDim);
771 for (
size_t i = 0; i < NSDim; ++i) {
772 targetQvals[i] = targetQvalues->getDataNonConst(i);
773 targetQcols[i] = targetQcolumns[i]->getDataNonConst(0);
777 valPtr = Array<SC>(NSDim, 0.);
778 globalColPtr = Array<GO>(NSDim, 0);
779 for (
typename Array<GO>::iterator r = gidsToImport.begin(); r != gidsToImport.end(); ++r) {
780 if (targetQvalues->getLocalLength() > 0) {
781 for (
size_t j = 0; j < NSDim; ++j) {
782 valPtr[j] = targetQvals[j][reducedMap->getLocalElement(*r)];
783 globalColPtr[j] = targetQcols[j][reducedMap->getLocalElement(*r)];
785 Ptentative->insertGlobalValues(*r, globalColPtr.view(0, NSDim), valPtr.view(0, NSDim));
789 Ptentative->fillComplete(coarseMap, A->getDomainMap());
794 BuildPuncoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
795 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace,
const int levelID)
const {
796 RCP<const Map> rowMap = A->getRowMap();
797 RCP<const Map> colMap = A->getColMap();
798 const size_t numRows = rowMap->getLocalNumElements();
800 typedef Teuchos::ScalarTraits<SC> STS;
801 typedef typename STS::magnitudeType Magnitude;
802 const SC zero = STS::zero();
803 const SC one = STS::one();
804 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
806 const GO numAggs = aggregates->GetNumAggregates();
807 const size_t NSDim = fineNullspace->getNumVectors();
808 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
811 const ParameterList& pL = GetParameterList();
812 const bool&
doQRStep = pL.get<
bool>(
"tentative: calculate qr");
813 const bool& constantColSums = pL.get<
bool>(
"tentative: constant column sums");
816 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
827 ArrayRCP<LO> aggStart;
828 ArrayRCP<LO> aggToRowMapLO;
829 ArrayRCP<GO> aggToRowMapGO;
831 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
832 GetOStream(
Runtime1) <<
"Column map is consistent with the row map, good." << std::endl;
835 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMapGO);
836 GetOStream(
Warnings0) <<
"Column map is not consistent with the row map\n"
837 <<
"using GO->LO conversion with performance penalty" << std::endl;
839 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
842 ArrayRCP<ArrayRCP<const SC> >
fineNS(NSDim);
843 ArrayRCP<ArrayRCP<SC> >
coarseNS(NSDim);
844 for (
size_t i = 0; i < NSDim; i++) {
845 fineNS[i] = fineNullspace->getData(i);
846 if (coarseMap->getLocalNumElements() > 0)
847 coarseNS[i] = coarseNullspace->getDataNonConst(i);
850 size_t nnzEstimate = numRows * NSDim;
853 Ptentative = rcp(
new CrsMatrixWrap(rowMap, coarseMap, 0));
854 RCP<CrsMatrix> PtentCrs = toCrsMatrix(Ptentative);
856 ArrayRCP<size_t> iaPtent;
857 ArrayRCP<LO> jaPtent;
858 ArrayRCP<SC> valPtent;
860 PtentCrs->allocateAllValues(nnzEstimate, iaPtent, jaPtent, valPtent);
862 ArrayView<size_t> ia = iaPtent();
863 ArrayView<LO> ja = jaPtent();
864 ArrayView<SC> val = valPtent();
867 for (
size_t i = 1; i <= numRows; i++)
868 ia[i] = ia[i - 1] + NSDim;
870 for (
size_t j = 0; j < nnzEstimate; j++) {
879 for (GO agg = 0; agg < numAggs; agg++) {
880 LO aggSize = aggStart[agg + 1] - aggStart[agg];
882 Xpetra::global_size_t offset = agg * NSDim;
887 Teuchos::SerialDenseMatrix<LO, SC> localQR(aggSize, NSDim);
889 for (
size_t j = 0; j < NSDim; j++)
890 for (LO k = 0; k < aggSize; k++)
891 localQR(k, j) =
fineNS[j][aggToRowMapLO[aggStart[agg] + k]];
893 for (
size_t j = 0; j < NSDim; j++)
894 for (LO k = 0; k < aggSize; k++)
895 localQR(k, j) =
fineNS[j][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + k])];
899 for (
size_t j = 0; j < NSDim; j++) {
900 bool bIsZeroNSColumn =
true;
902 for (LO k = 0; k < aggSize; k++)
903 if (localQR(k, j) != zero)
904 bIsZeroNSColumn =
false;
907 "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column in NS column " << j);
912 if (aggSize >= Teuchos::as<LO>(NSDim)) {
915 Magnitude norm = STS::magnitude(zero);
916 for (
size_t k = 0; k < Teuchos::as<size_t>(aggSize); k++)
917 norm += STS::magnitude(localQR(k, 0) * localQR(k, 0));
918 norm = Teuchos::ScalarTraits<Magnitude>::squareroot(norm);
924 for (LO i = 0; i < aggSize; i++)
925 localQR(i, 0) /= norm;
928 Teuchos::SerialQRDenseSolver<LO, SC> qrSolver;
929 qrSolver.setMatrix(Teuchos::rcp(&localQR,
false));
933 for (
size_t j = 0; j < NSDim; j++)
934 for (
size_t k = 0; k <= j; k++)
935 coarseNS[j][offset + k] = localQR(k, j);
940 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO, SC> > qFactor = qrSolver.getQ();
941 for (
size_t j = 0; j < NSDim; j++)
942 for (
size_t i = 0; i < Teuchos::as<size_t>(aggSize); i++)
943 localQR(i, j) = (*qFactor)(i, j);
974 for (
size_t j = 0; j < NSDim; j++)
975 for (
size_t k = 0; k < NSDim; k++)
976 if (k < as<size_t>(aggSize))
977 coarseNS[j][offset + k] = localQR(k, j);
979 coarseNS[j][offset + k] = (k == j ? one : zero);
982 for (
size_t i = 0; i < as<size_t>(aggSize); i++)
983 for (
size_t j = 0; j < NSDim; j++)
984 localQR(i, j) = (j == i ? one : zero);
989 for (LO j = 0; j < aggSize; j++) {
990 LO localRow = (goodMap ? aggToRowMapLO[aggStart[agg] + j] : rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]));
992 size_t rowStart = ia[localRow];
993 for (
size_t k = 0, lnnz = 0; k < NSDim; k++) {
995 if (localQR(j, k) != zero) {
996 ja[rowStart + lnnz] = offset + k;
997 val[rowStart + lnnz] = localQR(j, k);
1005 GetOStream(
Runtime1) <<
"TentativePFactory : bypassing local QR phase" << std::endl;
1007 GetOStream(
Warnings0) <<
"TentativePFactory : for nontrivial nullspace, this may degrade performance" << std::endl;
1017 for (GO agg = 0; agg < numAggs; agg++) {
1018 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1019 Xpetra::global_size_t offset = agg * NSDim;
1023 for (LO j = 0; j < aggSize; j++) {
1027 const LO localRow = aggToRowMapLO[aggStart[agg] + j];
1029 const size_t rowStart = ia[localRow];
1031 for (
size_t k = 0, lnnz = 0; k < NSDim; k++) {
1033 SC qr_jk =
fineNS[k][aggToRowMapLO[aggStart[agg] + j]];
1034 if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1035 if (qr_jk != zero) {
1036 ja[rowStart + lnnz] = offset + k;
1037 val[rowStart + lnnz] = qr_jk;
1042 for (
size_t j = 0; j < NSDim; j++)
1047 for (GO agg = 0; agg < numAggs; agg++) {
1048 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1049 Xpetra::global_size_t offset = agg * NSDim;
1050 for (LO j = 0; j < aggSize; j++) {
1051 const LO localRow = rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]);
1053 const size_t rowStart = ia[localRow];
1055 for (
size_t k = 0, lnnz = 0; k < NSDim; ++k) {
1057 SC qr_jk =
fineNS[k][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j])];
1058 if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1059 if (qr_jk != zero) {
1060 ja[rowStart + lnnz] = offset + k;
1061 val[rowStart + lnnz] = qr_jk;
1066 for (
size_t j = 0; j < NSDim; j++)
1076 size_t ia_tmp = 0, nnz = 0;
1077 for (
size_t i = 0; i < numRows; i++) {
1078 for (
size_t j = ia_tmp; j < ia[i + 1]; j++)
1079 if (ja[j] != INVALID) {
1087 if (rowMap->lib() == Xpetra::UseTpetra) {
1091 jaPtent.resize(nnz);
1092 valPtent.resize(nnz);
1095 GetOStream(
Runtime1) <<
"TentativePFactory : aggregates do not cross process boundaries" << std::endl;
1097 PtentCrs->setAllValues(iaPtent, jaPtent, valPtent);
1100 RCP<ParameterList> FCparams;
1101 if (pL.isSublist(
"matrixmatrix: kernel params"))
1102 FCparams = rcp(
new ParameterList(pL.sublist(
"matrixmatrix: kernel params")));
1104 FCparams = rcp(
new ParameterList);
1106 FCparams->set(
"compute global constants", FCparams->get(
"compute global constants",
false));
1107 std::string levelIDs =
toString(levelID);
1108 FCparams->set(
"Timer Label", std::string(
"MueLu::TentativeP-") + levelIDs);
1109 RCP<const Export> dummy_e;
1110 RCP<const Import> dummy_i;
1112 PtentCrs->expertStaticFillComplete(coarseMap, A->getDomainMap(), dummy_i, dummy_e, FCparams);