100 typedef typename Teuchos::ScalarTraits<Scalar>::coordinateType coordinate_type;
101 typedef Xpetra::MultiVector<coordinate_type, LO, GO, NO> RealValuedMultiVector;
102 typedef Xpetra::MultiVectorFactory<coordinate_type, LO, GO, NO> RealValuedMultiVectorFactory;
104 const ParameterList& pL = GetParameterList();
105 std::string nspName =
"Nullspace";
106 if (pL.isParameter(
"Nullspace name")) nspName = pL.get<std::string>(
"Nullspace name");
108 RCP<Matrix> Ptentative;
109 RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel,
"A");
110 RCP<Aggregates> aggregates = Get<RCP<Aggregates> >(fineLevel,
"Aggregates");
113 if (aggregates->GetNumGlobalAggregatesComputeIfNeeded() == 0) {
114 Ptentative = Teuchos::null;
115 Set(coarseLevel,
"P", Ptentative);
119 RCP<AmalgamationInfo> amalgInfo = Get<RCP<AmalgamationInfo> >(fineLevel,
"UnAmalgamationInfo");
120 RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, nspName);
121 RCP<const Map> coarseMap = Get<RCP<const Map> >(fineLevel,
"CoarseMap");
122 RCP<RealValuedMultiVector> fineCoords;
123 if (bTransferCoordinates_) {
124 fineCoords = Get<RCP<RealValuedMultiVector> >(fineLevel,
"Coordinates");
129 RCP<const Teuchos::Comm<int> > nodeComm = Get<RCP<const Teuchos::Comm<int> > >(fineLevel,
"Node Comm");
130 Set<RCP<const Teuchos::Comm<int> > >(coarseLevel,
"Node Comm", nodeComm);
134 TEUCHOS_TEST_FOR_EXCEPTION(A->getDomainMap()->getLocalNumElements() != fineNullspace->getMap()->getLocalNumElements(),
137 RCP<MultiVector> coarseNullspace;
138 RCP<RealValuedMultiVector> coarseCoords;
140 if (bTransferCoordinates_) {
143 ArrayView<const GO> elementAList = coarseMap->getLocalElementList();
145 if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null) {
146 blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize();
148 GO indexBase = coarseMap->getIndexBase();
149 LO numCoarseNodes = Teuchos::as<LO>(elementAList.size() / blkSize);
150 Array<GO> nodeList(numCoarseNodes);
151 const int numDimensions = fineCoords->getNumVectors();
153 for (LO i = 0; i < numCoarseNodes; i++) {
154 nodeList[i] = (elementAList[i * blkSize] - indexBase) / blkSize + indexBase;
156 Teuchos::RCP<Teuchos::ParameterList> params = Teuchos::rcp(
new Teuchos::ParameterList());
157 params->set(
"compute global constants",
false);
158 RCP<const Map> coarseCoordsMap = MapFactory::Build(fineCoords->getMap()->lib(),
159 blkSize * coarseMap->getGlobalNumElements(),
162 fineCoords->getMap()->getComm(),
164 coarseCoords = RealValuedMultiVectorFactory::Build(coarseCoordsMap, numDimensions);
167 RCP<RealValuedMultiVector> ghostedCoords;
168 if (aggregates->AggregatesCrossProcessors()) {
169 RCP<const Map> aggMap = aggregates->GetMap();
170 RCP<const Import> importer = ImportFactory::Build(fineCoords->getMap(), aggMap);
172 ghostedCoords = RealValuedMultiVectorFactory::Build(aggMap, numDimensions);
173 ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);
175 ghostedCoords = fineCoords;
179 int myPID = coarseCoordsMap->getComm()->getRank();
180 LO numAggs = aggregates->GetNumAggregates();
181 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
182 const ArrayRCP<const LO> vertex2AggID = aggregates->GetVertex2AggId()->getData(0);
183 const ArrayRCP<const LO> procWinner = aggregates->GetProcWinner()->getData(0);
186 for (
int dim = 0; dim < numDimensions; ++dim) {
187 ArrayRCP<const coordinate_type> fineCoordsData = ghostedCoords->getData(dim);
188 ArrayRCP<coordinate_type> coarseCoordsData = coarseCoords->getDataNonConst(dim);
190 for (LO lnode = 0; lnode < Teuchos::as<LO>(vertex2AggID.size()); lnode++) {
191 if (procWinner[lnode] == myPID &&
192 lnode < fineCoordsData.size() &&
193 vertex2AggID[lnode] < coarseCoordsData.size() &&
194 Teuchos::ScalarTraits<coordinate_type>::isnaninf(fineCoordsData[lnode]) ==
false) {
195 coarseCoordsData[vertex2AggID[lnode]] += fineCoordsData[lnode];
198 for (LO agg = 0; agg < numAggs; agg++) {
199 coarseCoordsData[agg] /= aggSizes[agg];
204 if (!aggregates->AggregatesCrossProcessors()) {
205 if (Xpetra::Helpers<SC, LO, GO, NO>::isTpetraBlockCrs(A)) {
206 BuildPuncoupledBlockCrs(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.
GetLevelID());
208 BuildPuncoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.
GetLevelID());
211 BuildPcoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
221 if (A->IsView(
"stridedMaps") ==
true)
222 Ptentative->CreateView(
"stridedMaps", A->getRowMap(
"stridedMaps"), coarseMap);
224 Ptentative->CreateView(
"stridedMaps", Ptentative->getRangeMap(), coarseMap);
226 if (bTransferCoordinates_) {
227 Set(coarseLevel,
"Coordinates", coarseCoords);
229 Set(coarseLevel,
"Nullspace", coarseNullspace);
230 Set(coarseLevel,
"P", Ptentative);
233 RCP<ParameterList> params = rcp(
new ParameterList());
234 params->set(
"printLoadBalancingInfo",
true);
241 BuildPuncoupledBlockCrs(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
242 RCP<const Map> coarsePointMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace,
const int levelID)
const {
252 RCP<const Map> rowMap = A->getRowMap();
253 RCP<const Map> rangeMap = A->getRangeMap();
254 RCP<const Map> colMap = A->getColMap();
256 const size_t numFineBlockRows = rowMap->getLocalNumElements();
258 typedef Teuchos::ScalarTraits<SC> STS;
260 const SC zero = STS::zero();
261 const SC one = STS::one();
262 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
264 const GO numAggs = aggregates->GetNumAggregates();
265 const size_t NSDim = fineNullspace->getNumVectors();
266 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
272 const size_t numCoarseBlockRows = coarsePointMap->getLocalNumElements() / NSDim;
273 RCP<const Map> coarseBlockMap = MapFactory::Build(coarsePointMap->lib(),
274 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
276 coarsePointMap->getIndexBase(),
277 coarsePointMap->getComm());
279 const ParameterList& pL = GetParameterList();
280 const bool&
doQRStep = pL.get<
bool>(
"tentative: calculate qr");
281 const bool& constantColSums = pL.get<
bool>(
"tentative: constant column sums");
284 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
297 ArrayRCP<LO> aggStart;
298 ArrayRCP<LO> aggToRowMapLO;
299 ArrayRCP<GO> aggToRowMapGO;
301 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
302 GetOStream(
Runtime1) <<
"Column map is consistent with the row map, good." << std::endl;
304 throw std::runtime_error(
"TentativePFactory::PuncoupledBlockCrs: Inconsistent maps not currently supported");
307 coarseNullspace = MultiVectorFactory::Build(coarsePointMap, NSDim);
310 ArrayRCP<ArrayRCP<const SC> >
fineNS(NSDim);
311 ArrayRCP<ArrayRCP<SC> >
coarseNS(NSDim);
312 for (
size_t i = 0; i < NSDim; i++) {
313 fineNS[i] = fineNullspace->getData(i);
314 if (coarsePointMap->getLocalNumElements() > 0)
315 coarseNS[i] = coarseNullspace->getDataNonConst(i);
321 RCP<CrsGraph> BlockGraph = CrsGraphFactory::Build(rowMap, coarseBlockMap, 0);
322 ArrayRCP<size_t> iaPtent;
323 ArrayRCP<LO> jaPtent;
324 BlockGraph->allocateAllIndices(numFineBlockRows, iaPtent, jaPtent);
325 ArrayView<size_t> ia = iaPtent();
326 ArrayView<LO> ja = jaPtent();
328 for (
size_t i = 0; i < numFineBlockRows; i++) {
332 ia[numCoarseBlockRows] = numCoarseBlockRows;
334 for (GO agg = 0; agg < numAggs; agg++) {
335 LO aggSize = aggStart[agg + 1] - aggStart[agg];
336 Xpetra::global_size_t offset = agg;
338 for (LO j = 0; j < aggSize; j++) {
340 const LO localRow = aggToRowMapLO[aggStart[agg] + j];
341 const size_t rowStart = ia[localRow];
342 ja[rowStart] = offset;
348 size_t ia_tmp = 0, nnz = 0;
349 for (
size_t i = 0; i < numFineBlockRows; i++) {
350 for (
size_t j = ia_tmp; j < ia[i + 1]; j++)
351 if (ja[j] != INVALID) {
359 if (rowMap->lib() == Xpetra::UseTpetra) {
366 GetOStream(
Runtime1) <<
"TentativePFactory : generating block graph" << std::endl;
367 BlockGraph->setAllIndices(iaPtent, jaPtent);
371 RCP<ParameterList> FCparams;
372 if (pL.isSublist(
"matrixmatrix: kernel params"))
373 FCparams = rcp(
new ParameterList(pL.sublist(
"matrixmatrix: kernel params")));
375 FCparams = rcp(
new ParameterList);
378 FCparams->set(
"compute global constants", FCparams->get(
"compute global constants",
true));
379 std::string levelIDs =
toString(levelID);
380 FCparams->set(
"Timer Label", std::string(
"MueLu::TentativeP-") + levelIDs);
381 RCP<const Export> dummy_e;
382 RCP<const Import> dummy_i;
383 BlockGraph->expertStaticFillComplete(coarseBlockMap, rowMap, dummy_i, dummy_e, FCparams);
388 RCP<Xpetra::CrsMatrix<SC, LO, GO, NO> > P_xpetra = Xpetra::CrsMatrixFactory<SC, LO, GO, NO>::BuildBlock(BlockGraph, coarsePointMap, rangeMap, NSDim);
389 RCP<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO> > P_tpetra = rcp_dynamic_cast<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO> >(P_xpetra);
390 if (P_tpetra.is_null())
throw std::runtime_error(
"BuildPUncoupled: Matrix factory did not return a Tpetra::BlockCrsMatrix");
391 RCP<CrsMatrixWrap> P_wrap = rcp(
new CrsMatrixWrap(P_xpetra));
400 Teuchos::Array<Scalar> block(NSDim * NSDim, zero);
401 Teuchos::Array<LO> bcol(1);
403 GetOStream(
Runtime1) <<
"TentativePFactory : bypassing local QR phase" << std::endl;
404 for (LO agg = 0; agg < numAggs; agg++) {
406 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
407 Xpetra::global_size_t offset = agg * NSDim;
411 for (LO j = 0; j < aggSize; j++) {
412 const LO localBlockRow = aggToRowMapLO[aggStart[agg] + j];
414 for (
size_t r = 0; r < NSDim; r++) {
415 LO localPointRow = localBlockRow * NSDim + r;
416 for (
size_t c = 0; c < NSDim; c++)
417 block[r * NSDim + c] =
fineNS[c][localPointRow];
420 P_tpetra->replaceLocalValues(localBlockRow, bcol(), block());
424 for (
size_t j = 0; j < NSDim; j++)
434 BuildPcoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
435 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace)
const {
436 typedef Teuchos::ScalarTraits<SC> STS;
437 typedef typename STS::magnitudeType Magnitude;
438 const SC zero = STS::zero();
439 const SC one = STS::one();
442 GO numAggs = aggregates->GetNumAggregates();
448 ArrayRCP<LO> aggStart;
449 ArrayRCP<GO> aggToRowMap;
450 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMap);
454 for (GO i = 0; i < numAggs; ++i) {
455 LO sizeOfThisAgg = aggStart[i + 1] - aggStart[i];
456 if (sizeOfThisAgg > maxAggSize) maxAggSize = sizeOfThisAgg;
460 const size_t NSDim = fineNullspace->getNumVectors();
463 GO indexBase = A->getRowMap()->getIndexBase();
465 const RCP<const Map> nonUniqueMap = amalgInfo->ComputeUnamalgamatedImportDofMap(*aggregates);
466 const RCP<const Map> uniqueMap = A->getDomainMap();
467 RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
468 RCP<MultiVector> fineNullspaceWithOverlap = MultiVectorFactory::Build(nonUniqueMap, NSDim);
469 fineNullspaceWithOverlap->doImport(*fineNullspace, *importer, Xpetra::INSERT);
472 ArrayRCP<ArrayRCP<const SC> >
fineNS(NSDim);
473 for (
size_t i = 0; i < NSDim; ++i)
474 fineNS[i] = fineNullspaceWithOverlap->getData(i);
477 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
479 ArrayRCP<ArrayRCP<SC> >
coarseNS(NSDim);
480 for (
size_t i = 0; i < NSDim; ++i)
481 if (coarseMap->getLocalNumElements() > 0)
coarseNS[i] = coarseNullspace->getDataNonConst(i);
486 RCP<const Map> rowMapForPtent = A->getRowMap();
487 const Map& rowMapForPtentRef = *rowMapForPtent;
491 RCP<const Map> colMap = A->getColMap();
493 RCP<const Map> ghostQMap;
494 RCP<MultiVector> ghostQvalues;
495 Array<RCP<Xpetra::Vector<GO, LO, GO, Node> > > ghostQcolumns;
496 RCP<Xpetra::Vector<GO, LO, GO, Node> > ghostQrowNums;
497 ArrayRCP<ArrayRCP<SC> > ghostQvals;
498 ArrayRCP<ArrayRCP<GO> > ghostQcols;
499 ArrayRCP<GO> ghostQrows;
502 for (LO j = 0; j < numAggs; ++j) {
503 for (LO k = aggStart[j]; k < aggStart[j + 1]; ++k) {
504 if (rowMapForPtentRef.isNodeGlobalElement(aggToRowMap[k]) ==
false) {
505 ghostGIDs.push_back(aggToRowMap[k]);
509 ghostQMap = MapFactory::Build(A->getRowMap()->lib(),
510 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
512 indexBase, A->getRowMap()->getComm());
514 ghostQvalues = MultiVectorFactory::Build(ghostQMap, NSDim);
518 ghostQcolumns.resize(NSDim);
519 for (
size_t i = 0; i < NSDim; ++i)
520 ghostQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
521 ghostQrowNums = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
522 if (ghostQvalues->getLocalLength() > 0) {
523 ghostQvals.resize(NSDim);
524 ghostQcols.resize(NSDim);
525 for (
size_t i = 0; i < NSDim; ++i) {
526 ghostQvals[i] = ghostQvalues->getDataNonConst(i);
527 ghostQcols[i] = ghostQcolumns[i]->getDataNonConst(0);
529 ghostQrows = ghostQrowNums->getDataNonConst(0);
533 importer = ImportFactory::Build(ghostQMap, A->getRowMap());
536 Teuchos::SerialQRDenseSolver<LO, SC> qrSolver;
539 Array<GO> globalColPtr(maxAggSize * NSDim, 0);
540 Array<LO> localColPtr(maxAggSize * NSDim, 0);
541 Array<SC> valPtr(maxAggSize * NSDim, 0.);
544 const Map& coarseMapRef = *coarseMap;
547 ArrayRCP<size_t> ptent_rowptr;
548 ArrayRCP<LO> ptent_colind;
549 ArrayRCP<Scalar> ptent_values;
552 ArrayView<size_t> rowptr_v;
553 ArrayView<LO> colind_v;
554 ArrayView<Scalar> values_v;
557 Array<size_t> rowptr_temp;
558 Array<LO> colind_temp;
559 Array<Scalar> values_temp;
561 RCP<CrsMatrix> PtentCrs;
563 RCP<CrsMatrixWrap> PtentCrsWrap = rcp(
new CrsMatrixWrap(rowMapForPtent, NSDim));
564 PtentCrs = PtentCrsWrap->getCrsMatrix();
565 Ptentative = PtentCrsWrap;
571 const Map& nonUniqueMapRef = *nonUniqueMap;
573 size_t total_nnz_count = 0;
575 for (GO agg = 0; agg < numAggs; ++agg) {
576 LO myAggSize = aggStart[agg + 1] - aggStart[agg];
579 Teuchos::SerialDenseMatrix<LO, SC> localQR(myAggSize, NSDim);
580 for (
size_t j = 0; j < NSDim; ++j) {
581 bool bIsZeroNSColumn =
true;
582 for (LO k = 0; k < myAggSize; ++k) {
586 SC nsVal =
fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])];
587 localQR(k, j) = nsVal;
588 if (nsVal != zero) bIsZeroNSColumn =
false;
590 GetOStream(
Runtime1, -1) <<
"length of fine level nsp: " << fineNullspace->getGlobalLength() << std::endl;
591 GetOStream(
Runtime1, -1) <<
"length of fine level nsp w overlap: " << fineNullspaceWithOverlap->getGlobalLength() << std::endl;
592 GetOStream(
Runtime1, -1) <<
"(local?) aggId=" << agg << std::endl;
593 GetOStream(
Runtime1, -1) <<
"aggSize=" << myAggSize << std::endl;
594 GetOStream(
Runtime1, -1) <<
"agg DOF=" << k << std::endl;
595 GetOStream(
Runtime1, -1) <<
"NS vector j=" << j << std::endl;
596 GetOStream(
Runtime1, -1) <<
"j*myAggSize + k = " << j * myAggSize + k << std::endl;
597 GetOStream(
Runtime1, -1) <<
"aggToRowMap[" << agg <<
"][" << k <<
"] = " << aggToRowMap[aggStart[agg] + k] << std::endl;
598 GetOStream(
Runtime1, -1) <<
"id aggToRowMap[agg][k]=" << aggToRowMap[aggStart[agg] + k] <<
" is global element in nonUniqueMap = " << nonUniqueMapRef.isNodeGlobalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
599 GetOStream(
Runtime1, -1) <<
"colMap local id aggToRowMap[agg][k]=" << nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
600 GetOStream(
Runtime1, -1) <<
"fineNS...=" <<
fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])] << std::endl;
601 GetOStream(
Errors, -1) <<
"caught an error!" << std::endl;
604 TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn ==
true,
Exceptions::RuntimeError,
"MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column. Error.");
607 Xpetra::global_size_t offset = agg * NSDim;
609 if (myAggSize >= Teuchos::as<LocalOrdinal>(NSDim)) {
614 SC tau = localQR(0, 0);
619 for (
size_t k = 0; k < Teuchos::as<size_t>(myAggSize); ++k) {
620 Magnitude tmag = STS::magnitude(localQR(k, 0));
621 dtemp += tmag * tmag;
623 dtemp = Teuchos::ScalarTraits<Magnitude>::squareroot(dtemp);
625 localQR(0, 0) = dtemp;
627 qrSolver.setMatrix(Teuchos::rcp(&localQR,
false));
634 for (
size_t j = 0; j < NSDim; ++j) {
635 for (
size_t k = 0; k <= j; ++k) {
637 if (coarseMapRef.isNodeLocalElement(offset + k)) {
638 coarseNS[j][offset + k] = localQR(k, j);
641 GetOStream(
Errors, -1) <<
"caught error in coarseNS insert, j=" << j <<
", offset+k = " << offset + k << std::endl;
651 Magnitude dtemp = Teuchos::ScalarTraits<SC>::magnitude(localQR(0, 0));
655 localQR(i, 0) *= dtemp;
659 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO, SC> > qFactor = qrSolver.getQ();
660 for (
size_t j = 0; j < NSDim; j++) {
661 for (
size_t i = 0; i < Teuchos::as<size_t>(myAggSize); i++) {
662 localQR(i, j) = (*qFactor)(i, j);
672 for (
size_t j = 0; j < NSDim; j++)
673 for (
size_t k = 0; k < NSDim; k++) {
675 "Caught error in coarseNS insert, j=" << j <<
", offset+k = " << offset + k);
677 if (k < as<size_t>(myAggSize))
678 coarseNS[j][offset + k] = localQR(k, j);
680 coarseNS[j][offset + k] = (k == j ? one : zero);
684 for (
size_t i = 0; i < as<size_t>(myAggSize); i++)
685 for (
size_t j = 0; j < NSDim; j++)
686 localQR(i, j) = (j == i ? one : zero);
693 for (GO j = 0; j < myAggSize; ++j) {
697 GO globalRow = aggToRowMap[aggStart[agg] + j];
700 if (rowMapForPtentRef.isNodeGlobalElement(globalRow) ==
false) {
701 ghostQrows[qctr] = globalRow;
702 for (
size_t k = 0; k < NSDim; ++k) {
703 ghostQcols[k][qctr] = coarseMapRef.getGlobalElement(agg * NSDim + k);
704 ghostQvals[k][qctr] = localQR(j, k);
709 for (
size_t k = 0; k < NSDim; ++k) {
711 if (localQR(j, k) != Teuchos::ScalarTraits<SC>::zero()) {
712 localColPtr[nnz] = agg * NSDim + k;
713 globalColPtr[nnz] = coarseMapRef.getGlobalElement(localColPtr[nnz]);
714 valPtr[nnz] = localQR(j, k);
719 GetOStream(
Errors, -1) <<
"caught error in colPtr/valPtr insert, current index=" << nnz << std::endl;
724 Ptentative->insertGlobalValues(globalRow, globalColPtr.view(0, nnz), valPtr.view(0, nnz));
726 GetOStream(
Errors, -1) <<
"pid " << A->getRowMap()->getComm()->getRank()
727 <<
"caught error during Ptent row insertion, global row "
728 << globalRow << std::endl;
738 GetOStream(
Runtime1) <<
"TentativePFactory : aggregates may cross process boundaries" << std::endl;
741 RCP<Xpetra::Vector<GO, LO, GO, Node> > targetQrowNums = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(rowMapForPtent);
742 targetQrowNums->putScalar(-1);
743 targetQrowNums->doImport(*ghostQrowNums, *importer, Xpetra::INSERT);
744 ArrayRCP<GO> targetQrows = targetQrowNums->getDataNonConst(0);
747 Array<GO> gidsToImport;
748 gidsToImport.reserve(targetQrows.size());
749 for (
typename ArrayRCP<GO>::iterator r = targetQrows.begin(); r != targetQrows.end(); ++r) {
751 gidsToImport.push_back(*r);
754 RCP<const Map> reducedMap = MapFactory::Build(A->getRowMap()->lib(),
755 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
756 gidsToImport, indexBase, A->getRowMap()->getComm());
759 importer = ImportFactory::Build(ghostQMap, reducedMap);
761 Array<RCP<Xpetra::Vector<GO, LO, GO, Node> > > targetQcolumns(NSDim);
762 for (
size_t i = 0; i < NSDim; ++i) {
763 targetQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(reducedMap);
764 targetQcolumns[i]->doImport(*(ghostQcolumns[i]), *importer, Xpetra::INSERT);
766 RCP<MultiVector> targetQvalues = MultiVectorFactory::Build(reducedMap, NSDim);
767 targetQvalues->doImport(*ghostQvalues, *importer, Xpetra::INSERT);
769 ArrayRCP<ArrayRCP<SC> > targetQvals;
770 ArrayRCP<ArrayRCP<GO> > targetQcols;
771 if (targetQvalues->getLocalLength() > 0) {
772 targetQvals.resize(NSDim);
773 targetQcols.resize(NSDim);
774 for (
size_t i = 0; i < NSDim; ++i) {
775 targetQvals[i] = targetQvalues->getDataNonConst(i);
776 targetQcols[i] = targetQcolumns[i]->getDataNonConst(0);
780 valPtr = Array<SC>(NSDim, 0.);
781 globalColPtr = Array<GO>(NSDim, 0);
782 for (
typename Array<GO>::iterator r = gidsToImport.begin(); r != gidsToImport.end(); ++r) {
783 if (targetQvalues->getLocalLength() > 0) {
784 for (
size_t j = 0; j < NSDim; ++j) {
785 valPtr[j] = targetQvals[j][reducedMap->getLocalElement(*r)];
786 globalColPtr[j] = targetQcols[j][reducedMap->getLocalElement(*r)];
788 Ptentative->insertGlobalValues(*r, globalColPtr.view(0, NSDim), valPtr.view(0, NSDim));
792 Ptentative->fillComplete(coarseMap, A->getDomainMap());
797 BuildPuncoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
798 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace,
const int levelID)
const {
799 RCP<const Map> rowMap = A->getRowMap();
800 RCP<const Map> colMap = A->getColMap();
801 const size_t numRows = rowMap->getLocalNumElements();
803 typedef Teuchos::ScalarTraits<SC> STS;
804 typedef typename STS::magnitudeType Magnitude;
805 const SC zero = STS::zero();
806 const SC one = STS::one();
807 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
809 const GO numAggs = aggregates->GetNumAggregates();
810 const size_t NSDim = fineNullspace->getNumVectors();
811 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
814 const ParameterList& pL = GetParameterList();
815 const bool&
doQRStep = pL.get<
bool>(
"tentative: calculate qr");
816 const bool& constantColSums = pL.get<
bool>(
"tentative: constant column sums");
819 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
830 ArrayRCP<LO> aggStart;
831 ArrayRCP<LO> aggToRowMapLO;
832 ArrayRCP<GO> aggToRowMapGO;
834 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
835 GetOStream(
Runtime1) <<
"Column map is consistent with the row map, good." << std::endl;
838 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMapGO);
839 GetOStream(
Warnings0) <<
"Column map is not consistent with the row map\n"
840 <<
"using GO->LO conversion with performance penalty" << std::endl;
842 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
845 ArrayRCP<ArrayRCP<const SC> >
fineNS(NSDim);
846 ArrayRCP<ArrayRCP<SC> >
coarseNS(NSDim);
847 for (
size_t i = 0; i < NSDim; i++) {
848 fineNS[i] = fineNullspace->getData(i);
849 if (coarseMap->getLocalNumElements() > 0)
850 coarseNS[i] = coarseNullspace->getDataNonConst(i);
853 size_t nnzEstimate = numRows * NSDim;
856 Ptentative = rcp(
new CrsMatrixWrap(rowMap, coarseMap, 0));
857 RCP<CrsMatrix> PtentCrs = toCrsMatrix(Ptentative);
859 ArrayRCP<size_t> iaPtent;
860 ArrayRCP<LO> jaPtent;
861 ArrayRCP<SC> valPtent;
863 PtentCrs->allocateAllValues(nnzEstimate, iaPtent, jaPtent, valPtent);
865 ArrayView<size_t> ia = iaPtent();
866 ArrayView<LO> ja = jaPtent();
867 ArrayView<SC> val = valPtent();
870 for (
size_t i = 1; i <= numRows; i++)
871 ia[i] = ia[i - 1] + NSDim;
873 for (
size_t j = 0; j < nnzEstimate; j++) {
882 for (GO agg = 0; agg < numAggs; agg++) {
883 LO aggSize = aggStart[agg + 1] - aggStart[agg];
885 Xpetra::global_size_t offset = agg * NSDim;
890 Teuchos::SerialDenseMatrix<LO, SC> localQR(aggSize, NSDim);
892 for (
size_t j = 0; j < NSDim; j++)
893 for (LO k = 0; k < aggSize; k++)
894 localQR(k, j) =
fineNS[j][aggToRowMapLO[aggStart[agg] + k]];
896 for (
size_t j = 0; j < NSDim; j++)
897 for (LO k = 0; k < aggSize; k++)
898 localQR(k, j) =
fineNS[j][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + k])];
902 for (
size_t j = 0; j < NSDim; j++) {
903 bool bIsZeroNSColumn =
true;
905 for (LO k = 0; k < aggSize; k++)
906 if (localQR(k, j) != zero)
907 bIsZeroNSColumn =
false;
910 "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column in NS column " << j);
915 if (aggSize >= Teuchos::as<LO>(NSDim)) {
918 Magnitude norm = STS::magnitude(zero);
919 for (
size_t k = 0; k < Teuchos::as<size_t>(aggSize); k++)
920 norm += STS::magnitude(localQR(k, 0) * localQR(k, 0));
921 norm = Teuchos::ScalarTraits<Magnitude>::squareroot(norm);
927 for (LO i = 0; i < aggSize; i++)
928 localQR(i, 0) /= norm;
931 Teuchos::SerialQRDenseSolver<LO, SC> qrSolver;
932 qrSolver.setMatrix(Teuchos::rcp(&localQR,
false));
936 for (
size_t j = 0; j < NSDim; j++)
937 for (
size_t k = 0; k <= j; k++)
938 coarseNS[j][offset + k] = localQR(k, j);
943 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO, SC> > qFactor = qrSolver.getQ();
944 for (
size_t j = 0; j < NSDim; j++)
945 for (
size_t i = 0; i < Teuchos::as<size_t>(aggSize); i++)
946 localQR(i, j) = (*qFactor)(i, j);
977 for (
size_t j = 0; j < NSDim; j++)
978 for (
size_t k = 0; k < NSDim; k++)
979 if (k < as<size_t>(aggSize))
980 coarseNS[j][offset + k] = localQR(k, j);
982 coarseNS[j][offset + k] = (k == j ? one : zero);
985 for (
size_t i = 0; i < as<size_t>(aggSize); i++)
986 for (
size_t j = 0; j < NSDim; j++)
987 localQR(i, j) = (j == i ? one : zero);
992 for (LO j = 0; j < aggSize; j++) {
993 LO localRow = (goodMap ? aggToRowMapLO[aggStart[agg] + j] : rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]));
995 size_t rowStart = ia[localRow];
996 for (
size_t k = 0, lnnz = 0; k < NSDim; k++) {
998 if (localQR(j, k) != zero) {
999 ja[rowStart + lnnz] = offset + k;
1000 val[rowStart + lnnz] = localQR(j, k);
1008 GetOStream(
Runtime1) <<
"TentativePFactory : bypassing local QR phase" << std::endl;
1010 GetOStream(
Warnings0) <<
"TentativePFactory : for nontrivial nullspace, this may degrade performance" << std::endl;
1020 for (GO agg = 0; agg < numAggs; agg++) {
1021 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1022 Xpetra::global_size_t offset = agg * NSDim;
1026 for (LO j = 0; j < aggSize; j++) {
1030 const LO localRow = aggToRowMapLO[aggStart[agg] + j];
1032 const size_t rowStart = ia[localRow];
1034 for (
size_t k = 0, lnnz = 0; k < NSDim; k++) {
1036 SC qr_jk =
fineNS[k][aggToRowMapLO[aggStart[agg] + j]];
1037 if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1038 if (qr_jk != zero) {
1039 ja[rowStart + lnnz] = offset + k;
1040 val[rowStart + lnnz] = qr_jk;
1045 for (
size_t j = 0; j < NSDim; j++)
1050 for (GO agg = 0; agg < numAggs; agg++) {
1051 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1052 Xpetra::global_size_t offset = agg * NSDim;
1053 for (LO j = 0; j < aggSize; j++) {
1054 const LO localRow = rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]);
1056 const size_t rowStart = ia[localRow];
1058 for (
size_t k = 0, lnnz = 0; k < NSDim; ++k) {
1060 SC qr_jk =
fineNS[k][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j])];
1061 if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1062 if (qr_jk != zero) {
1063 ja[rowStart + lnnz] = offset + k;
1064 val[rowStart + lnnz] = qr_jk;
1069 for (
size_t j = 0; j < NSDim; j++)
1079 size_t ia_tmp = 0, nnz = 0;
1080 for (
size_t i = 0; i < numRows; i++) {
1081 for (
size_t j = ia_tmp; j < ia[i + 1]; j++)
1082 if (ja[j] != INVALID) {
1090 if (rowMap->lib() == Xpetra::UseTpetra) {
1094 jaPtent.resize(nnz);
1095 valPtent.resize(nnz);
1098 GetOStream(
Runtime1) <<
"TentativePFactory : aggregates do not cross process boundaries" << std::endl;
1100 PtentCrs->setAllValues(iaPtent, jaPtent, valPtent);
1103 RCP<ParameterList> FCparams;
1104 if (pL.isSublist(
"matrixmatrix: kernel params"))
1105 FCparams = rcp(
new ParameterList(pL.sublist(
"matrixmatrix: kernel params")));
1107 FCparams = rcp(
new ParameterList);
1109 FCparams->set(
"compute global constants", FCparams->get(
"compute global constants",
false));
1110 std::string levelIDs =
toString(levelID);
1111 FCparams->set(
"Timer Label", std::string(
"MueLu::TentativeP-") + levelIDs);
1112 RCP<const Export> dummy_e;
1113 RCP<const Import> dummy_i;
1115 PtentCrs->expertStaticFillComplete(coarseMap, A->getDomainMap(), dummy_i, dummy_e, FCparams);