63 for (
size_t i = 0; i < input.
getColMap()->getLocalNumElements(); i++) {
64 GlobalOrdinal gcid = input.
getColMap()->getGlobalElement(i);
72 std::vector<int> myUnknownDofGIDs(comm->getSize(), 0);
73 std::vector<int> numUnknownDofGIDs(comm->getSize(), 0);
74 myUnknownDofGIDs[comm->getRank()] = ovlUnknownStatusGids.
size();
75 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, comm->getSize(), &myUnknownDofGIDs[0], &numUnknownDofGIDs[0]);
78 size_t cntUnknownDofGIDs = 0;
79 for (
int p = 0; p < comm->getSize(); p++) cntUnknownDofGIDs += numUnknownDofGIDs[p];
80 std::vector<GlobalOrdinal> lUnknownDofGIDs(cntUnknownDofGIDs, -1);
81 std::vector<GlobalOrdinal> gUnknownDofGIDs(cntUnknownDofGIDs, -1);
83 size_t cntUnknownOffset = 0;
84 for (
int p = 0; p < comm->getRank(); p++) cntUnknownOffset += numUnknownDofGIDs[p];
85 for (
size_t k = 0; k < Teuchos::as<size_t>(ovlUnknownStatusGids.
size()); k++) {
86 lUnknownDofGIDs[k + cntUnknownOffset] = ovlUnknownStatusGids[k];
88 if (cntUnknownDofGIDs > 0)
89 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(cntUnknownDofGIDs), &lUnknownDofGIDs[0], &gUnknownDofGIDs[0]);
90 std::sort(gUnknownDofGIDs.begin(), gUnknownDofGIDs.end());
91 gUnknownDofGIDs.erase(std::unique(gUnknownDofGIDs.begin(), gUnknownDofGIDs.end()), gUnknownDofGIDs.end());
94 for (
size_t k = 0; k < gUnknownDofGIDs.size(); k++) {
95 GlobalOrdinal curgid = gUnknownDofGIDs[k];
101 std::vector<int> myFoundDofGIDs(comm->getSize(), 0);
102 std::vector<int> numFoundDofGIDs(comm->getSize(), 0);
103 myFoundDofGIDs[comm->getRank()] = ovlFoundStatusGids.
size();
104 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, comm->getSize(), &myFoundDofGIDs[0], &numFoundDofGIDs[0]);
107 size_t cntFoundDofGIDs = 0;
108 for (
int p = 0; p < comm->getSize(); p++) cntFoundDofGIDs += numFoundDofGIDs[p];
109 std::vector<GlobalOrdinal> lFoundDofGIDs(cntFoundDofGIDs, -1);
110 std::vector<GlobalOrdinal> gFoundDofGIDs(cntFoundDofGIDs, -1);
112 size_t cntFoundOffset = 0;
113 for (
int p = 0; p < comm->getRank(); p++) cntFoundOffset += numFoundDofGIDs[p];
114 for (
size_t k = 0; k < Teuchos::as<size_t>(ovlFoundStatusGids.
size()); k++) {
115 lFoundDofGIDs[k + cntFoundOffset] = ovlFoundStatusGids[k];
117 if (cntFoundDofGIDs > 0)
118 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(cntFoundDofGIDs), &lFoundDofGIDs[0], &gFoundDofGIDs[0]);
121 for (
size_t i = 0; i < input.
getColMap()->getLocalNumElements(); i++) {
122 GlobalOrdinal gcid = input.
getColMap()->getGlobalElement(i);
124 std::find(gFoundDofGIDs.begin(), gFoundDofGIDs.end(), gcid) != gFoundDofGIDs.end()) {
139 size_t numRows = rangeMapExtractor->NumMaps();
140 size_t numCols = domainMapExtractor->NumMaps();
142 TEUCHOS_TEST_FOR_EXCEPTION(rangeMapExtractor->getThyraMode() ==
true,
Xpetra::Exceptions::Incompatible,
"Xpetra::MatrixUtils::Split: RangeMapExtractor must not use Thyra style numbering of GIDs. The MapExtractor must contain all GIDs of the full range map in order to define a proper splitting.")
143 TEUCHOS_TEST_FOR_EXCEPTION(domainMapExtractor->getThyraMode() ==
true,
Xpetra::Exceptions::Incompatible,
"Xpetra::MatrixUtils::Split: DomainMapExtractor must not use Thyra style numbering of GIDs. The MapExtractor must contain all GIDs of the full domain map in order to define a proper splitting.")
155 TEUCHOS_TEST_FOR_EXCEPTION(fullDomainMap->getMaxAllGlobalIndex() != input.
getColMap()->getMaxAllGlobalIndex(),
Xpetra::Exceptions::Incompatible,
"Xpetra::MatrixUtils::Split: DomainMapExtractor incompatible to domain map of input matrix. fullDomainMap->getMaxAllGlobalIndex() = " << fullDomainMap->getMaxAllGlobalIndex() <<
" vs. input.getColMap()->getMaxAllGlobalIndex() = " << input.
getColMap()->getMaxAllGlobalIndex())
162 if (columnMapExtractor == Teuchos::null) {
165 std::vector<Teuchos::RCP<const Map>> ovlxmaps(numCols, Teuchos::null);
166 for (
size_t c = 0; c < numCols; c++) {
169 ovlxmaps[c] = colMap;
175 myColumnMapExtractor = columnMapExtractor;
182 if (bThyraMode ==
true) {
184 std::vector<Teuchos::RCP<const Map>> thyRgMapExtractorMaps(numRows, Teuchos::null);
185 for (
size_t r = 0; r < numRows; r++) {
189 if (strRangeMap != Teuchos::null) {
190 std::vector<size_t> strInfo = strRangeMap->getStridingData();
191 GlobalOrdinal offset = strRangeMap->getOffset();
192 LocalOrdinal stridedBlockId = strRangeMap->getStridedBlockId();
194 thyRgMapExtractorMaps[r] = strShrinkedMap;
196 thyRgMapExtractorMaps[r] = shrinkedMap;
202 std::vector<Teuchos::RCP<const Map>> thyDoMapExtractorMaps(numCols, Teuchos::null);
203 std::vector<Teuchos::RCP<const Map>> thyColMapExtractorMaps(numCols, Teuchos::null);
204 for (
size_t c = 0; c < numCols; c++) {
209 if (strDomainMap != Teuchos::null) {
210 std::vector<size_t> strInfo = strDomainMap->getStridingData();
211 GlobalOrdinal offset = strDomainMap->getOffset();
212 LocalOrdinal stridedBlockId = strDomainMap->getStridedBlockId();
214 thyDoMapExtractorMaps[c] = strShrinkedDomainMap;
216 thyDoMapExtractorMaps[c] = shrinkedDomainMap;
221 if (strColMap != Teuchos::null) {
222 std::vector<size_t> strInfo = strColMap->getStridingData();
223 GlobalOrdinal offset = strColMap->getOffset();
224 LocalOrdinal stridedBlockId = strColMap->getStridedBlockId();
226 thyColMapExtractorMaps[c] = strShrinkedColMap;
228 thyColMapExtractorMaps[c] = shrinkedColMap;
240 std::vector<Teuchos::RCP<Matrix>> subMatrices(numRows * numCols, Teuchos::null);
241 for (
size_t r = 0; r < numRows; r++) {
242 for (
size_t c = 0; c < numCols; c++) {
246 if (bThyraMode ==
true)
266 doCheck->putScalar(1.0);
272 doCheck->putScalar(-1.0);
273 coCheck->putScalar(-1.0);
276 for (
size_t rrr = 0; rrr < input.
getDomainMap()->getLocalNumElements(); rrr++) {
278 GlobalOrdinal
id = input.
getDomainMap()->getGlobalElement(rrr);
281 size_t BlockId = domainMapExtractor->getMapIndexForGID(
id);
283 doCheckData[rrr] = Teuchos::as<Scalar>(BlockId);
291 for (
size_t rr = 0; rr < input.
getRowMap()->getLocalNumElements(); rr++) {
293 GlobalOrdinal growid = input.
getRowMap()->getGlobalElement(rr);
302 size_t rowBlockId = rangeMapExtractor->getMapIndexForGID(growid);
305 GlobalOrdinal subblock_growid = growid;
306 if (bThyraMode ==
true) {
308 LocalOrdinal lrowid = rangeMapExtractor->getMap(rowBlockId)->getLocalElement(growid);
310 subblock_growid = thyRangeMapExtractor->getMap(rowBlockId,
true)->getGlobalElement(lrowid);
323 for (
size_t i = 0; i < (size_t)indices.
size(); i++) {
325 GlobalOrdinal gcolid = input.
getColMap()->getGlobalElement(indices[i]);
327 size_t colBlockId = myColumnMapExtractor->getMapIndexForGID(gcolid);
331 GlobalOrdinal subblock_gcolid = gcolid;
332 if (bThyraMode ==
true) {
334 LocalOrdinal lcolid = myColumnMapExtractor->getMap(colBlockId)->getLocalElement(gcolid);
336 subblock_gcolid = thyColMapExtractor->getMap(colBlockId,
true)->getGlobalElement(lcolid);
338 blockColIdx[colBlockId].push_back(subblock_gcolid);
339 blockColVals[colBlockId].push_back(vals[i]);
342 for (
size_t c = 0; c < numCols; c++) {
343 subMatrices[rowBlockId * numCols + c]->insertGlobalValues(subblock_growid, blockColIdx[c].view(0, blockColIdx[c].size()), blockColVals[c].view(0, blockColVals[c].size()));
349 if (bThyraMode ==
true) {
350 for (
size_t r = 0; r < numRows; r++) {
351 for (
size_t c = 0; c < numCols; c++) {
352 subMatrices[r * numCols + c]->fillComplete(thyDomainMapExtractor->getMap(c,
true), thyRangeMapExtractor->getMap(r,
true));
357 for (
size_t r = 0; r < numRows; r++) {
358 for (
size_t c = 0; c < numCols; c++) {
359 subMatrices[r * numCols + c]->fillComplete(domainMapExtractor->getMap(c), rangeMapExtractor->getMap(r));
365 for (
size_t r = 0; r < numRows; r++) {
366 for (
size_t c = 0; c < numCols; c++) {
367 bA->setMatrix(r, c, subMatrices[r * numCols + c]);
377 const Scalar replacementValue) {
379 using Teuchos::rcp_dynamic_cast;
381 GlobalOrdinal gZeroDiags;
382 bool usedEfficientPath =
false;
384#ifdef HAVE_MUELU_TPETRA
388 tpCrsAc = rcp_dynamic_cast<TpetraCrsMatrix>(crsWrapAc->getCrsMatrix());
391 auto tpCrsGraph = tpCrsAc->getTpetra_CrsMatrix()->getCrsGraph();
392 size_t numRows = Ac->getRowMap()->getLocalNumElements();
393 typedef typename Tpetra::CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::offset_device_view_type offset_type;
394 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
395 auto offsets = offset_type(Kokkos::ViewAllocateWithoutInitializing(
"offsets"), numRows);
396 tpCrsGraph->getLocalDiagOffsets(offsets);
399 Tpetra::Details::OrdinalTraits<typename offset_type::value_type>::invalid();
401 if (repairZeroDiagonals) {
405 LO numMissingDiagonalEntries = 0;
407 Kokkos::parallel_reduce(
408 "countMissingDiagonalEntries",
409 range_type(0, numRows),
410 KOKKOS_LAMBDA(
const LO i,
LO& missing) {
411 missing += (offsets(i) == STINV);
413 numMissingDiagonalEntries);
415 GlobalOrdinal gNumMissingDiagonalEntries;
416 Teuchos::reduceAll(*(Ac->getRowMap()->getComm()), Teuchos::REDUCE_SUM, Teuchos::as<GlobalOrdinal>(numMissingDiagonalEntries),
417 Teuchos::outArg(gNumMissingDiagonalEntries));
419 if (gNumMissingDiagonalEntries == 0) {
422 auto lclA = tpCrsAc->getTpetra_CrsMatrix()->getLocalMatrixDevice();
424#if KOKKOS_VERSION >= 40799
425 using ATS = KokkosKernels::ArithTraits<Scalar>;
427 using ATS = Kokkos::ArithTraits<Scalar>;
429#if KOKKOS_VERSION >= 40799
430 using impl_ATS = KokkosKernels::ArithTraits<typename ATS::val_type>;
432 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
436 typename ATS::val_type impl_replacementValue = replacementValue;
438 Kokkos::parallel_reduce(
439 "fixSmallDiagonalEntries",
440 range_type(0, numRows),
441 KOKKOS_LAMBDA(
const LO i,
LO& fixed) {
442 const auto offset = offsets(i);
443 auto curRow = lclA.row(i);
444 if (impl_ATS::magnitude(curRow.value(offset)) <= threshold) {
445 curRow.value(offset) = impl_replacementValue;
451 Teuchos::reduceAll(*(Ac->getRowMap()->getComm()), Teuchos::REDUCE_SUM, Teuchos::as<GlobalOrdinal>(lZeroDiags),
452 Teuchos::outArg(gZeroDiags));
454 usedEfficientPath =
true;
460 auto lclA = tpCrsAc->getTpetra_CrsMatrix()->getLocalMatrixDevice();
462#if KOKKOS_VERSION >= 40799
463 using ATS = KokkosKernels::ArithTraits<Scalar>;
465 using ATS = Kokkos::ArithTraits<Scalar>;
467#if KOKKOS_VERSION >= 40799
468 using impl_ATS = KokkosKernels::ArithTraits<typename ATS::val_type>;
470 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
475 Kokkos::parallel_reduce(
476 "detectSmallDiagonalEntries",
477 range_type(0, numRows),
478 KOKKOS_LAMBDA(
const LO i,
LO& small) {
479 const auto offset = offsets(i);
483 auto curRow = lclA.row(i);
484 if (impl_ATS::magnitude(curRow.value(offset)) <= threshold) {
491 Teuchos::reduceAll(*(Ac->getRowMap()->getComm()), Teuchos::REDUCE_SUM, Teuchos::as<GlobalOrdinal>(lZeroDiags),
492 Teuchos::outArg(gZeroDiags));
494 usedEfficientPath =
true;
499 if (!usedEfficientPath) {
502 Ac->getLocalDiagCopy(*diagVec);
504 LocalOrdinal lZeroDiags = 0;
507 for (
size_t i = 0; i < rowMap->getLocalNumElements(); i++) {
508 if (TST::magnitude(diagVal[i]) <= threshold) {
513 Teuchos::reduceAll(*(rowMap->getComm()), Teuchos::REDUCE_SUM, Teuchos::as<GlobalOrdinal>(lZeroDiags),
514 Teuchos::outArg(gZeroDiags));
516 if (repairZeroDiagonals && gZeroDiags > 0) {
534 for (
size_t r = 0; r < rowMap->getLocalNumElements(); r++) {
535 if (TST::magnitude(diagVal[r]) <= threshold) {
536 GlobalOrdinal grid = rowMap->getGlobalElement(r);
538 valout[0] = replacementValue;
539 fixDiagMatrix->insertGlobalValues(grid, indout(), valout());
544 fixDiagMatrix->fillComplete(Ac->getDomainMap(), Ac->getRangeMap());
548 Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::TwoMatrixAdd(*Ac,
false, 1.0, *fixDiagMatrix,
false, 1.0, newAc, fos);
549 if (Ac->IsView(
"stridedMaps"))
550 newAc->CreateView(
"stridedMaps", Ac);
553 fixDiagMatrix = Teuchos::null;
559 p->set(
"DoOptimizeStorage",
true);
568 fos <<
"CheckRepairMainDiagonal: " << (repairZeroDiagonals ?
"repaired " :
"found ")
569 << gZeroDiags <<
" too small entries (threshold = " << threshold <<
") on main diagonal of Ac." << std::endl;
571#ifdef HAVE_XPETRA_DEBUG
577 Ac->getLocalDiagCopy(*diagVec);
578 diagVal = diagVec->getData(0);
579 for (
size_t r = 0; r < Ac->getRowMap()->getLocalNumElements(); r++) {
580 if (TST::magnitude(diagVal[r]) <= threshold) {
581 fos <<
"Error: there are too small entries left on diagonal after repair..." << std::endl;