559 if (!isKokkosKernelsStream_) {
561 LevelOfFill_, 0, Overalloc_));
563 std::vector<int> weights(num_streams_);
564 std::fill(weights.begin(), weights.end(), 1);
565 exec_space_instances_ = Kokkos::Experimental::partition_space(execution_space(), weights);
567 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
568 if (!hasStreamReordered_) {
569 if (hasStreamsWithRCB_) {
570 TEUCHOS_TEST_FOR_EXCEPTION(A_coordinates_.is_null(), std::runtime_error, prefix <<
"The coordinates associated with rows of the input matrix is null while RILUK uses streams with RCB. Please call setCoord() with a nonnull input before calling this method.");
571 auto A_coordinates_lcl = A_coordinates_->getLocalViewDevice(Tpetra::Access::ReadOnly);
572 perm_rcb_ = perm_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"perm_rcb_"), A_coordinates_lcl.extent(0));
573 coors_rcb_ = coors_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"coors_rcb_"), A_coordinates_lcl.extent(0), A_coordinates_lcl.extent(1));
574 Kokkos::deep_copy(coors_rcb_, A_coordinates_lcl);
575 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, coors_rcb_,
576 A_local_diagblks_v_, perm_rcb_);
578 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_);
581 perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_,
true);
582 reverse_perm_v_.resize(perm_v_.size());
583 for (
size_t istream = 0; istream < perm_v_.size(); ++istream) {
584 using perm_type =
typename lno_nonzero_view_t::non_const_type;
585 const auto perm = perm_v_[istream];
586 const auto perm_length = perm.extent(0);
587 perm_type reverse_perm(
588 Kokkos::view_alloc(Kokkos::WithoutInitializing,
"reverse_perm"),
590 Kokkos::parallel_for(
591 Kokkos::RangePolicy<execution_space>(exec_space_instances_[istream], 0, perm_length),
592 KOKKOS_LAMBDA(
const local_ordinal_type ii) {
593 reverse_perm(perm(ii)) = ii;
595 reverse_perm_v_[istream] = reverse_perm;
599 A_local_diagblks_rowmap_v_ = std::vector<lno_row_view_t>(num_streams_);
600 A_local_diagblks_entries_v_ = std::vector<lno_nonzero_view_t>(num_streams_);
601 A_local_diagblks_values_v_ = std::vector<scalar_nonzero_view_t>(num_streams_);
603 for (
int i = 0; i < num_streams_; i++) {
604 A_local_diagblks_rowmap_v_[i] = A_local_diagblks_v_[i].graph.row_map;
605 A_local_diagblks_entries_v_[i] = A_local_diagblks_v_[i].graph.entries;
606 A_local_diagblks_values_v_[i] = A_local_diagblks_v_[i].values;
608 Teuchos::RCP<const crs_map_type> A_local_diagblks_RowMap = rcp(
new crs_map_type(A_local_diagblks_v_[i].numRows(),
609 A_local_diagblks_v_[i].numRows(),
610 A_local_crs_->getRowMap()->getComm()));
611 Teuchos::RCP<const crs_map_type> A_local_diagblks_ColMap = rcp(
new crs_map_type(A_local_diagblks_v_[i].numCols(),
612 A_local_diagblks_v_[i].numCols(),
613 A_local_crs_->getColMap()->getComm()));
614 Teuchos::RCP<crs_matrix_type> A_local_diagblks = rcp(
new crs_matrix_type(A_local_diagblks_RowMap,
615 A_local_diagblks_ColMap,
616 A_local_diagblks_v_[i]));
618 LevelOfFill_, 0, Overalloc_));
623 if (this->isKokkosKernelsSpiluk_) {
624 if (!isKokkosKernelsStream_) {
625 this->KernelHandle_ = Teuchos::rcp(
new kk_handle_type());
626 KernelHandle_->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
627 A_local_->getLocalNumRows(),
628 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1),
629 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1));
630 Graph_->initialize(KernelHandle_);
632 KernelHandle_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(num_streams_);
633 for (
int i = 0; i < num_streams_; i++) {
634 KernelHandle_v_[i] = Teuchos::rcp(
new kk_handle_type());
635 KernelHandle_v_[i]->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
636 A_local_diagblks_v_[i].numRows(),
637 2 * A_local_diagblks_v_[i].nnz() * (LevelOfFill_ + 1),
638 2 * A_local_diagblks_v_[i].nnz() * (LevelOfFill_ + 1));
639 Graph_v_[i]->initialize(KernelHandle_v_[i]);
643 Graph_->initialize();
647 checkOrderingConsistency(*A_local_);
648 if (!isKokkosKernelsStream_) {
649 L_solver_->setMatrix(L_);
651 L_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
652 L_solver_->setMatrices(L_v_);
654 L_solver_->initialize();
656 if (!isKokkosKernelsStream_) {
657 U_solver_->setMatrix(U_);
659 U_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
660 U_solver_->setMatrices(U_v_);
662 U_solver_->initialize();
669 isInitialized_ =
true;
671 initializeTime_ += (timer.wallTime() - startTime);
674template <
class MatrixType>
675void RILUK<MatrixType>::
676 checkOrderingConsistency(
const row_matrix_type& A) {
680 Teuchos::ArrayView<const global_ordinal_type> rowGIDs = A.getRowMap()->getLocalElementList();
681 Teuchos::ArrayView<const global_ordinal_type> colGIDs = A.getColMap()->getLocalElementList();
682 bool gidsAreConsistentlyOrdered =
true;
683 global_ordinal_type indexOfInconsistentGID = 0;
684 for (global_ordinal_type i = 0; i < rowGIDs.size(); ++i) {
685 if (rowGIDs[i] != colGIDs[i]) {
686 gidsAreConsistentlyOrdered =
false;
687 indexOfInconsistentGID = i;
691 TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered ==
false, std::runtime_error,
692 "The ordering of the local GIDs in the row and column maps is not the same"
694 <<
"at index " << indexOfInconsistentGID
695 <<
". Consistency is required, as all calculations are done with"
697 <<
"local indexing.");
700template <
class MatrixType>
701void RILUK<MatrixType>::
702 initAllValues(
const row_matrix_type& A) {
703 using Teuchos::ArrayRCP;
707 using Teuchos::REDUCE_SUM;
708 using Teuchos::reduceAll;
709 typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
711 size_t NumIn = 0, NumL = 0, NumU = 0;
712 bool DiagFound =
false;
713 size_t NumNonzeroDiags = 0;
714 size_t MaxNumEntries = A.getGlobalMaxNumRowEntries();
718 nonconst_local_inds_host_view_type InI(
"InI", MaxNumEntries);
719 Teuchos::Array<local_ordinal_type> LI(MaxNumEntries);
720 Teuchos::Array<local_ordinal_type> UI(MaxNumEntries);
721 nonconst_values_host_view_type InV(
"InV", MaxNumEntries);
722 Teuchos::Array<scalar_type> LV(MaxNumEntries);
723 Teuchos::Array<scalar_type> UV(MaxNumEntries);
726 const bool ReplaceValues = L_->isStaticGraph() || L_->isLocallyIndexed();
731 L_->setAllToScalar(STS::zero());
732 U_->setAllToScalar(STS::zero());
735 D_->putScalar(STS::zero());
736 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
738 RCP<const map_type> rowMap = L_->getRowMap();
748 Teuchos::ArrayView<const global_ordinal_type> nodeGIDs = rowMap->getLocalElementList();
749 for (
size_t myRow = 0; myRow < A.getLocalNumRows(); ++myRow) {
750 local_ordinal_type local_row = myRow;
754 A.getLocalRowCopy(local_row, InI, InV, NumIn);
762 for (
size_t j = 0; j < NumIn; ++j) {
763 const local_ordinal_type k = InI[j];
765 if (k == local_row) {
768 DV(local_row) += Rthresh_ * InV[j] + IFPACK2_SGN(InV[j]) * Athresh_;
770 TEUCHOS_TEST_FOR_EXCEPTION(
771 true, std::runtime_error,
772 "Ifpack2::RILUK::initAllValues: current "
774 << k <<
" < 0. I'm not sure why this is an error; it is "
775 "probably an artifact of the undocumented assumptions of the "
776 "original implementation (likely copied and pasted from Ifpack). "
777 "Nevertheless, the code I found here insisted on this being an error "
778 "state, so I will throw an exception here.");
779 }
else if (k < local_row) {
783 }
else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
795 DV(local_row) = Athresh_;
800 L_->replaceLocalValues(local_row, LI(0, NumL), LV(0, NumL));
804 L_->insertLocalValues(local_row, LI(0, NumL), LV(0, NumL));
810 U_->replaceLocalValues(local_row, UI(0, NumU), UV(0, NumU));
814 U_->insertLocalValues(local_row, UI(0, NumU), UV(0, NumU));
822 isInitialized_ =
true;
825template <
class MatrixType>
826void RILUK<MatrixType>::compute_serial() {
829 initAllValues(*A_local_);
834 const scalar_type MinDiagonalValue = STS::rmin();
835 const scalar_type MaxDiagonalValue = STS::one() / MinDiagonalValue;
837 size_t NumIn, NumL, NumU;
840 const size_t MaxNumEntries =
841 L_->getLocalMaxNumRowEntries() + U_->getLocalMaxNumRowEntries() + 1;
843 Teuchos::Array<local_ordinal_type> InI(MaxNumEntries);
844 Teuchos::Array<scalar_type> InV(MaxNumEntries);
845 size_t num_cols = U_->getColMap()->getLocalNumElements();
846 Teuchos::Array<int> colflag(num_cols, -1);
848 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
852 using IST =
typename row_matrix_type::impl_scalar_type;
853 for (
size_t i = 0; i < L_->getLocalNumRows(); ++i) {
854 local_ordinal_type local_row = i;
857 local_inds_host_view_type UUI;
858 values_host_view_type UUV;
862 NumIn = MaxNumEntries;
863 nonconst_local_inds_host_view_type InI_v(InI.data(), MaxNumEntries);
864 nonconst_values_host_view_type InV_v(
reinterpret_cast<IST*
>(InV.data()), MaxNumEntries);
866 L_->getLocalRowCopy(local_row, InI_v, InV_v, NumL);
869 InI[NumL] = local_row;
871 nonconst_local_inds_host_view_type InI_sub(InI.data() + NumL + 1, MaxNumEntries - NumL - 1);
872 nonconst_values_host_view_type InV_sub(
reinterpret_cast<IST*
>(InV.data()) + NumL + 1, MaxNumEntries - NumL - 1);
874 U_->getLocalRowCopy(local_row, InI_sub, InV_sub, NumU);
875 NumIn = NumL + NumU + 1;
878 for (
size_t j = 0; j < NumIn; ++j) {
882 scalar_type diagmod = STS::zero();
884 for (
size_t jj = 0; jj < NumL; ++jj) {
885 local_ordinal_type j = InI[jj];
886 IST multiplier = InV[jj];
888 InV[jj] *=
static_cast<scalar_type
>(DV(j));
890 U_->getLocalRowView(j, UUI, UUV);
893 if (RelaxValue_ == STM::zero()) {
894 for (
size_t k = 0; k < NumUU; ++k) {
895 const int kk = colflag[UUI[k]];
900 InV[kk] -=
static_cast<scalar_type
>(multiplier * UUV[k]);
905 for (
size_t k = 0; k < NumUU; ++k) {
909 const int kk = colflag[UUI[k]];
911 InV[kk] -=
static_cast<scalar_type
>(multiplier * UUV[k]);
913 diagmod -=
static_cast<scalar_type
>(multiplier * UUV[k]);
921 L_->replaceLocalValues(local_row, InI(0, NumL), InV(0, NumL));
926 if (RelaxValue_ != STM::zero()) {
927 DV(i) += RelaxValue_ * diagmod;
930 if (STS::magnitude(DV(i)) > STS::magnitude(MaxDiagonalValue)) {
931 if (STS::real(DV(i)) < STM::zero()) {
932 DV(i) = -MinDiagonalValue;
934 DV(i) = MinDiagonalValue;
937 DV(i) =
static_cast<impl_scalar_type
>(STS::one()) / DV(i);
940 for (
size_t j = 0; j < NumU; ++j) {
941 InV[NumL + 1 + j] *=
static_cast<scalar_type
>(DV(i));
946 U_->replaceLocalValues(local_row, InI(NumL + 1, NumU), InV(NumL + 1, NumU));
950 for (
size_t j = 0; j < NumIn; ++j) {
951 colflag[InI[j]] = -1;
962 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
963 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
966 L_solver_->setMatrix(L_);
967 L_solver_->compute();
968 U_solver_->setMatrix(U_);
969 U_solver_->compute();
972template <
class MatrixType>
973void RILUK<MatrixType>::compute_kkspiluk() {
977 L_->setAllToScalar(STS::zero());
978 U_->setAllToScalar(STS::zero());
980 using row_map_type =
typename crs_matrix_type::local_matrix_device_type::row_map_type;
981 auto lclL = L_->getLocalMatrixDevice();
982 row_map_type L_rowmap = lclL.graph.row_map;
983 auto L_entries = lclL.graph.entries;
984 auto L_values = lclL.values;
986 auto lclU = U_->getLocalMatrixDevice();
987 row_map_type U_rowmap = lclU.graph.row_map;
988 auto U_entries = lclU.graph.entries;
989 auto U_values = lclU.values;
991 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
992 KokkosSparse::spiluk_numeric(KernelHandle_.getRawPtr(), LevelOfFill_,
993 lclMtx.graph.row_map, lclMtx.graph.entries, lclMtx.values,
994 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values);
996 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
997 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
999 L_solver_->compute();
1000 U_solver_->compute();
1003template <
class MatrixType>
1004void RILUK<MatrixType>::compute_kkspiluk_stream() {
1005 std::vector<lno_row_view_t> L_rowmap_v(num_streams_);
1006 std::vector<lno_nonzero_view_t> L_entries_v(num_streams_);
1007 std::vector<scalar_nonzero_view_t> L_values_v(num_streams_);
1008 std::vector<lno_row_view_t> U_rowmap_v(num_streams_);
1009 std::vector<lno_nonzero_view_t> U_entries_v(num_streams_);
1010 std::vector<scalar_nonzero_view_t> U_values_v(num_streams_);
1011 std::vector<kk_handle_type*> KernelHandle_rawptr_v_(num_streams_);
1012 for (
int i = 0; i < num_streams_; i++) {
1013 L_v_[i]->resumeFill();
1014 U_v_[i]->resumeFill();
1016 L_v_[i]->setAllToScalar(STS::zero());
1017 U_v_[i]->setAllToScalar(STS::zero());
1019 auto lclL = L_v_[i]->getLocalMatrixDevice();
1020 L_rowmap_v[i] = lclL.graph.row_map;
1021 L_entries_v[i] = lclL.graph.entries;
1022 L_values_v[i] = lclL.values;
1024 auto lclU = U_v_[i]->getLocalMatrixDevice();
1025 U_rowmap_v[i] = lclU.graph.row_map;
1026 U_entries_v[i] = lclU.graph.entries;
1027 U_values_v[i] = lclU.values;
1028 KernelHandle_rawptr_v_[i] = KernelHandle_v_[i].getRawPtr();
1031 if (hasStreamReordered_) {
1035 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1038 using TeamPolicy = Kokkos::TeamPolicy<execution_space>;
1039 const auto A_nrows = lclMtx.numRows();
1040 auto rows_per_block = ((A_nrows % num_streams_) == 0)
1041 ? (A_nrows / num_streams_)
1042 : (A_nrows / num_streams_ + 1);
1043 for (
int i = 0; i < num_streams_; i++) {
1044 const auto start_row_offset = i * rows_per_block;
1045 auto rowptrs = A_local_diagblks_rowmap_v_[i];
1046 auto colindices = A_local_diagblks_entries_v_[i];
1047 auto values = A_local_diagblks_values_v_[i];
1048 const bool reordered = hasStreamReordered_;
1049 typename lno_nonzero_view_t::non_const_type reverse_perm = hasStreamReordered_ ? reverse_perm_v_[i] :
typename lno_nonzero_view_t::non_const_type{};
1050 TeamPolicy pol(exec_space_instances_[i], A_local_diagblks_rowmap_v_[i].extent(0) - 1, Kokkos::AUTO);
1051 Kokkos::parallel_for(
1052 pol, KOKKOS_LAMBDA(
const typename TeamPolicy::member_type& team) {
1053 const auto irow = team.league_rank();
1054 const auto irow_A = start_row_offset + (reordered ? reverse_perm(irow) : irow);
1055 const auto A_local_crs_row = lclMtx.rowConst(irow_A);
1056 const auto begin_row = rowptrs(irow);
1057 const auto num_entries = rowptrs(irow + 1) - begin_row;
1058 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, num_entries), [&](
const int j) {
1059 const auto colidx = colindices(begin_row + j);
1060 const auto colidx_A = start_row_offset + (reordered ? reverse_perm(colidx) : colidx);
1062 const int offset = KokkosSparse::findRelOffset(
1063 &A_local_crs_row.colidx(0), A_local_crs_row.length, colidx_A, 0,
false);
1064 values(begin_row + j) = A_local_crs_row.value(offset);
1070 KokkosSparse::Experimental::spiluk_numeric_streams(exec_space_instances_, KernelHandle_rawptr_v_, LevelOfFill_,
1071 A_local_diagblks_rowmap_v_, A_local_diagblks_entries_v_, A_local_diagblks_values_v_,
1072 L_rowmap_v, L_entries_v, L_values_v,
1073 U_rowmap_v, U_entries_v, U_values_v);
1075 for (
int i = 0; i < num_streams_; i++) {
1076 L_v_[i]->fillComplete();
1077 U_v_[i]->fillComplete();
1080 L_solver_->compute();
1081 U_solver_->compute();
1084template <
class MatrixType>
1086 using Teuchos::Array;
1087 using Teuchos::ArrayView;
1090 using Teuchos::rcp_const_cast;
1091 using Teuchos::rcp_dynamic_cast;
1092 const char prefix[] =
"Ifpack2::RILUK::compute: ";
1097 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix <<
"The matrix is null. Please "
1098 "call setMatrix() with a nonnull input before calling this method.");
1099 TEUCHOS_TEST_FOR_EXCEPTION(!A_->isFillComplete(), std::runtime_error, prefix <<
"The matrix is not "
1100 "fill complete. You may not invoke initialize() or compute() with this "
1101 "matrix until the matrix is fill complete. If your matrix is a "
1102 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
1103 "range Maps, if appropriate) before calling this method.");
1105 if (!isInitialized()) {
1109 Teuchos::Time timer(
"RILUK::compute");
1112 Teuchos::TimeMonitor timeMon(timer);
1113 double startTime = timer.wallTime();
1115 isComputed_ =
false;
1117 if (!this->isKokkosKernelsSpiluk_) {
1121 if (!A_local_crs_nc_.is_null()) {
1123 A_local_crs_nc_->resumeFill();
1125 nonconst_local_inds_host_view_type indices(
"indices", A_local_->getLocalMaxNumRowEntries());
1126 nonconst_values_host_view_type values(
"values", A_local_->getLocalMaxNumRowEntries());
1128 size_t numEntries = 0;
1129 A_local_->getLocalRowCopy(i, indices, values, numEntries);
1130 A_local_crs_nc_->replaceLocalValues(i, numEntries,
reinterpret_cast<scalar_type*
>(values.data()), indices.data());
1132 A_local_crs_nc_->fillComplete(A_local_->getDomainMap(), A_local_->getRangeMap());
1135 if (!isKokkosKernelsStream_) {
1139 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1140 if (!hasStreamsWithRCB_) {
1141 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_, hasStreamReordered_);
1143 auto A_coordinates_lcl = A_coordinates_->getLocalViewDevice(Tpetra::Access::ReadOnly);
1144 Kokkos::deep_copy(coors_rcb_, A_coordinates_lcl);
1145 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, coors_rcb_,
1146 A_local_diagblks_v_, perm_rcb_);
1148 for (
int i = 0; i < num_streams_; i++) {
1149 A_local_diagblks_values_v_[i] = A_local_diagblks_v_[i].values;
1152 compute_kkspiluk_stream();
1158 computeTime_ += (timer.wallTime() - startTime);
1162template <
typename MV,
typename Map>
1163void resetMultiVecIfNeeded(std::unique_ptr<MV>& mv_ptr,
const Map& map,
const size_t numVectors,
bool initialize) {
1164 if (!mv_ptr || mv_ptr->getNumVectors() != numVectors) {
1165 mv_ptr.reset(
new MV(map, numVectors, initialize));
1170template <
class MatrixType>
1172 apply(
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1173 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1174 Teuchos::ETransp mode,
1178 using Teuchos::rcpFromRef;
1180 TEUCHOS_TEST_FOR_EXCEPTION(
1181 A_.is_null(), std::runtime_error,
1182 "Ifpack2::RILUK::apply: The matrix is "
1183 "null. Please call setMatrix() with a nonnull input, then initialize() "
1184 "and compute(), before calling this method.");
1185 TEUCHOS_TEST_FOR_EXCEPTION(
1186 !isComputed(), std::runtime_error,
1187 "Ifpack2::RILUK::apply: If you have not yet called compute(), "
1188 "you must call compute() before calling this method.");
1189 TEUCHOS_TEST_FOR_EXCEPTION(
1190 X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
1191 "Ifpack2::RILUK::apply: X and Y do not have the same number of columns. "
1192 "X.getNumVectors() = "
1193 << X.getNumVectors()
1194 <<
" != Y.getNumVectors() = " << Y.getNumVectors() <<
".");
1195 TEUCHOS_TEST_FOR_EXCEPTION(
1196 STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
1197 "Ifpack2::RILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1198 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1199 "fixed. There is a FIXME in this file about this very issue.");
1200#ifdef HAVE_IFPACK2_DEBUG
1202 if (!isKokkosKernelsStream_) {
1204 TEUCHOS_TEST_FOR_EXCEPTION(STM::isnaninf(D_nrm1), std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf.");
1206 Teuchos::Array<magnitude_type> norms(X.getNumVectors());
1209 for (
size_t j = 0; j < X.getNumVectors(); ++j) {
1210 if (STM::isnaninf(norms[j])) {
1215 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the input X is NaN or Inf.");
1222 Teuchos::Time timer(
"RILUK::apply");
1223 double startTime = timer.wallTime();
1225 Teuchos::TimeMonitor timeMon(timer);
1226 if (alpha == one && beta == zero) {
1227 if (isKokkosKernelsSpiluk_ && isKokkosKernelsStream_ && (hasStreamReordered_ || hasStreamsWithRCB_)) {
1228 Impl::resetMultiVecIfNeeded(reordered_x_, X.getMap(), X.getNumVectors(),
false);
1229 Impl::resetMultiVecIfNeeded(reordered_y_, Y.getMap(), Y.getNumVectors(),
false);
1232 for (
size_t j = 0; j < X.getNumVectors(); j++) {
1233 auto X_j = X.getVector(j);
1234 auto ReorderedX_j = reordered_x_->getVectorNonConst(j);
1235 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1236 auto ReorderedX_lcl = ReorderedX_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1237 if (hasStreamReordered_) {
1240 for (
int i = 0; i < num_streams_; i++) {
1241 auto perm_i = perm_v_[i];
1242 stream_end = stream_begin + perm_i.extent(0);
1243 auto X_lcl_sub = Kokkos::subview(X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1244 auto ReorderedX_lcl_sub = Kokkos::subview(ReorderedX_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1245 Kokkos::parallel_for(
1246 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0,
static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1247 ReorderedX_lcl_sub(perm_i(ii)) = X_lcl_sub(ii);
1249 stream_begin = stream_end;
1252 auto perm = perm_rcb_;
1253 Kokkos::parallel_for(
1254 Kokkos::RangePolicy<execution_space>(0,
static_cast<int>(X_lcl.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1255 ReorderedX_lcl(perm(ii), 0) = X_lcl(ii, 0);
1261 if (mode == Teuchos::NO_TRANS) {
1263 L_solver_->apply(*reordered_x_, Y, mode);
1265 U_solver_->apply(Y, *reordered_y_, mode);
1268 U_solver_->apply(*reordered_x_, Y, mode);
1270 L_solver_->apply(Y, *reordered_y_, mode);
1273 for (
size_t j = 0; j < Y.getNumVectors(); j++) {
1274 auto Y_j = Y.getVectorNonConst(j);
1275 auto ReorderedY_j = reordered_y_->getVector(j);
1276 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1277 auto ReorderedY_lcl = ReorderedY_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1278 if (hasStreamReordered_) {
1281 for (
int i = 0; i < num_streams_; i++) {
1282 auto perm_i = perm_v_[i];
1283 stream_end = stream_begin + perm_i.extent(0);
1284 auto Y_lcl_sub = Kokkos::subview(Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1285 auto ReorderedY_lcl_sub = Kokkos::subview(ReorderedY_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1286 Kokkos::parallel_for(
1287 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0,
static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1288 Y_lcl_sub(ii) = ReorderedY_lcl_sub(perm_i(ii));
1290 stream_begin = stream_end;
1293 auto perm = perm_rcb_;
1294 Kokkos::parallel_for(
1295 Kokkos::RangePolicy<execution_space>(0,
static_cast<int>(Y_lcl.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1296 Y_lcl(ii, 0) = ReorderedY_lcl(perm(ii), 0);
1302 if (mode == Teuchos::NO_TRANS) {
1303#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1307 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1310 L_solver_->apply(X, *Y_tmp_, mode);
1312 if (!this->isKokkosKernelsSpiluk_) {
1315 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1318 U_solver_->apply(*Y_tmp_, Y, mode);
1321 L_solver_->apply(X, Y, mode);
1323 if (!this->isKokkosKernelsSpiluk_) {
1326 Y.elementWiseMultiply(one, *D_, Y, zero);
1329 U_solver_->apply(Y, Y, mode);
1332#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1336 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1339 U_solver_->apply(X, *Y_tmp_, mode);
1341 if (!this->isKokkosKernelsSpiluk_) {
1347 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1350 L_solver_->apply(*Y_tmp_, Y, mode);
1353 U_solver_->apply(X, Y, mode);
1355 if (!this->isKokkosKernelsSpiluk_) {
1361 Y.elementWiseMultiply(one, *D_, Y, zero);
1364 L_solver_->apply(Y, Y, mode);
1369 if (alpha == zero) {
1379 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1380 apply(X, *Y_tmp_, mode);
1381 Y.update(alpha, *Y_tmp_, beta);
1386#ifdef HAVE_IFPACK2_DEBUG
1388 Teuchos::Array<magnitude_type> norms(Y.getNumVectors());
1391 for (
size_t j = 0; j < Y.getNumVectors(); ++j) {
1392 if (STM::isnaninf(norms[j])) {
1397 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the output Y is NaN or Inf.");
1402 applyTime_ += (timer.wallTime() - startTime);
1438template <
class MatrixType>
1440 std::ostringstream os;
1445 os <<
"\"Ifpack2::RILUK\": {";
1446 os <<
"Initialized: " << (isInitialized() ?
"true" :
"false") <<
", "
1447 <<
"Computed: " << (isComputed() ?
"true" :
"false") <<
", ";
1449 os <<
"Level-of-fill: " << getLevelOfFill() <<
", ";
1451 if (isKokkosKernelsSpiluk_) os <<
"KK-SPILUK, ";
1452 if (isKokkosKernelsStream_) os <<
"KK-Stream, ";
1455 os <<
"Matrix: null";
1457 os <<
"Global matrix dimensions: ["
1458 << A_->getGlobalNumRows() <<
", " << A_->getGlobalNumCols() <<
"]"
1459 <<
", Global nnz: " << A_->getGlobalNumEntries();
1462 if (!L_solver_.is_null()) os <<
", " << L_solver_->description();
1463 if (!U_solver_.is_null()) os <<
", " << U_solver_->description();
1471#define IFPACK2_RILUK_INSTANT(S, LO, GO, N) \
1472 template class Ifpack2::RILUK<Tpetra::RowMatrix<S, LO, GO, N> >;