560 LevelOfFill_, 0, Overalloc_));
562 std::vector<int> weights(num_streams_);
563 std::fill(weights.begin(), weights.end(), 1);
564 exec_space_instances_ = Kokkos::Experimental::partition_space(execution_space(), weights);
566 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
567 if (!hasStreamReordered_) {
568 if (hasStreamsWithRCB_) {
569 TEUCHOS_TEST_FOR_EXCEPTION(A_coordinates_.is_null(), std::runtime_error, prefix <<
"The coordinates associated with rows of the input matrix is null while RILUK uses streams with RCB. Please call setCoord() with a nonnull input before calling this method.");
570 auto A_coordinates_lcl = A_coordinates_->getLocalViewDevice(Tpetra::Access::ReadOnly);
571 perm_rcb_ = perm_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"perm_rcb_"), A_coordinates_lcl.extent(0));
572 coors_rcb_ = coors_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"coors_rcb_"), A_coordinates_lcl.extent(0), A_coordinates_lcl.extent(1));
573 Kokkos::deep_copy(coors_rcb_, A_coordinates_lcl);
574 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, coors_rcb_,
575 A_local_diagblks_v_, perm_rcb_);
577 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_);
580 perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_,
true);
581 reverse_perm_v_.resize(perm_v_.size());
582 for (
size_t istream = 0; istream < perm_v_.size(); ++istream) {
583 using perm_type =
typename lno_nonzero_view_t::non_const_type;
584 const auto perm = perm_v_[istream];
585 const auto perm_length = perm.extent(0);
586 perm_type reverse_perm(
587 Kokkos::view_alloc(Kokkos::WithoutInitializing,
"reverse_perm"),
589 Kokkos::parallel_for(
590 Kokkos::RangePolicy<execution_space>(exec_space_instances_[istream], 0, perm_length),
591 KOKKOS_LAMBDA(
const local_ordinal_type ii) {
592 reverse_perm(perm(ii)) = ii;
594 reverse_perm_v_[istream] = reverse_perm;
598 A_local_diagblks_rowmap_v_ = std::vector<lno_row_view_t>(num_streams_);
599 A_local_diagblks_entries_v_ = std::vector<lno_nonzero_view_t>(num_streams_);
600 A_local_diagblks_values_v_ = std::vector<scalar_nonzero_view_t>(num_streams_);
602 for (
int i = 0; i < num_streams_; i++) {
603 A_local_diagblks_rowmap_v_[i] = A_local_diagblks_v_[i].graph.row_map;
604 A_local_diagblks_entries_v_[i] = A_local_diagblks_v_[i].graph.entries;
605 A_local_diagblks_values_v_[i] = A_local_diagblks_v_[i].values;
607 Teuchos::RCP<const crs_map_type> A_local_diagblks_RowMap = rcp(
new crs_map_type(A_local_diagblks_v_[i].numRows(),
608 A_local_diagblks_v_[i].numRows(),
609 A_local_crs_->getRowMap()->getComm()));
610 Teuchos::RCP<const crs_map_type> A_local_diagblks_ColMap = rcp(
new crs_map_type(A_local_diagblks_v_[i].numCols(),
611 A_local_diagblks_v_[i].numCols(),
612 A_local_crs_->getColMap()->getComm()));
613 Teuchos::RCP<crs_matrix_type> A_local_diagblks = rcp(
new crs_matrix_type(A_local_diagblks_RowMap,
614 A_local_diagblks_ColMap,
615 A_local_diagblks_v_[i]));
617 LevelOfFill_, 0, Overalloc_));
622 if (this->isKokkosKernelsSpiluk_) {
623 if (!isKokkosKernelsStream_) {
624 this->KernelHandle_ = Teuchos::rcp(
new kk_handle_type());
625 KernelHandle_->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
626 A_local_->getLocalNumRows(),
627 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1),
628 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1));
629 Graph_->initialize(KernelHandle_);
631 KernelHandle_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(num_streams_);
632 for (
int i = 0; i < num_streams_; i++) {
633 KernelHandle_v_[i] = Teuchos::rcp(
new kk_handle_type());
634 KernelHandle_v_[i]->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
635 A_local_diagblks_v_[i].numRows(),
636 2 * A_local_diagblks_v_[i].nnz() * (LevelOfFill_ + 1),
637 2 * A_local_diagblks_v_[i].nnz() * (LevelOfFill_ + 1));
638 Graph_v_[i]->initialize(KernelHandle_v_[i]);
642 Graph_->initialize();
646 checkOrderingConsistency(*A_local_);
647 if (!isKokkosKernelsStream_) {
648 L_solver_->setMatrix(L_);
650 L_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
651 L_solver_->setMatrices(L_v_);
653 L_solver_->initialize();
655 if (!isKokkosKernelsStream_) {
656 U_solver_->setMatrix(U_);
658 U_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
659 U_solver_->setMatrices(U_v_);
661 U_solver_->initialize();
668 isInitialized_ =
true;
670 initializeTime_ += (timer.wallTime() - startTime);
673template <
class MatrixType>
674void RILUK<MatrixType>::
675 checkOrderingConsistency(
const row_matrix_type& A) {
679 Teuchos::ArrayView<const global_ordinal_type> rowGIDs = A.getRowMap()->getLocalElementList();
680 Teuchos::ArrayView<const global_ordinal_type> colGIDs = A.getColMap()->getLocalElementList();
681 bool gidsAreConsistentlyOrdered =
true;
682 global_ordinal_type indexOfInconsistentGID = 0;
683 for (global_ordinal_type i = 0; i < rowGIDs.size(); ++i) {
684 if (rowGIDs[i] != colGIDs[i]) {
685 gidsAreConsistentlyOrdered =
false;
686 indexOfInconsistentGID = i;
690 TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered ==
false, std::runtime_error,
691 "The ordering of the local GIDs in the row and column maps is not the same"
693 <<
"at index " << indexOfInconsistentGID
694 <<
". Consistency is required, as all calculations are done with"
696 <<
"local indexing.");
699template <
class MatrixType>
700void RILUK<MatrixType>::
701 initAllValues(
const row_matrix_type& A) {
702 using Teuchos::ArrayRCP;
706 using Teuchos::REDUCE_SUM;
707 using Teuchos::reduceAll;
708 typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
710 size_t NumIn = 0, NumL = 0, NumU = 0;
711 bool DiagFound =
false;
712 size_t NumNonzeroDiags = 0;
713 size_t MaxNumEntries = A.getGlobalMaxNumRowEntries();
717 nonconst_local_inds_host_view_type InI(
"InI", MaxNumEntries);
718 Teuchos::Array<local_ordinal_type> LI(MaxNumEntries);
719 Teuchos::Array<local_ordinal_type> UI(MaxNumEntries);
720 nonconst_values_host_view_type InV(
"InV", MaxNumEntries);
721 Teuchos::Array<scalar_type> LV(MaxNumEntries);
722 Teuchos::Array<scalar_type> UV(MaxNumEntries);
725 const bool ReplaceValues = L_->isStaticGraph() || L_->isLocallyIndexed();
730 L_->setAllToScalar(STS::zero());
731 U_->setAllToScalar(STS::zero());
734 D_->putScalar(STS::zero());
735 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
737 RCP<const map_type> rowMap = L_->getRowMap();
747 Teuchos::ArrayView<const global_ordinal_type> nodeGIDs = rowMap->getLocalElementList();
748 for (
size_t myRow = 0; myRow < A.getLocalNumRows(); ++myRow) {
749 local_ordinal_type local_row = myRow;
753 A.getLocalRowCopy(local_row, InI, InV, NumIn);
761 for (
size_t j = 0; j < NumIn; ++j) {
762 const local_ordinal_type k = InI[j];
764 if (k == local_row) {
767 DV(local_row) += Rthresh_ * InV[j] + IFPACK2_SGN(InV[j]) * Athresh_;
769 TEUCHOS_TEST_FOR_EXCEPTION(
770 true, std::runtime_error,
771 "Ifpack2::RILUK::initAllValues: current "
773 << k <<
" < 0. I'm not sure why this is an error; it is "
774 "probably an artifact of the undocumented assumptions of the "
775 "original implementation (likely copied and pasted from Ifpack). "
776 "Nevertheless, the code I found here insisted on this being an error "
777 "state, so I will throw an exception here.");
778 }
else if (k < local_row) {
782 }
else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
794 DV(local_row) = Athresh_;
799 L_->replaceLocalValues(local_row, LI(0, NumL), LV(0, NumL));
803 L_->insertLocalValues(local_row, LI(0, NumL), LV(0, NumL));
809 U_->replaceLocalValues(local_row, UI(0, NumU), UV(0, NumU));
813 U_->insertLocalValues(local_row, UI(0, NumU), UV(0, NumU));
821 isInitialized_ =
true;
824template <
class MatrixType>
825void RILUK<MatrixType>::compute_serial() {
828 initAllValues(*A_local_);
833 const scalar_type MinDiagonalValue = STS::rmin();
834 const scalar_type MaxDiagonalValue = STS::one() / MinDiagonalValue;
836 size_t NumIn, NumL, NumU;
839 const size_t MaxNumEntries =
840 L_->getLocalMaxNumRowEntries() + U_->getLocalMaxNumRowEntries() + 1;
842 Teuchos::Array<local_ordinal_type> InI(MaxNumEntries);
843 Teuchos::Array<scalar_type> InV(MaxNumEntries);
844 size_t num_cols = U_->getColMap()->getLocalNumElements();
845 Teuchos::Array<int> colflag(num_cols, -1);
847 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
851 using IST =
typename row_matrix_type::impl_scalar_type;
852 for (
size_t i = 0; i < L_->getLocalNumRows(); ++i) {
853 local_ordinal_type local_row = i;
856 local_inds_host_view_type UUI;
857 values_host_view_type UUV;
861 NumIn = MaxNumEntries;
862 nonconst_local_inds_host_view_type InI_v(InI.data(), MaxNumEntries);
863 nonconst_values_host_view_type InV_v(
reinterpret_cast<IST*
>(InV.data()), MaxNumEntries);
865 L_->getLocalRowCopy(local_row, InI_v, InV_v, NumL);
868 InI[NumL] = local_row;
870 nonconst_local_inds_host_view_type InI_sub(InI.data() + NumL + 1, MaxNumEntries - NumL - 1);
871 nonconst_values_host_view_type InV_sub(
reinterpret_cast<IST*
>(InV.data()) + NumL + 1, MaxNumEntries - NumL - 1);
873 U_->getLocalRowCopy(local_row, InI_sub, InV_sub, NumU);
874 NumIn = NumL + NumU + 1;
877 for (
size_t j = 0; j < NumIn; ++j) {
881 scalar_type diagmod = STS::zero();
883 for (
size_t jj = 0; jj < NumL; ++jj) {
884 local_ordinal_type j = InI[jj];
885 IST multiplier = InV[jj];
887 InV[jj] *=
static_cast<scalar_type
>(DV(j));
889 U_->getLocalRowView(j, UUI, UUV);
892 if (RelaxValue_ == STM::zero()) {
893 for (
size_t k = 0; k < NumUU; ++k) {
894 const int kk = colflag[UUI[k]];
899 InV[kk] -=
static_cast<scalar_type
>(multiplier * UUV[k]);
904 for (
size_t k = 0; k < NumUU; ++k) {
908 const int kk = colflag[UUI[k]];
910 InV[kk] -=
static_cast<scalar_type
>(multiplier * UUV[k]);
912 diagmod -=
static_cast<scalar_type
>(multiplier * UUV[k]);
920 L_->replaceLocalValues(local_row, InI(0, NumL), InV(0, NumL));
925 if (RelaxValue_ != STM::zero()) {
926 DV(i) += RelaxValue_ * diagmod;
929 if (STS::magnitude(DV(i)) > STS::magnitude(MaxDiagonalValue)) {
930 if (STS::real(DV(i)) < STM::zero()) {
931 DV(i) = -MinDiagonalValue;
933 DV(i) = MinDiagonalValue;
936 DV(i) =
static_cast<impl_scalar_type
>(STS::one()) / DV(i);
939 for (
size_t j = 0; j < NumU; ++j) {
940 InV[NumL + 1 + j] *=
static_cast<scalar_type
>(DV(i));
945 U_->replaceLocalValues(local_row, InI(NumL + 1, NumU), InV(NumL + 1, NumU));
949 for (
size_t j = 0; j < NumIn; ++j) {
950 colflag[InI[j]] = -1;
961 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
962 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
965 L_solver_->setMatrix(L_);
966 L_solver_->compute();
967 U_solver_->setMatrix(U_);
968 U_solver_->compute();
971template <
class MatrixType>
972void RILUK<MatrixType>::compute_kkspiluk() {
976 L_->setAllToScalar(STS::zero());
977 U_->setAllToScalar(STS::zero());
979 using row_map_type =
typename crs_matrix_type::local_matrix_device_type::row_map_type;
980 auto lclL = L_->getLocalMatrixDevice();
981 row_map_type L_rowmap = lclL.graph.row_map;
982 auto L_entries = lclL.graph.entries;
983 auto L_values = lclL.values;
985 auto lclU = U_->getLocalMatrixDevice();
986 row_map_type U_rowmap = lclU.graph.row_map;
987 auto U_entries = lclU.graph.entries;
988 auto U_values = lclU.values;
990 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
991 KokkosSparse::spiluk_numeric(KernelHandle_.getRawPtr(), LevelOfFill_,
992 lclMtx.graph.row_map, lclMtx.graph.entries, lclMtx.values,
993 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values);
995 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
996 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
998 L_solver_->compute();
999 U_solver_->compute();
1002template <
class MatrixType>
1003void RILUK<MatrixType>::compute_kkspiluk_stream() {
1004 std::vector<lno_row_view_t> L_rowmap_v(num_streams_);
1005 std::vector<lno_nonzero_view_t> L_entries_v(num_streams_);
1006 std::vector<scalar_nonzero_view_t> L_values_v(num_streams_);
1007 std::vector<lno_row_view_t> U_rowmap_v(num_streams_);
1008 std::vector<lno_nonzero_view_t> U_entries_v(num_streams_);
1009 std::vector<scalar_nonzero_view_t> U_values_v(num_streams_);
1010 std::vector<kk_handle_type*> KernelHandle_rawptr_v_(num_streams_);
1011 for (
int i = 0; i < num_streams_; i++) {
1012 L_v_[i]->resumeFill();
1013 U_v_[i]->resumeFill();
1015 L_v_[i]->setAllToScalar(STS::zero());
1016 U_v_[i]->setAllToScalar(STS::zero());
1018 auto lclL = L_v_[i]->getLocalMatrixDevice();
1019 L_rowmap_v[i] = lclL.graph.row_map;
1020 L_entries_v[i] = lclL.graph.entries;
1021 L_values_v[i] = lclL.values;
1023 auto lclU = U_v_[i]->getLocalMatrixDevice();
1024 U_rowmap_v[i] = lclU.graph.row_map;
1025 U_entries_v[i] = lclU.graph.entries;
1026 U_values_v[i] = lclU.values;
1027 KernelHandle_rawptr_v_[i] = KernelHandle_v_[i].getRawPtr();
1030 if (hasStreamReordered_) {
1034 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1037 using TeamPolicy = Kokkos::TeamPolicy<execution_space>;
1038 const auto A_nrows = lclMtx.numRows();
1039 auto rows_per_block = ((A_nrows % num_streams_) == 0)
1040 ? (A_nrows / num_streams_)
1041 : (A_nrows / num_streams_ + 1);
1042 for (
int i = 0; i < num_streams_; i++) {
1043 const auto start_row_offset = i * rows_per_block;
1044 auto rowptrs = A_local_diagblks_rowmap_v_[i];
1045 auto colindices = A_local_diagblks_entries_v_[i];
1046 auto values = A_local_diagblks_values_v_[i];
1047 const bool reordered = hasStreamReordered_;
1048 typename lno_nonzero_view_t::non_const_type reverse_perm = hasStreamReordered_ ? reverse_perm_v_[i] :
typename lno_nonzero_view_t::non_const_type{};
1049 TeamPolicy pol(exec_space_instances_[i], A_local_diagblks_rowmap_v_[i].extent(0) - 1, Kokkos::AUTO);
1050 Kokkos::parallel_for(
1051 pol, KOKKOS_LAMBDA(
const typename TeamPolicy::member_type& team) {
1052 const auto irow = team.league_rank();
1053 const auto irow_A = start_row_offset + (reordered ? reverse_perm(irow) : irow);
1054 const auto A_local_crs_row = lclMtx.rowConst(irow_A);
1055 const auto begin_row = rowptrs(irow);
1056 const auto num_entries = rowptrs(irow + 1) - begin_row;
1057 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, num_entries), [&](
const int j) {
1058 const auto colidx = colindices(begin_row + j);
1059 const auto colidx_A = start_row_offset + (reordered ? reverse_perm(colidx) : colidx);
1061 const int offset = KokkosSparse::findRelOffset(
1062 &A_local_crs_row.colidx(0), A_local_crs_row.length, colidx_A, 0,
false);
1063 values(begin_row + j) = A_local_crs_row.value(offset);
1069 KokkosSparse::Experimental::spiluk_numeric_streams(exec_space_instances_, KernelHandle_rawptr_v_, LevelOfFill_,
1070 A_local_diagblks_rowmap_v_, A_local_diagblks_entries_v_, A_local_diagblks_values_v_,
1071 L_rowmap_v, L_entries_v, L_values_v,
1072 U_rowmap_v, U_entries_v, U_values_v);
1074 for (
int i = 0; i < num_streams_; i++) {
1075 L_v_[i]->fillComplete();
1076 U_v_[i]->fillComplete();
1079 L_solver_->compute();
1080 U_solver_->compute();
1083template <
class MatrixType>
1085 using Teuchos::Array;
1086 using Teuchos::ArrayView;
1089 using Teuchos::rcp_const_cast;
1090 using Teuchos::rcp_dynamic_cast;
1091 const char prefix[] =
"Ifpack2::RILUK::compute: ";
1096 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix <<
"The matrix is null. Please "
1097 "call setMatrix() with a nonnull input before calling this method.");
1098 TEUCHOS_TEST_FOR_EXCEPTION(!A_->isFillComplete(), std::runtime_error, prefix <<
"The matrix is not "
1099 "fill complete. You may not invoke initialize() or compute() with this "
1100 "matrix until the matrix is fill complete. If your matrix is a "
1101 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
1102 "range Maps, if appropriate) before calling this method.");
1104 if (!isInitialized()) {
1108 Teuchos::Time timer(
"RILUK::compute");
1111 Teuchos::TimeMonitor timeMon(timer);
1112 double startTime = timer.wallTime();
1114 isComputed_ =
false;
1116 if (!this->isKokkosKernelsSpiluk_) {
1120 if (!A_local_crs_nc_.is_null()) {
1122 A_local_crs_nc_->resumeFill();
1124 nonconst_local_inds_host_view_type indices(
"indices", A_local_->getLocalMaxNumRowEntries());
1125 nonconst_values_host_view_type values(
"values", A_local_->getLocalMaxNumRowEntries());
1127 size_t numEntries = 0;
1128 A_local_->getLocalRowCopy(i, indices, values, numEntries);
1129 A_local_crs_nc_->replaceLocalValues(i, numEntries,
reinterpret_cast<scalar_type*
>(values.data()), indices.data());
1131 A_local_crs_nc_->fillComplete(A_local_->getDomainMap(), A_local_->getRangeMap());
1134 if (!isKokkosKernelsStream_) {
1138 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1139 if (!hasStreamsWithRCB_) {
1140 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_, hasStreamReordered_);
1142 auto A_coordinates_lcl = A_coordinates_->getLocalViewDevice(Tpetra::Access::ReadOnly);
1143 Kokkos::deep_copy(coors_rcb_, A_coordinates_lcl);
1144 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, coors_rcb_,
1145 A_local_diagblks_v_, perm_rcb_);
1147 for (
int i = 0; i < num_streams_; i++) {
1148 A_local_diagblks_values_v_[i] = A_local_diagblks_v_[i].values;
1151 compute_kkspiluk_stream();
1157 computeTime_ += (timer.wallTime() - startTime);
1161template <
typename MV,
typename Map>
1162void resetMultiVecIfNeeded(std::unique_ptr<MV>& mv_ptr,
const Map& map,
const size_t numVectors,
bool initialize) {
1163 if (!mv_ptr || mv_ptr->getNumVectors() != numVectors) {
1164 mv_ptr.reset(
new MV(map, numVectors, initialize));
1169template <
class MatrixType>
1171 apply(
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1172 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1173 Teuchos::ETransp mode,
1177 using Teuchos::rcpFromRef;
1179 TEUCHOS_TEST_FOR_EXCEPTION(
1180 A_.is_null(), std::runtime_error,
1181 "Ifpack2::RILUK::apply: The matrix is "
1182 "null. Please call setMatrix() with a nonnull input, then initialize() "
1183 "and compute(), before calling this method.");
1184 TEUCHOS_TEST_FOR_EXCEPTION(
1185 !isComputed(), std::runtime_error,
1186 "Ifpack2::RILUK::apply: If you have not yet called compute(), "
1187 "you must call compute() before calling this method.");
1188 TEUCHOS_TEST_FOR_EXCEPTION(
1189 X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
1190 "Ifpack2::RILUK::apply: X and Y do not have the same number of columns. "
1191 "X.getNumVectors() = "
1192 << X.getNumVectors()
1193 <<
" != Y.getNumVectors() = " << Y.getNumVectors() <<
".");
1194 TEUCHOS_TEST_FOR_EXCEPTION(
1195 STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
1196 "Ifpack2::RILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1197 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1198 "fixed. There is a FIXME in this file about this very issue.");
1199#ifdef HAVE_IFPACK2_DEBUG
1201 if (!isKokkosKernelsStream_) {
1203 TEUCHOS_TEST_FOR_EXCEPTION(STM::isnaninf(D_nrm1), std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf.");
1205 Teuchos::Array<magnitude_type> norms(X.getNumVectors());
1208 for (
size_t j = 0; j < X.getNumVectors(); ++j) {
1209 if (STM::isnaninf(norms[j])) {
1214 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the input X is NaN or Inf.");
1221 Teuchos::Time timer(
"RILUK::apply");
1222 double startTime = timer.wallTime();
1224 Teuchos::TimeMonitor timeMon(timer);
1225 if (alpha == one && beta == zero) {
1226 if (isKokkosKernelsSpiluk_ && isKokkosKernelsStream_ && (hasStreamReordered_ || hasStreamsWithRCB_)) {
1227 Impl::resetMultiVecIfNeeded(reordered_x_, X.getMap(), X.getNumVectors(),
false);
1228 Impl::resetMultiVecIfNeeded(reordered_y_, Y.getMap(), Y.getNumVectors(),
false);
1231 for (
size_t j = 0; j < X.getNumVectors(); j++) {
1232 auto X_j = X.getVector(j);
1233 auto ReorderedX_j = reordered_x_->getVectorNonConst(j);
1234 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1235 auto ReorderedX_lcl = ReorderedX_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1236 if (hasStreamReordered_) {
1239 for (
int i = 0; i < num_streams_; i++) {
1240 auto perm_i = perm_v_[i];
1241 stream_end = stream_begin + perm_i.extent(0);
1242 auto X_lcl_sub = Kokkos::subview(X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1243 auto ReorderedX_lcl_sub = Kokkos::subview(ReorderedX_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1244 Kokkos::parallel_for(
1245 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0,
static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1246 ReorderedX_lcl_sub(perm_i(ii)) = X_lcl_sub(ii);
1248 stream_begin = stream_end;
1251 auto perm = perm_rcb_;
1252 Kokkos::parallel_for(
1253 Kokkos::RangePolicy<execution_space>(0,
static_cast<int>(X_lcl.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1254 ReorderedX_lcl(perm(ii), 0) = X_lcl(ii, 0);
1260 if (mode == Teuchos::NO_TRANS) {
1262 L_solver_->apply(*reordered_x_, Y, mode);
1264 U_solver_->apply(Y, *reordered_y_, mode);
1267 U_solver_->apply(*reordered_x_, Y, mode);
1269 L_solver_->apply(Y, *reordered_y_, mode);
1272 for (
size_t j = 0; j < Y.getNumVectors(); j++) {
1273 auto Y_j = Y.getVectorNonConst(j);
1274 auto ReorderedY_j = reordered_y_->getVector(j);
1275 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1276 auto ReorderedY_lcl = ReorderedY_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1277 if (hasStreamReordered_) {
1280 for (
int i = 0; i < num_streams_; i++) {
1281 auto perm_i = perm_v_[i];
1282 stream_end = stream_begin + perm_i.extent(0);
1283 auto Y_lcl_sub = Kokkos::subview(Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1284 auto ReorderedY_lcl_sub = Kokkos::subview(ReorderedY_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1285 Kokkos::parallel_for(
1286 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0,
static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1287 Y_lcl_sub(ii) = ReorderedY_lcl_sub(perm_i(ii));
1289 stream_begin = stream_end;
1292 auto perm = perm_rcb_;
1293 Kokkos::parallel_for(
1294 Kokkos::RangePolicy<execution_space>(0,
static_cast<int>(Y_lcl.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1295 Y_lcl(ii, 0) = ReorderedY_lcl(perm(ii), 0);
1301 if (mode == Teuchos::NO_TRANS) {
1302#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1306 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1309 L_solver_->apply(X, *Y_tmp_, mode);
1311 if (!this->isKokkosKernelsSpiluk_) {
1314 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1317 U_solver_->apply(*Y_tmp_, Y, mode);
1320 L_solver_->apply(X, Y, mode);
1322 if (!this->isKokkosKernelsSpiluk_) {
1325 Y.elementWiseMultiply(one, *D_, Y, zero);
1328 U_solver_->apply(Y, Y, mode);
1331#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1335 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1338 U_solver_->apply(X, *Y_tmp_, mode);
1340 if (!this->isKokkosKernelsSpiluk_) {
1346 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1349 L_solver_->apply(*Y_tmp_, Y, mode);
1352 U_solver_->apply(X, Y, mode);
1354 if (!this->isKokkosKernelsSpiluk_) {
1360 Y.elementWiseMultiply(one, *D_, Y, zero);
1363 L_solver_->apply(Y, Y, mode);
1368 if (alpha == zero) {
1378 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1379 apply(X, *Y_tmp_, mode);
1380 Y.update(alpha, *Y_tmp_, beta);
1385#ifdef HAVE_IFPACK2_DEBUG
1387 Teuchos::Array<magnitude_type> norms(Y.getNumVectors());
1390 for (
size_t j = 0; j < Y.getNumVectors(); ++j) {
1391 if (STM::isnaninf(norms[j])) {
1396 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the output Y is NaN or Inf.");
1401 applyTime_ += (timer.wallTime() - startTime);
1437template <
class MatrixType>
1439 std::ostringstream os;
1444 os <<
"\"Ifpack2::RILUK\": {";
1445 os <<
"Initialized: " << (isInitialized() ?
"true" :
"false") <<
", "
1446 <<
"Computed: " << (isComputed() ?
"true" :
"false") <<
", ";
1448 os <<
"Level-of-fill: " << getLevelOfFill() <<
", ";
1450 if (isKokkosKernelsSpiluk_) os <<
"KK-SPILUK, ";
1451 if (isKokkosKernelsStream_) os <<
"KK-Stream, ";
1454 os <<
"Matrix: null";
1456 os <<
"Global matrix dimensions: ["
1457 << A_->getGlobalNumRows() <<
", " << A_->getGlobalNumCols() <<
"]"
1458 <<
", Global nnz: " << A_->getGlobalNumEntries();
1461 if (!L_solver_.is_null()) os <<
", " << L_solver_->description();
1462 if (!U_solver_.is_null()) os <<
", " << U_solver_->description();
1470#define IFPACK2_RILUK_INSTANT(S, LO, GO, N) \
1471 template class Ifpack2::RILUK<Tpetra::RowMatrix<S, LO, GO, N> >;