559 if (!isKokkosKernelsStream_) {
561 LevelOfFill_, 0, Overalloc_));
563 std::vector<int> weights(num_streams_);
564 std::fill(weights.begin(), weights.end(), 1);
565 exec_space_instances_ = Kokkos::Experimental::partition_space(execution_space(), weights);
567 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
568 if (!hasStreamReordered_) {
569 if (hasStreamsWithRCB_) {
570 TEUCHOS_TEST_FOR_EXCEPTION(A_coordinates_.is_null(), std::runtime_error, prefix <<
"The coordinates associated with rows of the input matrix is null while RILUK uses streams with RCB. Please call setCoord() with a nonnull input before calling this method.");
571 auto A_coordinates_lcl = A_coordinates_->getLocalViewDevice(Tpetra::Access::ReadOnly);
572 perm_rcb_ = perm_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"perm_rcb_"), A_coordinates_lcl.extent(0));
573#if KOKKOS_VERSION >= 50100
574 reverse_perm_rcb_ = perm_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"reverse_perm_rcb_"), A_coordinates_lcl.extent(0));
576 coors_rcb_ = coors_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"coors_rcb_"), A_coordinates_lcl.extent(0), A_coordinates_lcl.extent(1));
577 Kokkos::deep_copy(coors_rcb_, A_coordinates_lcl);
578#if KOKKOS_VERSION >= 50100
579 local_ordinal_type n_levels =
static_cast<local_ordinal_type
>(std::log2(
static_cast<double>(num_streams_)) + 1);
580 partition_sizes_rcb_ = KokkosGraph::Experimental::recursive_coordinate_bisection(coors_rcb_, perm_rcb_, reverse_perm_rcb_, n_levels);
581 KokkosSparse::Experimental::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, perm_rcb_, reverse_perm_rcb_, partition_sizes_rcb_,
582 A_local_diagblks_v_);
584 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, coors_rcb_,
585 A_local_diagblks_v_, perm_rcb_);
588#if KOKKOS_VERSION >= 50100
589 KokkosSparse::Experimental::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_);
591 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_);
595#if KOKKOS_VERSION >= 50100
596 perm_v_ = KokkosSparse::Experimental::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_,
true);
598 perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_,
true);
600 reverse_perm_v_.resize(perm_v_.size());
601 for (
size_t istream = 0; istream < perm_v_.size(); ++istream) {
602 using perm_type =
typename lno_nonzero_view_t::non_const_type;
603 const auto perm = perm_v_[istream];
604 const auto perm_length = perm.extent(0);
605 perm_type reverse_perm(
606 Kokkos::view_alloc(Kokkos::WithoutInitializing,
"reverse_perm"),
608 Kokkos::parallel_for(
609 Kokkos::RangePolicy<execution_space>(exec_space_instances_[istream], 0, perm_length),
610 KOKKOS_LAMBDA(
const local_ordinal_type ii) {
611 reverse_perm(perm(ii)) = ii;
613 reverse_perm_v_[istream] = reverse_perm;
617 A_local_diagblks_rowmap_v_ = std::vector<lno_row_view_t>(num_streams_);
618 A_local_diagblks_entries_v_ = std::vector<lno_nonzero_view_t>(num_streams_);
619 A_local_diagblks_values_v_ = std::vector<scalar_nonzero_view_t>(num_streams_);
621 for (
int i = 0; i < num_streams_; i++) {
622 A_local_diagblks_rowmap_v_[i] = A_local_diagblks_v_[i].graph.row_map;
623 A_local_diagblks_entries_v_[i] = A_local_diagblks_v_[i].graph.entries;
624 A_local_diagblks_values_v_[i] = A_local_diagblks_v_[i].values;
626 Teuchos::RCP<const crs_map_type> A_local_diagblks_RowMap = rcp(
new crs_map_type(A_local_diagblks_v_[i].numRows(),
627 A_local_diagblks_v_[i].numRows(),
628 A_local_crs_->getRowMap()->getComm()));
629 Teuchos::RCP<const crs_map_type> A_local_diagblks_ColMap = rcp(
new crs_map_type(A_local_diagblks_v_[i].numCols(),
630 A_local_diagblks_v_[i].numCols(),
631 A_local_crs_->getColMap()->getComm()));
632 Teuchos::RCP<crs_matrix_type> A_local_diagblks = rcp(
new crs_matrix_type(A_local_diagblks_RowMap,
633 A_local_diagblks_ColMap,
634 A_local_diagblks_v_[i]));
636 LevelOfFill_, 0, Overalloc_));
641 if (this->isKokkosKernelsSpiluk_) {
642 if (!isKokkosKernelsStream_) {
643 this->KernelHandle_ = Teuchos::rcp(
new kk_handle_type());
644 KernelHandle_->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
645 A_local_->getLocalNumRows(),
646 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1),
647 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1));
648 Graph_->initialize(KernelHandle_);
650 KernelHandle_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(num_streams_);
651 for (
int i = 0; i < num_streams_; i++) {
652 KernelHandle_v_[i] = Teuchos::rcp(
new kk_handle_type());
653 KernelHandle_v_[i]->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
654 A_local_diagblks_v_[i].numRows(),
655 2 * A_local_diagblks_v_[i].nnz() * (LevelOfFill_ + 1),
656 2 * A_local_diagblks_v_[i].nnz() * (LevelOfFill_ + 1));
657 Graph_v_[i]->initialize(KernelHandle_v_[i]);
661 Graph_->initialize();
665 checkOrderingConsistency(*A_local_);
666 if (!isKokkosKernelsStream_) {
667 L_solver_->setMatrix(L_);
669 L_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
670 L_solver_->setMatrices(L_v_);
672 L_solver_->initialize();
674 if (!isKokkosKernelsStream_) {
675 U_solver_->setMatrix(U_);
677 U_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
678 U_solver_->setMatrices(U_v_);
680 U_solver_->initialize();
687 isInitialized_ =
true;
689 initializeTime_ += (timer.wallTime() - startTime);
692template <
class MatrixType>
693void RILUK<MatrixType>::
694 checkOrderingConsistency(
const row_matrix_type& A) {
698 Teuchos::ArrayView<const global_ordinal_type> rowGIDs = A.getRowMap()->getLocalElementList();
699 Teuchos::ArrayView<const global_ordinal_type> colGIDs = A.getColMap()->getLocalElementList();
700 bool gidsAreConsistentlyOrdered =
true;
701 global_ordinal_type indexOfInconsistentGID = 0;
702 for (global_ordinal_type i = 0; i < rowGIDs.size(); ++i) {
703 if (rowGIDs[i] != colGIDs[i]) {
704 gidsAreConsistentlyOrdered =
false;
705 indexOfInconsistentGID = i;
709 TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered ==
false, std::runtime_error,
710 "The ordering of the local GIDs in the row and column maps is not the same"
712 <<
"at index " << indexOfInconsistentGID
713 <<
". Consistency is required, as all calculations are done with"
715 <<
"local indexing.");
718template <
class MatrixType>
719void RILUK<MatrixType>::
720 initAllValues(
const row_matrix_type& A) {
721 using Teuchos::ArrayRCP;
725 using Teuchos::REDUCE_SUM;
726 using Teuchos::reduceAll;
727 typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
729 size_t NumIn = 0, NumL = 0, NumU = 0;
730 bool DiagFound =
false;
731 size_t NumNonzeroDiags = 0;
732 size_t MaxNumEntries = A.getGlobalMaxNumRowEntries();
736 nonconst_local_inds_host_view_type InI(
"InI", MaxNumEntries);
737 Teuchos::Array<local_ordinal_type> LI(MaxNumEntries);
738 Teuchos::Array<local_ordinal_type> UI(MaxNumEntries);
739 nonconst_values_host_view_type InV(
"InV", MaxNumEntries);
740 Teuchos::Array<scalar_type> LV(MaxNumEntries);
741 Teuchos::Array<scalar_type> UV(MaxNumEntries);
744 const bool ReplaceValues = L_->isStaticGraph() || L_->isLocallyIndexed();
749 L_->setAllToScalar(STS::zero());
750 U_->setAllToScalar(STS::zero());
753 D_->putScalar(STS::zero());
754 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
756 RCP<const map_type> rowMap = L_->getRowMap();
766 Teuchos::ArrayView<const global_ordinal_type> nodeGIDs = rowMap->getLocalElementList();
767 for (
size_t myRow = 0; myRow < A.getLocalNumRows(); ++myRow) {
768 local_ordinal_type local_row = myRow;
772 A.getLocalRowCopy(local_row, InI, InV, NumIn);
780 for (
size_t j = 0; j < NumIn; ++j) {
781 const local_ordinal_type k = InI[j];
783 if (k == local_row) {
786 DV(local_row) += Rthresh_ * InV[j] + IFPACK2_SGN(InV[j]) * Athresh_;
788 TEUCHOS_TEST_FOR_EXCEPTION(
789 true, std::runtime_error,
790 "Ifpack2::RILUK::initAllValues: current "
792 << k <<
" < 0. I'm not sure why this is an error; it is "
793 "probably an artifact of the undocumented assumptions of the "
794 "original implementation (likely copied and pasted from Ifpack). "
795 "Nevertheless, the code I found here insisted on this being an error "
796 "state, so I will throw an exception here.");
797 }
else if (k < local_row) {
801 }
else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
813 DV(local_row) = Athresh_;
818 L_->replaceLocalValues(local_row, LI(0, NumL), LV(0, NumL));
822 L_->insertLocalValues(local_row, LI(0, NumL), LV(0, NumL));
828 U_->replaceLocalValues(local_row, UI(0, NumU), UV(0, NumU));
832 U_->insertLocalValues(local_row, UI(0, NumU), UV(0, NumU));
840 isInitialized_ =
true;
843template <
class MatrixType>
844void RILUK<MatrixType>::compute_serial() {
847 initAllValues(*A_local_);
852 const scalar_type MinDiagonalValue = STS::rmin();
853 const scalar_type MaxDiagonalValue = STS::one() / MinDiagonalValue;
855 size_t NumIn, NumL, NumU;
858 const size_t MaxNumEntries =
859 L_->getLocalMaxNumRowEntries() + U_->getLocalMaxNumRowEntries() + 1;
861 Teuchos::Array<local_ordinal_type> InI(MaxNumEntries);
862 Teuchos::Array<scalar_type> InV(MaxNumEntries);
863 size_t num_cols = U_->getColMap()->getLocalNumElements();
864 Teuchos::Array<int> colflag(num_cols, -1);
866 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
870 using IST =
typename row_matrix_type::impl_scalar_type;
871 for (
size_t i = 0; i < L_->getLocalNumRows(); ++i) {
872 local_ordinal_type local_row = i;
875 local_inds_host_view_type UUI;
876 values_host_view_type UUV;
880 NumIn = MaxNumEntries;
881 nonconst_local_inds_host_view_type InI_v(InI.data(), MaxNumEntries);
882 nonconst_values_host_view_type InV_v(
reinterpret_cast<IST*
>(InV.data()), MaxNumEntries);
884 L_->getLocalRowCopy(local_row, InI_v, InV_v, NumL);
887 InI[NumL] = local_row;
889 nonconst_local_inds_host_view_type InI_sub(InI.data() + NumL + 1, MaxNumEntries - NumL - 1);
890 nonconst_values_host_view_type InV_sub(
reinterpret_cast<IST*
>(InV.data()) + NumL + 1, MaxNumEntries - NumL - 1);
892 U_->getLocalRowCopy(local_row, InI_sub, InV_sub, NumU);
893 NumIn = NumL + NumU + 1;
896 for (
size_t j = 0; j < NumIn; ++j) {
900 scalar_type diagmod = STS::zero();
902 for (
size_t jj = 0; jj < NumL; ++jj) {
903 local_ordinal_type j = InI[jj];
904 IST multiplier = InV[jj];
906 InV[jj] *=
static_cast<scalar_type
>(DV(j));
908 U_->getLocalRowView(j, UUI, UUV);
911 if (RelaxValue_ == STM::zero()) {
912 for (
size_t k = 0; k < NumUU; ++k) {
913 const int kk = colflag[UUI[k]];
918 InV[kk] -=
static_cast<scalar_type
>(multiplier * UUV[k]);
923 for (
size_t k = 0; k < NumUU; ++k) {
927 const int kk = colflag[UUI[k]];
929 InV[kk] -=
static_cast<scalar_type
>(multiplier * UUV[k]);
931 diagmod -=
static_cast<scalar_type
>(multiplier * UUV[k]);
939 L_->replaceLocalValues(local_row, InI(0, NumL), InV(0, NumL));
944 if (RelaxValue_ != STM::zero()) {
945 DV(i) += RelaxValue_ * diagmod;
948 if (STS::magnitude(DV(i)) > STS::magnitude(MaxDiagonalValue)) {
949 if (STS::real(DV(i)) < STM::zero()) {
950 DV(i) = -MinDiagonalValue;
952 DV(i) = MinDiagonalValue;
955 DV(i) =
static_cast<impl_scalar_type
>(STS::one()) / DV(i);
958 for (
size_t j = 0; j < NumU; ++j) {
959 InV[NumL + 1 + j] *=
static_cast<scalar_type
>(DV(i));
964 U_->replaceLocalValues(local_row, InI(NumL + 1, NumU), InV(NumL + 1, NumU));
968 for (
size_t j = 0; j < NumIn; ++j) {
969 colflag[InI[j]] = -1;
980 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
981 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
984 L_solver_->setMatrix(L_);
985 L_solver_->compute();
986 U_solver_->setMatrix(U_);
987 U_solver_->compute();
990template <
class MatrixType>
991void RILUK<MatrixType>::compute_kkspiluk() {
995 L_->setAllToScalar(STS::zero());
996 U_->setAllToScalar(STS::zero());
998 using row_map_type =
typename crs_matrix_type::local_matrix_device_type::row_map_type;
999 auto lclL = L_->getLocalMatrixDevice();
1000 row_map_type L_rowmap = lclL.graph.row_map;
1001 auto L_entries = lclL.graph.entries;
1002 auto L_values = lclL.values;
1004 auto lclU = U_->getLocalMatrixDevice();
1005 row_map_type U_rowmap = lclU.graph.row_map;
1006 auto U_entries = lclU.graph.entries;
1007 auto U_values = lclU.values;
1009 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1010 KokkosSparse::spiluk_numeric(KernelHandle_.getRawPtr(), LevelOfFill_,
1011 lclMtx.graph.row_map, lclMtx.graph.entries, lclMtx.values,
1012 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values);
1014 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
1015 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
1017 L_solver_->compute();
1018 U_solver_->compute();
1021template <
class MatrixType>
1022void RILUK<MatrixType>::compute_kkspiluk_stream() {
1023 std::vector<lno_row_view_t> L_rowmap_v(num_streams_);
1024 std::vector<lno_nonzero_view_t> L_entries_v(num_streams_);
1025 std::vector<scalar_nonzero_view_t> L_values_v(num_streams_);
1026 std::vector<lno_row_view_t> U_rowmap_v(num_streams_);
1027 std::vector<lno_nonzero_view_t> U_entries_v(num_streams_);
1028 std::vector<scalar_nonzero_view_t> U_values_v(num_streams_);
1029 std::vector<kk_handle_type*> KernelHandle_rawptr_v_(num_streams_);
1030 for (
int i = 0; i < num_streams_; i++) {
1031 L_v_[i]->resumeFill();
1032 U_v_[i]->resumeFill();
1034 L_v_[i]->setAllToScalar(STS::zero());
1035 U_v_[i]->setAllToScalar(STS::zero());
1037 auto lclL = L_v_[i]->getLocalMatrixDevice();
1038 L_rowmap_v[i] = lclL.graph.row_map;
1039 L_entries_v[i] = lclL.graph.entries;
1040 L_values_v[i] = lclL.values;
1042 auto lclU = U_v_[i]->getLocalMatrixDevice();
1043 U_rowmap_v[i] = lclU.graph.row_map;
1044 U_entries_v[i] = lclU.graph.entries;
1045 U_values_v[i] = lclU.values;
1046 KernelHandle_rawptr_v_[i] = KernelHandle_v_[i].getRawPtr();
1049 if (hasStreamReordered_) {
1053 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1056 using TeamPolicy = Kokkos::TeamPolicy<execution_space>;
1057 const auto A_nrows = lclMtx.numRows();
1058 auto rows_per_block = ((A_nrows % num_streams_) == 0)
1059 ? (A_nrows / num_streams_)
1060 : (A_nrows / num_streams_ + 1);
1061 for (
int i = 0; i < num_streams_; i++) {
1062 const auto start_row_offset = i * rows_per_block;
1063 auto rowptrs = A_local_diagblks_rowmap_v_[i];
1064 auto colindices = A_local_diagblks_entries_v_[i];
1065 auto values = A_local_diagblks_values_v_[i];
1066 const bool reordered = hasStreamReordered_;
1067 typename lno_nonzero_view_t::non_const_type reverse_perm = hasStreamReordered_ ? reverse_perm_v_[i] :
typename lno_nonzero_view_t::non_const_type{};
1068 TeamPolicy pol(exec_space_instances_[i], A_local_diagblks_rowmap_v_[i].extent(0) - 1, Kokkos::AUTO);
1069 Kokkos::parallel_for(
1070 pol, KOKKOS_LAMBDA(
const typename TeamPolicy::member_type& team) {
1071 const auto irow = team.league_rank();
1072 const auto irow_A = start_row_offset + (reordered ? reverse_perm(irow) : irow);
1073 const auto A_local_crs_row = lclMtx.rowConst(irow_A);
1074 const auto begin_row = rowptrs(irow);
1075 const auto num_entries = rowptrs(irow + 1) - begin_row;
1076 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, num_entries), [&](
const int j) {
1077 const auto colidx = colindices(begin_row + j);
1078 const auto colidx_A = start_row_offset + (reordered ? reverse_perm(colidx) : colidx);
1080 const int offset = KokkosSparse::findRelOffset(
1081 &A_local_crs_row.colidx(0), A_local_crs_row.length, colidx_A, 0,
false);
1082 values(begin_row + j) = A_local_crs_row.value(offset);
1088 KokkosSparse::Experimental::spiluk_numeric_streams(exec_space_instances_, KernelHandle_rawptr_v_, LevelOfFill_,
1089 A_local_diagblks_rowmap_v_, A_local_diagblks_entries_v_, A_local_diagblks_values_v_,
1090 L_rowmap_v, L_entries_v, L_values_v,
1091 U_rowmap_v, U_entries_v, U_values_v);
1093 for (
int i = 0; i < num_streams_; i++) {
1094 L_v_[i]->fillComplete();
1095 U_v_[i]->fillComplete();
1098 L_solver_->compute();
1099 U_solver_->compute();
1102template <
class MatrixType>
1104 using Teuchos::Array;
1105 using Teuchos::ArrayView;
1108 using Teuchos::rcp_const_cast;
1109 using Teuchos::rcp_dynamic_cast;
1110 const char prefix[] =
"Ifpack2::RILUK::compute: ";
1115 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix <<
"The matrix is null. Please "
1116 "call setMatrix() with a nonnull input before calling this method.");
1117 TEUCHOS_TEST_FOR_EXCEPTION(!A_->isFillComplete(), std::runtime_error, prefix <<
"The matrix is not "
1118 "fill complete. You may not invoke initialize() or compute() with this "
1119 "matrix until the matrix is fill complete. If your matrix is a "
1120 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
1121 "range Maps, if appropriate) before calling this method.");
1123 if (!isInitialized()) {
1127 Teuchos::Time timer(
"RILUK::compute");
1130 Teuchos::TimeMonitor timeMon(timer);
1131 double startTime = timer.wallTime();
1133 isComputed_ =
false;
1135 if (!this->isKokkosKernelsSpiluk_) {
1139 if (!A_local_crs_nc_.is_null()) {
1141 A_local_crs_nc_->resumeFill();
1143 nonconst_local_inds_host_view_type indices(
"indices", A_local_->getLocalMaxNumRowEntries());
1144 nonconst_values_host_view_type values(
"values", A_local_->getLocalMaxNumRowEntries());
1146 size_t numEntries = 0;
1147 A_local_->getLocalRowCopy(i, indices, values, numEntries);
1148 A_local_crs_nc_->replaceLocalValues(i, numEntries,
reinterpret_cast<scalar_type*
>(values.data()), indices.data());
1150 A_local_crs_nc_->fillComplete(A_local_->getDomainMap(), A_local_->getRangeMap());
1153 if (!isKokkosKernelsStream_) {
1157 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1158 if (!hasStreamsWithRCB_) {
1159#if KOKKOS_VERSION >= 50100
1160 KokkosSparse::Experimental::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_, hasStreamReordered_);
1162 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_, hasStreamReordered_);
1165#if KOKKOS_VERSION >= 50100
1166 KokkosSparse::Experimental::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, perm_rcb_, reverse_perm_rcb_, partition_sizes_rcb_,
1167 A_local_diagblks_v_);
1169 auto A_coordinates_lcl = A_coordinates_->getLocalViewDevice(Tpetra::Access::ReadOnly);
1170 Kokkos::deep_copy(coors_rcb_, A_coordinates_lcl);
1171 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, coors_rcb_,
1172 A_local_diagblks_v_, perm_rcb_);
1175 for (
int i = 0; i < num_streams_; i++) {
1176 A_local_diagblks_values_v_[i] = A_local_diagblks_v_[i].values;
1179 compute_kkspiluk_stream();
1185 computeTime_ += (timer.wallTime() - startTime);
1189template <
typename MV,
typename Map>
1190void resetMultiVecIfNeeded(std::unique_ptr<MV>& mv_ptr,
const Map& map,
const size_t numVectors,
bool initialize) {
1191 if (!mv_ptr || mv_ptr->getNumVectors() != numVectors) {
1192 mv_ptr.reset(
new MV(map, numVectors, initialize));
1197template <
class MatrixType>
1199 apply(
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1200 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1201 Teuchos::ETransp mode,
1205 using Teuchos::rcpFromRef;
1207 TEUCHOS_TEST_FOR_EXCEPTION(
1208 A_.is_null(), std::runtime_error,
1209 "Ifpack2::RILUK::apply: The matrix is "
1210 "null. Please call setMatrix() with a nonnull input, then initialize() "
1211 "and compute(), before calling this method.");
1212 TEUCHOS_TEST_FOR_EXCEPTION(
1213 !isComputed(), std::runtime_error,
1214 "Ifpack2::RILUK::apply: If you have not yet called compute(), "
1215 "you must call compute() before calling this method.");
1216 TEUCHOS_TEST_FOR_EXCEPTION(
1217 X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
1218 "Ifpack2::RILUK::apply: X and Y do not have the same number of columns. "
1219 "X.getNumVectors() = "
1220 << X.getNumVectors()
1221 <<
" != Y.getNumVectors() = " << Y.getNumVectors() <<
".");
1222 TEUCHOS_TEST_FOR_EXCEPTION(
1223 STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
1224 "Ifpack2::RILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1225 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1226 "fixed. There is a FIXME in this file about this very issue.");
1227#ifdef HAVE_IFPACK2_DEBUG
1229 if (!isKokkosKernelsStream_) {
1231 TEUCHOS_TEST_FOR_EXCEPTION(STM::isnaninf(D_nrm1), std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf.");
1233 Teuchos::Array<magnitude_type> norms(X.getNumVectors());
1236 for (
size_t j = 0; j < X.getNumVectors(); ++j) {
1237 if (STM::isnaninf(norms[j])) {
1242 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the input X is NaN or Inf.");
1249 Teuchos::Time timer(
"RILUK::apply");
1250 double startTime = timer.wallTime();
1252 Teuchos::TimeMonitor timeMon(timer);
1253 if (alpha == one && beta == zero) {
1254 if (isKokkosKernelsSpiluk_ && isKokkosKernelsStream_ && (hasStreamReordered_ || hasStreamsWithRCB_)) {
1255 Impl::resetMultiVecIfNeeded(reordered_x_, X.getMap(), X.getNumVectors(),
false);
1256 Impl::resetMultiVecIfNeeded(reordered_y_, Y.getMap(), Y.getNumVectors(),
false);
1259 for (
size_t j = 0; j < X.getNumVectors(); j++) {
1260 auto X_j = X.getVector(j);
1261 auto ReorderedX_j = reordered_x_->getVectorNonConst(j);
1262 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1263 auto ReorderedX_lcl = ReorderedX_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1264 if (hasStreamReordered_) {
1267 for (
int i = 0; i < num_streams_; i++) {
1268 auto perm_i = perm_v_[i];
1269 stream_end = stream_begin + perm_i.extent(0);
1270 auto X_lcl_sub = Kokkos::subview(X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1271 auto ReorderedX_lcl_sub = Kokkos::subview(ReorderedX_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1272 Kokkos::parallel_for(
1273 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0,
static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1274 ReorderedX_lcl_sub(perm_i(ii)) = X_lcl_sub(ii);
1276 stream_begin = stream_end;
1279 auto perm = perm_rcb_;
1280 Kokkos::parallel_for(
1281 Kokkos::RangePolicy<execution_space>(0,
static_cast<int>(X_lcl.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1282 ReorderedX_lcl(perm(ii), 0) = X_lcl(ii, 0);
1288 if (mode == Teuchos::NO_TRANS) {
1290 L_solver_->apply(*reordered_x_, Y, mode);
1292 U_solver_->apply(Y, *reordered_y_, mode);
1295 U_solver_->apply(*reordered_x_, Y, mode);
1297 L_solver_->apply(Y, *reordered_y_, mode);
1300 for (
size_t j = 0; j < Y.getNumVectors(); j++) {
1301 auto Y_j = Y.getVectorNonConst(j);
1302 auto ReorderedY_j = reordered_y_->getVector(j);
1303 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1304 auto ReorderedY_lcl = ReorderedY_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1305 if (hasStreamReordered_) {
1308 for (
int i = 0; i < num_streams_; i++) {
1309 auto perm_i = perm_v_[i];
1310 stream_end = stream_begin + perm_i.extent(0);
1311 auto Y_lcl_sub = Kokkos::subview(Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1312 auto ReorderedY_lcl_sub = Kokkos::subview(ReorderedY_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1313 Kokkos::parallel_for(
1314 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0,
static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1315 Y_lcl_sub(ii) = ReorderedY_lcl_sub(perm_i(ii));
1317 stream_begin = stream_end;
1320 auto perm = perm_rcb_;
1321 Kokkos::parallel_for(
1322 Kokkos::RangePolicy<execution_space>(0,
static_cast<int>(Y_lcl.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1323 Y_lcl(ii, 0) = ReorderedY_lcl(perm(ii), 0);
1329 if (mode == Teuchos::NO_TRANS) {
1330#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1334 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1337 L_solver_->apply(X, *Y_tmp_, mode);
1339 if (!this->isKokkosKernelsSpiluk_) {
1342 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1345 U_solver_->apply(*Y_tmp_, Y, mode);
1348 L_solver_->apply(X, Y, mode);
1350 if (!this->isKokkosKernelsSpiluk_) {
1353 Y.elementWiseMultiply(one, *D_, Y, zero);
1356 U_solver_->apply(Y, Y, mode);
1359#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1363 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1366 U_solver_->apply(X, *Y_tmp_, mode);
1368 if (!this->isKokkosKernelsSpiluk_) {
1374 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1377 L_solver_->apply(*Y_tmp_, Y, mode);
1380 U_solver_->apply(X, Y, mode);
1382 if (!this->isKokkosKernelsSpiluk_) {
1388 Y.elementWiseMultiply(one, *D_, Y, zero);
1391 L_solver_->apply(Y, Y, mode);
1396 if (alpha == zero) {
1406 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1407 apply(X, *Y_tmp_, mode);
1408 Y.update(alpha, *Y_tmp_, beta);
1413#ifdef HAVE_IFPACK2_DEBUG
1415 Teuchos::Array<magnitude_type> norms(Y.getNumVectors());
1418 for (
size_t j = 0; j < Y.getNumVectors(); ++j) {
1419 if (STM::isnaninf(norms[j])) {
1424 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the output Y is NaN or Inf.");
1429 applyTime_ += (timer.wallTime() - startTime);
1465template <
class MatrixType>
1467 std::ostringstream os;
1472 os <<
"\"Ifpack2::RILUK\": {";
1473 os <<
"Initialized: " << (isInitialized() ?
"true" :
"false") <<
", "
1474 <<
"Computed: " << (isComputed() ?
"true" :
"false") <<
", ";
1476 os <<
"Level-of-fill: " << getLevelOfFill() <<
", ";
1478 if (isKokkosKernelsSpiluk_) os <<
"KK-SPILUK, ";
1479 if (isKokkosKernelsStream_) os <<
"KK-Stream, ";
1482 os <<
"Matrix: null";
1484 os <<
"Global matrix dimensions: ["
1485 << A_->getGlobalNumRows() <<
", " << A_->getGlobalNumCols() <<
"]"
1486 <<
", Global nnz: " << A_->getGlobalNumEntries();
1489 if (!L_solver_.is_null()) os <<
", " << L_solver_->description();
1490 if (!U_solver_.is_null()) os <<
", " << U_solver_->description();
1498#define IFPACK2_RILUK_INSTANT(S, LO, GO, N) \
1499 template class Ifpack2::RILUK<Tpetra::RowMatrix<S, LO, GO, N> >;