558 A_local_crs_ = rcp_const_cast<const crs_matrix_type>(A_local_crs_nc_);
560 if (!isKokkosKernelsStream_) {
562 LevelOfFill_, 0, Overalloc_));
564 std::vector<int> weights(num_streams_);
565 std::fill(weights.begin(), weights.end(), 1);
566 exec_space_instances_ = Kokkos::Experimental::partition_space(execution_space(), weights);
568 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
569 if (!hasStreamReordered_) {
570 if (hasStreamsWithRCB_) {
571 TEUCHOS_TEST_FOR_EXCEPTION(A_coordinates_.is_null(), std::runtime_error, prefix <<
"The coordinates associated with rows of the input matrix is null while RILUK uses streams with RCB. Please call setCoord() with a nonnull input before calling this method.");
572 auto A_coordinates_lcl = A_coordinates_->getLocalViewDevice(Tpetra::Access::ReadOnly);
573 perm_rcb_ = perm_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"perm_rcb_"), A_coordinates_lcl.extent(0));
574#if KOKKOS_VERSION >= 50100
575 reverse_perm_rcb_ = perm_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"reverse_perm_rcb_"), A_coordinates_lcl.extent(0));
577 coors_rcb_ = coors_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"coors_rcb_"), A_coordinates_lcl.extent(0), A_coordinates_lcl.extent(1));
578 Kokkos::deep_copy(coors_rcb_, A_coordinates_lcl);
579#if KOKKOS_VERSION >= 50100
580 local_ordinal_type n_levels =
static_cast<local_ordinal_type
>(std::log2(
static_cast<double>(num_streams_)) + 1);
581 partition_sizes_rcb_ = KokkosGraph::Experimental::recursive_coordinate_bisection(coors_rcb_, perm_rcb_, reverse_perm_rcb_, n_levels);
582 KokkosSparse::Experimental::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, perm_rcb_, reverse_perm_rcb_, partition_sizes_rcb_,
583 A_local_diagblks_v_);
585 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, coors_rcb_,
586 A_local_diagblks_v_, perm_rcb_);
589#if KOKKOS_VERSION >= 50100
590 KokkosSparse::Experimental::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_);
592 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_);
596#if KOKKOS_VERSION >= 50100
597 perm_v_ = KokkosSparse::Experimental::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_,
true);
599 perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_,
true);
601 reverse_perm_v_.resize(perm_v_.size());
602 for (
size_t istream = 0; istream < perm_v_.size(); ++istream) {
603 using perm_type =
typename lno_nonzero_view_t::non_const_type;
604 const auto perm = perm_v_[istream];
605 const auto perm_length = perm.extent(0);
606 perm_type reverse_perm(
607 Kokkos::view_alloc(Kokkos::WithoutInitializing,
"reverse_perm"),
609 Kokkos::parallel_for(
610 Kokkos::RangePolicy<execution_space>(exec_space_instances_[istream], 0, perm_length),
611 KOKKOS_LAMBDA(
const local_ordinal_type ii) {
612 reverse_perm(perm(ii)) = ii;
614 reverse_perm_v_[istream] = reverse_perm;
618 A_local_diagblks_rowmap_v_ = std::vector<lno_row_view_t>(num_streams_);
619 A_local_diagblks_entries_v_ = std::vector<lno_nonzero_view_t>(num_streams_);
620 A_local_diagblks_values_v_ = std::vector<scalar_nonzero_view_t>(num_streams_);
622 for (
int i = 0; i < num_streams_; i++) {
623 A_local_diagblks_rowmap_v_[i] = A_local_diagblks_v_[i].graph.row_map;
624 A_local_diagblks_entries_v_[i] = A_local_diagblks_v_[i].graph.entries;
625 A_local_diagblks_values_v_[i] = A_local_diagblks_v_[i].values;
627 Teuchos::RCP<const crs_map_type> A_local_diagblks_RowMap = rcp(
new crs_map_type(A_local_diagblks_v_[i].numRows(),
628 A_local_diagblks_v_[i].numRows(),
629 A_local_crs_->getRowMap()->getComm()));
630 Teuchos::RCP<const crs_map_type> A_local_diagblks_ColMap = rcp(
new crs_map_type(A_local_diagblks_v_[i].numCols(),
631 A_local_diagblks_v_[i].numCols(),
632 A_local_crs_->getColMap()->getComm()));
633 Teuchos::RCP<crs_matrix_type> A_local_diagblks = rcp(
new crs_matrix_type(A_local_diagblks_RowMap,
634 A_local_diagblks_ColMap,
635 A_local_diagblks_v_[i]));
637 LevelOfFill_, 0, Overalloc_));
642 if (this->isKokkosKernelsSpiluk_) {
643 if (!isKokkosKernelsStream_) {
644 this->KernelHandle_ = Teuchos::rcp(
new kk_handle_type());
645 KernelHandle_->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
646 A_local_->getLocalNumRows(),
647 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1),
648 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1));
649 Graph_->initialize(KernelHandle_);
651 KernelHandle_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(num_streams_);
652 for (
int i = 0; i < num_streams_; i++) {
653 KernelHandle_v_[i] = Teuchos::rcp(
new kk_handle_type());
654 KernelHandle_v_[i]->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
655 A_local_diagblks_v_[i].numRows(),
656 2 * A_local_diagblks_v_[i].nnz() * (LevelOfFill_ + 1),
657 2 * A_local_diagblks_v_[i].nnz() * (LevelOfFill_ + 1));
658 Graph_v_[i]->initialize(KernelHandle_v_[i]);
662 Graph_->initialize();
666 checkOrderingConsistency(*A_local_);
667 if (!isKokkosKernelsStream_) {
668 L_solver_->setMatrix(L_);
670 L_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
671 L_solver_->setMatrices(L_v_);
673 L_solver_->initialize();
675 if (!isKokkosKernelsStream_) {
676 U_solver_->setMatrix(U_);
678 U_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
679 U_solver_->setMatrices(U_v_);
681 U_solver_->initialize();
688 isInitialized_ =
true;
690 initializeTime_ += (timer.wallTime() - startTime);
693template <
class MatrixType>
694void RILUK<MatrixType>::
695 checkOrderingConsistency(
const row_matrix_type& A) {
699 Teuchos::ArrayView<const global_ordinal_type> rowGIDs = A.getRowMap()->getLocalElementList();
700 Teuchos::ArrayView<const global_ordinal_type> colGIDs = A.getColMap()->getLocalElementList();
701 bool gidsAreConsistentlyOrdered =
true;
702 global_ordinal_type indexOfInconsistentGID = 0;
703 for (global_ordinal_type i = 0; i < rowGIDs.size(); ++i) {
704 if (rowGIDs[i] != colGIDs[i]) {
705 gidsAreConsistentlyOrdered =
false;
706 indexOfInconsistentGID = i;
710 TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered ==
false, std::runtime_error,
711 "The ordering of the local GIDs in the row and column maps is not the same"
713 <<
"at index " << indexOfInconsistentGID
714 <<
". Consistency is required, as all calculations are done with"
716 <<
"local indexing.");
719template <
class MatrixType>
720void RILUK<MatrixType>::
721 initAllValues(
const row_matrix_type& A) {
722 using Teuchos::ArrayRCP;
726 using Teuchos::REDUCE_SUM;
727 using Teuchos::reduceAll;
728 typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
730 size_t NumIn = 0, NumL = 0, NumU = 0;
731 bool DiagFound =
false;
732 size_t NumNonzeroDiags = 0;
733 size_t MaxNumEntries = A.getGlobalMaxNumRowEntries();
737 nonconst_local_inds_host_view_type InI(
"InI", MaxNumEntries);
738 Teuchos::Array<local_ordinal_type> LI(MaxNumEntries);
739 Teuchos::Array<local_ordinal_type> UI(MaxNumEntries);
740 nonconst_values_host_view_type InV(
"InV", MaxNumEntries);
741 Teuchos::Array<scalar_type> LV(MaxNumEntries);
742 Teuchos::Array<scalar_type> UV(MaxNumEntries);
745 const bool ReplaceValues = L_->isStaticGraph() || L_->isLocallyIndexed();
750 L_->setAllToScalar(STS::zero());
751 U_->setAllToScalar(STS::zero());
754 D_->putScalar(STS::zero());
755 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
757 RCP<const map_type> rowMap = L_->getRowMap();
767 Teuchos::ArrayView<const global_ordinal_type> nodeGIDs = rowMap->getLocalElementList();
768 for (
size_t myRow = 0; myRow < A.getLocalNumRows(); ++myRow) {
769 local_ordinal_type local_row = myRow;
773 A.getLocalRowCopy(local_row, InI, InV, NumIn);
781 for (
size_t j = 0; j < NumIn; ++j) {
782 const local_ordinal_type k = InI[j];
784 if (k == local_row) {
787 DV(local_row) += Rthresh_ * InV[j] + IFPACK2_SGN(InV[j]) * Athresh_;
789 TEUCHOS_TEST_FOR_EXCEPTION(
790 true, std::runtime_error,
791 "Ifpack2::RILUK::initAllValues: current "
793 << k <<
" < 0. I'm not sure why this is an error; it is "
794 "probably an artifact of the undocumented assumptions of the "
795 "original implementation (likely copied and pasted from Ifpack). "
796 "Nevertheless, the code I found here insisted on this being an error "
797 "state, so I will throw an exception here.");
798 }
else if (k < local_row) {
802 }
else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
814 DV(local_row) = Athresh_;
819 L_->replaceLocalValues(local_row, LI(0, NumL), LV(0, NumL));
823 L_->insertLocalValues(local_row, LI(0, NumL), LV(0, NumL));
829 U_->replaceLocalValues(local_row, UI(0, NumU), UV(0, NumU));
833 U_->insertLocalValues(local_row, UI(0, NumU), UV(0, NumU));
841 isInitialized_ =
true;
844template <
class MatrixType>
845void RILUK<MatrixType>::compute_serial() {
848 initAllValues(*A_local_);
853 const scalar_type MinDiagonalValue = STS::rmin();
854 const scalar_type MaxDiagonalValue = STS::one() / MinDiagonalValue;
856 size_t NumIn, NumL, NumU;
859 const size_t MaxNumEntries =
860 L_->getLocalMaxNumRowEntries() + U_->getLocalMaxNumRowEntries() + 1;
862 Teuchos::Array<local_ordinal_type> InI(MaxNumEntries);
863 Teuchos::Array<scalar_type> InV(MaxNumEntries);
864 size_t num_cols = U_->getColMap()->getLocalNumElements();
865 Teuchos::Array<int> colflag(num_cols, -1);
867 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
871 using IST =
typename row_matrix_type::impl_scalar_type;
872 for (
size_t i = 0; i < L_->getLocalNumRows(); ++i) {
873 local_ordinal_type local_row = i;
876 local_inds_host_view_type UUI;
877 values_host_view_type UUV;
881 NumIn = MaxNumEntries;
882 nonconst_local_inds_host_view_type InI_v(InI.data(), MaxNumEntries);
883 nonconst_values_host_view_type InV_v(
reinterpret_cast<IST*
>(InV.data()), MaxNumEntries);
885 L_->getLocalRowCopy(local_row, InI_v, InV_v, NumL);
888 InI[NumL] = local_row;
890 nonconst_local_inds_host_view_type InI_sub(InI.data() + NumL + 1, MaxNumEntries - NumL - 1);
891 nonconst_values_host_view_type InV_sub(
reinterpret_cast<IST*
>(InV.data()) + NumL + 1, MaxNumEntries - NumL - 1);
893 U_->getLocalRowCopy(local_row, InI_sub, InV_sub, NumU);
894 NumIn = NumL + NumU + 1;
897 for (
size_t j = 0; j < NumIn; ++j) {
901 scalar_type diagmod = STS::zero();
903 for (
size_t jj = 0; jj < NumL; ++jj) {
904 local_ordinal_type j = InI[jj];
905 IST multiplier = InV[jj];
907 InV[jj] *=
static_cast<scalar_type
>(DV(j));
909 U_->getLocalRowView(j, UUI, UUV);
912 if (RelaxValue_ == STM::zero()) {
913 for (
size_t k = 0; k < NumUU; ++k) {
914 const int kk = colflag[UUI[k]];
919 InV[kk] -=
static_cast<scalar_type
>(multiplier * UUV[k]);
924 for (
size_t k = 0; k < NumUU; ++k) {
928 const int kk = colflag[UUI[k]];
930 InV[kk] -=
static_cast<scalar_type
>(multiplier * UUV[k]);
932 diagmod -=
static_cast<scalar_type
>(multiplier * UUV[k]);
940 L_->replaceLocalValues(local_row, InI(0, NumL), InV(0, NumL));
945 if (RelaxValue_ != STM::zero()) {
946 DV(i) += RelaxValue_ * diagmod;
949 if (STS::magnitude(DV(i)) > STS::magnitude(MaxDiagonalValue)) {
950 if (STS::real(DV(i)) < STM::zero()) {
951 DV(i) = -MinDiagonalValue;
953 DV(i) = MinDiagonalValue;
956 DV(i) =
static_cast<impl_scalar_type
>(STS::one()) / DV(i);
959 for (
size_t j = 0; j < NumU; ++j) {
960 InV[NumL + 1 + j] *=
static_cast<scalar_type
>(DV(i));
965 U_->replaceLocalValues(local_row, InI(NumL + 1, NumU), InV(NumL + 1, NumU));
969 for (
size_t j = 0; j < NumIn; ++j) {
970 colflag[InI[j]] = -1;
981 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
982 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
985 L_solver_->setMatrix(L_);
986 L_solver_->compute();
987 U_solver_->setMatrix(U_);
988 U_solver_->compute();
991template <
class MatrixType>
992void RILUK<MatrixType>::compute_kkspiluk() {
996 L_->setAllToScalar(STS::zero());
997 U_->setAllToScalar(STS::zero());
999 using row_map_type =
typename crs_matrix_type::local_matrix_device_type::row_map_type;
1000 auto lclL = L_->getLocalMatrixDevice();
1001 row_map_type L_rowmap = lclL.graph.row_map;
1002 auto L_entries = lclL.graph.entries;
1003 auto L_values = lclL.values;
1005 auto lclU = U_->getLocalMatrixDevice();
1006 row_map_type U_rowmap = lclU.graph.row_map;
1007 auto U_entries = lclU.graph.entries;
1008 auto U_values = lclU.values;
1010 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1011 KokkosSparse::spiluk_numeric(KernelHandle_.getRawPtr(), LevelOfFill_,
1012 lclMtx.graph.row_map, lclMtx.graph.entries, lclMtx.values,
1013 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values);
1015 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
1016 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
1018 L_solver_->compute();
1019 U_solver_->compute();
1022template <
class MatrixType>
1023void RILUK<MatrixType>::compute_kkspiluk_stream() {
1024 std::vector<lno_row_view_t> L_rowmap_v(num_streams_);
1025 std::vector<lno_nonzero_view_t> L_entries_v(num_streams_);
1026 std::vector<scalar_nonzero_view_t> L_values_v(num_streams_);
1027 std::vector<lno_row_view_t> U_rowmap_v(num_streams_);
1028 std::vector<lno_nonzero_view_t> U_entries_v(num_streams_);
1029 std::vector<scalar_nonzero_view_t> U_values_v(num_streams_);
1030 std::vector<kk_handle_type*> KernelHandle_rawptr_v_(num_streams_);
1031 for (
int i = 0; i < num_streams_; i++) {
1032 L_v_[i]->resumeFill();
1033 U_v_[i]->resumeFill();
1035 L_v_[i]->setAllToScalar(STS::zero());
1036 U_v_[i]->setAllToScalar(STS::zero());
1038 auto lclL = L_v_[i]->getLocalMatrixDevice();
1039 L_rowmap_v[i] = lclL.graph.row_map;
1040 L_entries_v[i] = lclL.graph.entries;
1041 L_values_v[i] = lclL.values;
1043 auto lclU = U_v_[i]->getLocalMatrixDevice();
1044 U_rowmap_v[i] = lclU.graph.row_map;
1045 U_entries_v[i] = lclU.graph.entries;
1046 U_values_v[i] = lclU.values;
1047 KernelHandle_rawptr_v_[i] = KernelHandle_v_[i].getRawPtr();
1050 if (hasStreamReordered_) {
1054 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1057 using TeamPolicy = Kokkos::TeamPolicy<execution_space>;
1058 const auto A_nrows = lclMtx.numRows();
1059 auto rows_per_block = ((A_nrows % num_streams_) == 0)
1060 ? (A_nrows / num_streams_)
1061 : (A_nrows / num_streams_ + 1);
1062 for (
int i = 0; i < num_streams_; i++) {
1063 const auto start_row_offset = i * rows_per_block;
1064 auto rowptrs = A_local_diagblks_rowmap_v_[i];
1065 auto colindices = A_local_diagblks_entries_v_[i];
1066 auto values = A_local_diagblks_values_v_[i];
1067 const bool reordered = hasStreamReordered_;
1068 typename lno_nonzero_view_t::non_const_type reverse_perm = hasStreamReordered_ ? reverse_perm_v_[i] :
typename lno_nonzero_view_t::non_const_type{};
1069 TeamPolicy pol(exec_space_instances_[i], A_local_diagblks_rowmap_v_[i].extent(0) - 1, Kokkos::AUTO);
1070 Kokkos::parallel_for(
1071 pol, KOKKOS_LAMBDA(
const typename TeamPolicy::member_type& team) {
1072 const auto irow = team.league_rank();
1073 const auto irow_A = start_row_offset + (reordered ? reverse_perm(irow) : irow);
1074 const auto A_local_crs_row = lclMtx.rowConst(irow_A);
1075 const auto begin_row = rowptrs(irow);
1076 const auto num_entries = rowptrs(irow + 1) - begin_row;
1077 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, num_entries), [&](
const int j) {
1078 const auto colidx = colindices(begin_row + j);
1079 const auto colidx_A = start_row_offset + (reordered ? reverse_perm(colidx) : colidx);
1081 const int offset = KokkosSparse::findRelOffset(
1082 &A_local_crs_row.colidx(0), A_local_crs_row.length, colidx_A, 0,
false);
1083 values(begin_row + j) = A_local_crs_row.value(offset);
1089 KokkosSparse::Experimental::spiluk_numeric_streams(exec_space_instances_, KernelHandle_rawptr_v_, LevelOfFill_,
1090 A_local_diagblks_rowmap_v_, A_local_diagblks_entries_v_, A_local_diagblks_values_v_,
1091 L_rowmap_v, L_entries_v, L_values_v,
1092 U_rowmap_v, U_entries_v, U_values_v);
1094 for (
int i = 0; i < num_streams_; i++) {
1095 L_v_[i]->fillComplete();
1096 U_v_[i]->fillComplete();
1099 L_solver_->compute();
1100 U_solver_->compute();
1103template <
class MatrixType>
1105 using Teuchos::Array;
1106 using Teuchos::ArrayView;
1109 using Teuchos::rcp_const_cast;
1110 using Teuchos::rcp_dynamic_cast;
1111 const char prefix[] =
"Ifpack2::RILUK::compute: ";
1116 TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix <<
"The matrix is null. Please "
1117 "call setMatrix() with a nonnull input before calling this method.");
1118 TEUCHOS_TEST_FOR_EXCEPTION(!A_->isFillComplete(), std::runtime_error, prefix <<
"The matrix is not "
1119 "fill complete. You may not invoke initialize() or compute() with this "
1120 "matrix until the matrix is fill complete. If your matrix is a "
1121 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
1122 "range Maps, if appropriate) before calling this method.");
1124 if (!isInitialized()) {
1128 Teuchos::Time timer(
"RILUK::compute");
1131 Teuchos::TimeMonitor timeMon(timer);
1132 double startTime = timer.wallTime();
1134 isComputed_ =
false;
1136 if (!this->isKokkosKernelsSpiluk_) {
1140 if (!A_local_crs_nc_.is_null()) {
1142 A_local_crs_nc_->resumeFill();
1144 nonconst_local_inds_host_view_type indices(
"indices", A_local_->getLocalMaxNumRowEntries());
1145 nonconst_values_host_view_type values(
"values", A_local_->getLocalMaxNumRowEntries());
1147 size_t numEntries = 0;
1148 A_local_->getLocalRowCopy(i, indices, values, numEntries);
1149 A_local_crs_nc_->replaceLocalValues(i, numEntries,
reinterpret_cast<scalar_type*
>(values.data()), indices.data());
1151 A_local_crs_nc_->fillComplete(A_local_->getDomainMap(), A_local_->getRangeMap());
1154 if (!isKokkosKernelsStream_) {
1158 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1159 if (!hasStreamsWithRCB_) {
1160#if KOKKOS_VERSION >= 50100
1161 KokkosSparse::Experimental::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_, hasStreamReordered_);
1163 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_, hasStreamReordered_);
1166#if KOKKOS_VERSION >= 50100
1167 KokkosSparse::Experimental::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, perm_rcb_, reverse_perm_rcb_, partition_sizes_rcb_,
1168 A_local_diagblks_v_);
1170 auto A_coordinates_lcl = A_coordinates_->getLocalViewDevice(Tpetra::Access::ReadOnly);
1171 Kokkos::deep_copy(coors_rcb_, A_coordinates_lcl);
1172 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, coors_rcb_,
1173 A_local_diagblks_v_, perm_rcb_);
1176 for (
int i = 0; i < num_streams_; i++) {
1177 A_local_diagblks_values_v_[i] = A_local_diagblks_v_[i].values;
1180 compute_kkspiluk_stream();
1186 computeTime_ += (timer.wallTime() - startTime);
1190template <
typename MV,
typename Map>
1191void resetMultiVecIfNeeded(std::unique_ptr<MV>& mv_ptr,
const Map& map,
const size_t numVectors,
bool initialize) {
1192 if (!mv_ptr || mv_ptr->getNumVectors() != numVectors) {
1193 mv_ptr.reset(
new MV(map, numVectors, initialize));
1198template <
class MatrixType>
1200 apply(
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1201 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1202 Teuchos::ETransp mode,
1206 using Teuchos::rcpFromRef;
1208 TEUCHOS_TEST_FOR_EXCEPTION(
1209 A_.is_null(), std::runtime_error,
1210 "Ifpack2::RILUK::apply: The matrix is "
1211 "null. Please call setMatrix() with a nonnull input, then initialize() "
1212 "and compute(), before calling this method.");
1213 TEUCHOS_TEST_FOR_EXCEPTION(
1214 !isComputed(), std::runtime_error,
1215 "Ifpack2::RILUK::apply: If you have not yet called compute(), "
1216 "you must call compute() before calling this method.");
1217 TEUCHOS_TEST_FOR_EXCEPTION(
1218 X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
1219 "Ifpack2::RILUK::apply: X and Y do not have the same number of columns. "
1220 "X.getNumVectors() = "
1221 << X.getNumVectors()
1222 <<
" != Y.getNumVectors() = " << Y.getNumVectors() <<
".");
1223 TEUCHOS_TEST_FOR_EXCEPTION(
1224 STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
1225 "Ifpack2::RILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1226 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1227 "fixed. There is a FIXME in this file about this very issue.");
1229 if (!isKokkosKernelsStream_) {
1231 TEUCHOS_TEST_FOR_EXCEPTION(STM::isnaninf(D_nrm1), std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf.");
1233 Teuchos::Array<magnitude_type> norms(X.getNumVectors());
1236 for (
size_t j = 0; j < X.getNumVectors(); ++j) {
1237 if (STM::isnaninf(norms[j])) {
1242 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the input X is NaN or Inf.");
1248 Teuchos::Time timer(
"RILUK::apply");
1249 double startTime = timer.wallTime();
1251 Teuchos::TimeMonitor timeMon(timer);
1252 if (alpha == one && beta == zero) {
1253 if (isKokkosKernelsSpiluk_ && isKokkosKernelsStream_ && (hasStreamReordered_ || hasStreamsWithRCB_)) {
1254 Impl::resetMultiVecIfNeeded(reordered_x_, X.getMap(), X.getNumVectors(),
false);
1255 Impl::resetMultiVecIfNeeded(reordered_y_, Y.getMap(), Y.getNumVectors(),
false);
1258 for (
size_t j = 0; j < X.getNumVectors(); j++) {
1259 auto X_j = X.getVector(j);
1260 auto ReorderedX_j = reordered_x_->getVectorNonConst(j);
1261 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1262 auto ReorderedX_lcl = ReorderedX_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1263 if (hasStreamReordered_) {
1266 for (
int i = 0; i < num_streams_; i++) {
1267 auto perm_i = perm_v_[i];
1268 stream_end = stream_begin + perm_i.extent(0);
1269 auto X_lcl_sub = Kokkos::subview(X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1270 auto ReorderedX_lcl_sub = Kokkos::subview(ReorderedX_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1271 Kokkos::parallel_for(
1272 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0,
static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1273 ReorderedX_lcl_sub(perm_i(ii)) = X_lcl_sub(ii);
1275 stream_begin = stream_end;
1278 auto perm = perm_rcb_;
1279 Kokkos::parallel_for(
1280 Kokkos::RangePolicy<execution_space>(0,
static_cast<int>(X_lcl.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1281 ReorderedX_lcl(perm(ii), 0) = X_lcl(ii, 0);
1287 if (mode == Teuchos::NO_TRANS) {
1289 L_solver_->apply(*reordered_x_, Y, mode);
1291 U_solver_->apply(Y, *reordered_y_, mode);
1294 U_solver_->apply(*reordered_x_, Y, mode);
1296 L_solver_->apply(Y, *reordered_y_, mode);
1299 for (
size_t j = 0; j < Y.getNumVectors(); j++) {
1300 auto Y_j = Y.getVectorNonConst(j);
1301 auto ReorderedY_j = reordered_y_->getVector(j);
1302 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1303 auto ReorderedY_lcl = ReorderedY_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1304 if (hasStreamReordered_) {
1307 for (
int i = 0; i < num_streams_; i++) {
1308 auto perm_i = perm_v_[i];
1309 stream_end = stream_begin + perm_i.extent(0);
1310 auto Y_lcl_sub = Kokkos::subview(Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1311 auto ReorderedY_lcl_sub = Kokkos::subview(ReorderedY_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1312 Kokkos::parallel_for(
1313 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0,
static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1314 Y_lcl_sub(ii) = ReorderedY_lcl_sub(perm_i(ii));
1316 stream_begin = stream_end;
1319 auto perm = perm_rcb_;
1320 Kokkos::parallel_for(
1321 Kokkos::RangePolicy<execution_space>(0,
static_cast<int>(Y_lcl.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1322 Y_lcl(ii, 0) = ReorderedY_lcl(perm(ii), 0);
1328 if (mode == Teuchos::NO_TRANS) {
1329#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1333 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1336 L_solver_->apply(X, *Y_tmp_, mode);
1338 if (!this->isKokkosKernelsSpiluk_) {
1341 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1344 U_solver_->apply(*Y_tmp_, Y, mode);
1347 L_solver_->apply(X, Y, mode);
1349 if (!this->isKokkosKernelsSpiluk_) {
1352 Y.elementWiseMultiply(one, *D_, Y, zero);
1355 U_solver_->apply(Y, Y, mode);
1358#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1362 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1365 U_solver_->apply(X, *Y_tmp_, mode);
1367 if (!this->isKokkosKernelsSpiluk_) {
1373 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1376 L_solver_->apply(*Y_tmp_, Y, mode);
1379 U_solver_->apply(X, Y, mode);
1381 if (!this->isKokkosKernelsSpiluk_) {
1387 Y.elementWiseMultiply(one, *D_, Y, zero);
1390 L_solver_->apply(Y, Y, mode);
1395 if (alpha == zero) {
1405 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1406 apply(X, *Y_tmp_, mode);
1407 Y.update(alpha, *Y_tmp_, beta);
1413 Teuchos::Array<magnitude_type> norms(Y.getNumVectors());
1416 for (
size_t j = 0; j < Y.getNumVectors(); ++j) {
1417 if (STM::isnaninf(norms[j])) {
1422 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the output Y is NaN or Inf.");
1426 applyTime_ += (timer.wallTime() - startTime);
1462template <
class MatrixType>
1464 std::ostringstream os;
1469 os <<
"\"Ifpack2::RILUK\": {";
1470 os <<
"Initialized: " << (isInitialized() ?
"true" :
"false") <<
", "
1471 <<
"Computed: " << (isComputed() ?
"true" :
"false") <<
", ";
1473 os <<
"Level-of-fill: " << getLevelOfFill() <<
", ";
1475 if (isKokkosKernelsSpiluk_) os <<
"KK-SPILUK, ";
1476 if (isKokkosKernelsStream_) os <<
"KK-Stream, ";
1479 os <<
"Matrix: null";
1481 os <<
"Global matrix dimensions: ["
1482 << A_->getGlobalNumRows() <<
", " << A_->getGlobalNumCols() <<
"]"
1483 <<
", Global nnz: " << A_->getGlobalNumEntries();
1486 if (!L_solver_.is_null()) os <<
", " << L_solver_->description();
1487 if (!U_solver_.is_null()) os <<
", " << U_solver_->description();
1495#define IFPACK2_RILUK_INSTANT(S, LO, GO, N) \
1496 template class Ifpack2::RILUK<Tpetra::RowMatrix<S, LO, GO, N> >;