129 using Ifpack2::Details::getParamTryingTypes;
130 const char prefix[] =
"Ifpack2::ILUT: ";
137 IlutImplType::Enum ilutimplType = IlutImplType::Serial;
139 static const char typeName[] =
"fact: type";
141 if (!params.isType<std::string>(typeName))
break;
144 Teuchos::Array<std::string> ilutimplTypeStrs;
145 Teuchos::Array<IlutImplType::Enum> ilutimplTypeEnums;
146 IlutImplType::loadPLTypeOption(ilutimplTypeStrs, ilutimplTypeEnums);
147 Teuchos::StringToIntegralParameterEntryValidator<IlutImplType::Enum>
148 s2i(ilutimplTypeStrs(), ilutimplTypeEnums(), typeName,
false);
150 ilutimplType = s2i.getIntegralValue(params.get<std::string>(typeName));
153 if (ilutimplType == IlutImplType::PAR_ILUT) {
154 this->useKokkosKernelsParILUT_ =
true;
156 this->useKokkosKernelsParILUT_ =
false;
162 double fillLevel = LevelOfFill_;
164 const std::string paramName(
"fact: ilut level-of-fill");
165 TEUCHOS_TEST_FOR_EXCEPTION(
166 (params.isParameter(paramName) && this->useKokkosKernelsParILUT_), std::runtime_error,
167 "Ifpack2::ILUT: Parameter " << paramName <<
" is meaningless for algorithm par_ilut.");
168 getParamTryingTypes<double, double, float>(fillLevel, params, paramName, prefix);
169 TEUCHOS_TEST_FOR_EXCEPTION(fillLevel < 1.0, std::runtime_error,
170 "Ifpack2::ILUT: The \"" << paramName <<
"\" parameter must be >= "
171 "1.0, but you set it to "
172 << fillLevel <<
". For ILUT, the fill level "
173 "means something different than it does for ILU(k). ILU(0) produces "
174 "factors with the same sparsity structure as the input matrix A. For "
175 "ILUT, level-of-fill = 1.0 will produce factors with nonzeros matching "
176 "the sparsity structure of A. level-of-fill > 1.0 allows for additional "
182 const std::string paramName(
"fact: absolute threshold");
183 getParamTryingTypes<magnitude_type, magnitude_type, double>(absThresh, params, paramName, prefix);
188 const std::string paramName(
"fact: relative threshold");
189 getParamTryingTypes<magnitude_type, magnitude_type, double>(relThresh, params, paramName, prefix);
194 const std::string paramName(
"fact: relax value");
195 getParamTryingTypes<magnitude_type, magnitude_type, double>(relaxValue, params, paramName, prefix);
200 const std::string paramName(
"fact: drop tolerance");
201 getParamTryingTypes<magnitude_type, magnitude_type, double>(dropTol, params, paramName, prefix);
204 int par_ilut_max_iter = 20;
206 int par_ilut_team_size = 0;
207 int par_ilut_vector_size = 0;
208 float par_ilut_fill_in_limit = 0.75;
209 bool par_ilut_verbose =
false;
210 if (this->useKokkosKernelsParILUT_) {
211 par_ilut_max_iter = par_ilut_options_.max_iter;
212 par_ilut_residual_norm_delta_stop = par_ilut_options_.residual_norm_delta_stop;
213 par_ilut_team_size = par_ilut_options_.team_size;
214 par_ilut_vector_size = par_ilut_options_.vector_size;
215 par_ilut_fill_in_limit = par_ilut_options_.fill_in_limit;
216 par_ilut_verbose = par_ilut_options_.verbose;
218 std::string par_ilut_plist_name(
"parallel ILUT options");
219 if (params.isSublist(par_ilut_plist_name)) {
220 Teuchos::ParameterList
const& par_ilut_plist = params.sublist(par_ilut_plist_name);
222 std::string paramName(
"maximum iterations");
223 getParamTryingTypes<int, int>(par_ilut_max_iter, par_ilut_plist, paramName, prefix);
225 paramName =
"residual norm delta stop";
226 getParamTryingTypes<magnitude_type, magnitude_type, double>(par_ilut_residual_norm_delta_stop, par_ilut_plist, paramName, prefix);
228 paramName =
"team size";
229 getParamTryingTypes<int, int>(par_ilut_team_size, par_ilut_plist, paramName, prefix);
231 paramName =
"vector size";
232 getParamTryingTypes<int, int>(par_ilut_vector_size, par_ilut_plist, paramName, prefix);
234 paramName =
"fill in limit";
235 getParamTryingTypes<float, float, double>(par_ilut_fill_in_limit, par_ilut_plist, paramName, prefix);
237 paramName =
"verbose";
238 getParamTryingTypes<bool, bool>(par_ilut_verbose, par_ilut_plist, paramName, prefix);
242 par_ilut_options_.max_iter = par_ilut_max_iter;
243 par_ilut_options_.residual_norm_delta_stop = par_ilut_residual_norm_delta_stop;
244 par_ilut_options_.team_size = par_ilut_team_size;
245 par_ilut_options_.vector_size = par_ilut_vector_size;
246 par_ilut_options_.fill_in_limit = par_ilut_fill_in_limit;
247 par_ilut_options_.verbose = par_ilut_verbose;
252 L_solver_->setParameters(params);
253 U_solver_->setParameters(params);
255 LevelOfFill_ = fillLevel;
256 Athresh_ = absThresh;
257 Rthresh_ = relThresh;
258 RelaxValue_ = relaxValue;
259 DropTolerance_ = dropTol;
430 using Teuchos::Array;
432 using Teuchos::rcp_const_cast;
433 Teuchos::Time timer(
"ILUT::initialize");
434 double startTime = timer.wallTime();
436 Teuchos::TimeMonitor timeMon(timer);
439 TEUCHOS_TEST_FOR_EXCEPTION(
440 A_.is_null(), std::runtime_error,
441 "Ifpack2::ILUT::initialize: "
442 "The matrix to precondition is null. Please call setMatrix() with a "
443 "nonnull input before calling this method.");
446 IsInitialized_ =
false;
448 A_local_ = Teuchos::null;
452 A_local_ = makeLocalFilter(A_);
453 TEUCHOS_TEST_FOR_EXCEPTION(
454 A_local_.is_null(), std::logic_error,
455 "Ifpack2::RILUT::initialize: "
456 "makeLocalFilter returned null; it failed to compute A_local. "
457 "Please report this bug to the Ifpack2 developers.");
459 if (this->useKokkosKernelsParILUT_) {
460 this->KernelHandle_ = Teuchos::rcp(
new kk_handle_type());
461 KernelHandle_->create_par_ilut_handle();
462 auto par_ilut_handle = KernelHandle_->get_par_ilut_handle();
463 par_ilut_handle->set_residual_norm_delta_stop(par_ilut_options_.residual_norm_delta_stop);
464 par_ilut_handle->set_team_size(par_ilut_options_.team_size);
465 par_ilut_handle->set_vector_size(par_ilut_options_.vector_size);
466 par_ilut_handle->set_max_iter(par_ilut_options_.max_iter);
467 par_ilut_handle->set_fill_in_limit(par_ilut_options_.fill_in_limit);
468 par_ilut_handle->set_verbose(par_ilut_options_.verbose);
469 par_ilut_handle->set_async_update(
false);
471 RCP<const crs_matrix_type> A_local_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_local_);
472 if (A_local_crs.is_null()) {
475 Array<size_t> entriesPerRow(numRows);
477 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
479 RCP<crs_matrix_type> A_local_crs_nc =
481 A_local_->getColMap(),
484 nonconst_local_inds_host_view_type indices(
"indices", A_local_->getLocalMaxNumRowEntries());
485 nonconst_values_host_view_type values(
"values", A_local_->getLocalMaxNumRowEntries());
487 size_t numEntries = 0;
488 A_local_->getLocalRowCopy(i, indices, values, numEntries);
489 A_local_crs_nc->insertLocalValues(i, numEntries,
reinterpret_cast<scalar_type*
>(values.data()), indices.data());
491 A_local_crs_nc->fillComplete(A_local_->getDomainMap(), A_local_->getRangeMap());
492 A_local_crs = rcp_const_cast<const crs_matrix_type>(A_local_crs_nc);
494 auto A_local_crs_device = A_local_crs->getLocalMatrixDevice();
497 typedef typename Kokkos::View<usize_type*, array_layout, device_type> ulno_row_view_t;
498 const int NumMyRows = A_local_crs->getRowMap()->getLocalNumElements();
499 L_rowmap_ = ulno_row_view_t(
"L_row_map", NumMyRows + 1);
500 U_rowmap_ = ulno_row_view_t(
"U_row_map", NumMyRows + 1);
501 L_rowmap_orig_ = ulno_row_view_t(
"L_row_map_orig", NumMyRows + 1);
502 U_rowmap_orig_ = ulno_row_view_t(
"U_row_map_orig", NumMyRows + 1);
504 KokkosSparse::Experimental::par_ilut_symbolic(KernelHandle_.getRawPtr(),
505 A_local_crs_device.graph.row_map, A_local_crs_device.graph.entries,
509 Kokkos::deep_copy(L_rowmap_orig_, L_rowmap_);
510 Kokkos::deep_copy(U_rowmap_orig_, U_rowmap_);
513 IsInitialized_ =
true;
516 InitializeTime_ += (timer.wallTime() - startTime);
527 using Teuchos::Array;
528 using Teuchos::ArrayRCP;
529 using Teuchos::ArrayView;
533 using Teuchos::rcp_const_cast;
534 using Teuchos::reduceAll;
537 if (!isInitialized()) {
541 Teuchos::Time timer(
"ILUT::compute");
542 double startTime = timer.wallTime();
544 Teuchos::TimeMonitor timeMon(timer,
true);
546 if (!this->useKokkosKernelsParILUT_) {
578#ifdef IFPACK2_WRITE_ILUT_FACTORS
579 std::ofstream ofsL(
"L.ifpack2_ilut.mtx", std::ios::out);
580 std::ofstream ofsU(
"U.ifpack2_ilut.mtx", std::ios::out);
585 double local_nnz =
static_cast<double>(A_local_->getLocalNumEntries());
586 double fill = ((getLevelOfFill() - 1.0) * local_nnz) / (2 * myNumRows);
591 double fill_ceil = std::ceil(fill);
595 size_type fillL =
static_cast<size_type
>(fill_ceil);
596 size_type fillU =
static_cast<size_type
>(fill_ceil);
598 Array<scalar_type> InvDiagU(myNumRows, zero);
600 Array<Array<local_ordinal_type> > L_tmp_idx(myNumRows);
601 Array<Array<scalar_type> > L_tmpv(myNumRows);
602 Array<Array<local_ordinal_type> > U_tmp_idx(myNumRows);
603 Array<Array<scalar_type> > U_tmpv(myNumRows);
610 Array<int> pattern(max_col, UNUSED);
611 Array<scalar_type> cur_row(max_col, zero);
612 Array<magnitude_type> unorm(max_col);
614 Array<local_ordinal_type> L_cols_heap;
615 Array<local_ordinal_type> U_cols;
616 Array<local_ordinal_type> L_vals_heap;
617 Array<local_ordinal_type> U_vals_heap;
622 greater_indirect<scalar_type, local_ordinal_type> vals_comp(cur_row);
627 nonconst_local_inds_host_view_type ColIndicesARCP;
628 nonconst_values_host_view_type ColValuesARCP;
629 if (!A_local_->supportsRowViews()) {
630 const size_t maxnz = A_local_->getLocalMaxNumRowEntries();
631 Kokkos::resize(ColIndicesARCP, maxnz);
632 Kokkos::resize(ColValuesARCP, maxnz);
636 local_inds_host_view_type ColIndicesA;
637 values_host_view_type ColValuesA;
640 if (A_local_->supportsRowViews()) {
641 A_local_->getLocalRowView(row_i, ColIndicesA, ColValuesA);
642 RowNnz = ColIndicesA.size();
644 A_local_->getLocalRowCopy(row_i, ColIndicesARCP, ColValuesARCP, RowNnz);
645 ColIndicesA = Kokkos::subview(ColIndicesARCP, std::make_pair((
size_t)0, RowNnz));
646 ColValuesA = Kokkos::subview(ColValuesARCP, std::make_pair((
size_t)0, RowNnz));
651 U_cols.push_back(row_i);
652 cur_row[row_i] = zero;
653 pattern[row_i] = ORIG;
655 size_type L_cols_heaplen = 0;
656 rownorm = STM::zero();
657 for (
size_t i = 0; i < RowNnz; ++i) {
658 if (ColIndicesA[i] < myNumRows) {
659 if (ColIndicesA[i] < row_i) {
660 add_to_heap(ColIndicesA[i], L_cols_heap, L_cols_heaplen);
661 }
else if (ColIndicesA[i] > row_i) {
662 U_cols.push_back(ColIndicesA[i]);
665 cur_row[ColIndicesA[i]] = ColValuesA[i];
666 pattern[ColIndicesA[i]] = ORIG;
667 rownorm += scalar_mag(ColValuesA[i]);
676 cur_row[row_i] = as<scalar_type>(getAbsoluteThreshold() * IFPACK2_SGN(v)) + rthresh * v;
678 size_type orig_U_len = U_cols.size();
679 RowNnz = L_cols_heap.size() + orig_U_len;
680 rownorm = getDropTolerance() * rownorm / RowNnz;
683 size_type L_vals_heaplen = 0;
684 while (L_cols_heaplen > 0) {
687 scalar_type multiplier = cur_row[row_k] * InvDiagU[row_k];
688 cur_row[row_k] = multiplier;
690 if (mag_mult * unorm[row_k] < rownorm) {
691 pattern[row_k] = UNUSED;
695 if (pattern[row_k] != ORIG) {
696 if (L_vals_heaplen < fillL) {
697 add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp);
698 }
else if (L_vals_heaplen == 0 ||
699 mag_mult < scalar_mag(cur_row[L_vals_heap.front()])) {
700 pattern[row_k] = UNUSED;
704 pattern[L_vals_heap.front()] = UNUSED;
706 add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp);
712 ArrayView<local_ordinal_type> ColIndicesU = U_tmp_idx[row_k]();
713 ArrayView<scalar_type> ColValuesU = U_tmpv[row_k]();
714 size_type ColNnzU = ColIndicesU.size();
716 for (size_type j = 0; j < ColNnzU; ++j) {
717 if (ColIndicesU[j] > row_k) {
720 if (pattern[col_j] != UNUSED) {
721 cur_row[col_j] -= tmp;
722 }
else if (scalar_mag(tmp) > rownorm) {
723 cur_row[col_j] = -tmp;
724 pattern[col_j] = FILL;
726 U_cols.push_back(col_j);
740 for (size_type i = 0; i < (size_type)ColIndicesA.size(); ++i) {
741 if (ColIndicesA[i] < row_i) {
742 L_tmp_idx[row_i].push_back(ColIndicesA[i]);
743 L_tmpv[row_i].push_back(cur_row[ColIndicesA[i]]);
744 pattern[ColIndicesA[i]] = UNUSED;
749 for (size_type j = 0; j < L_vals_heaplen; ++j) {
750 L_tmp_idx[row_i].push_back(L_vals_heap[j]);
751 L_tmpv[row_i].push_back(cur_row[L_vals_heap[j]]);
752 pattern[L_vals_heap[j]] = UNUSED;
760#ifdef IFPACK2_WRITE_ILUT_FACTORS
761 for (size_type ii = 0; ii < L_tmp_idx[row_i].size(); ++ii) {
762 ofsL << row_i <<
" " << L_tmp_idx[row_i][ii] <<
" "
763 << L_tmpv[row_i][ii] << std::endl;
768 if (cur_row[row_i] == zero) {
769 std::cerr <<
"Ifpack2::ILUT::Compute: zero pivot encountered! "
770 <<
"Replacing with rownorm and continuing..."
771 <<
"(You may need to set the parameter "
772 <<
"'fact: absolute threshold'.)" << std::endl;
773 cur_row[row_i] = rownorm;
775 InvDiagU[row_i] = one / cur_row[row_i];
778 U_tmp_idx[row_i].push_back(row_i);
779 U_tmpv[row_i].push_back(cur_row[row_i]);
780 unorm[row_i] = scalar_mag(cur_row[row_i]);
781 pattern[row_i] = UNUSED;
787 size_type U_vals_heaplen = 0;
788 for (size_type j = 1; j < U_cols.size(); ++j) {
790 if (pattern[col] != ORIG) {
791 if (U_vals_heaplen < fillU) {
792 add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp);
793 }
else if (U_vals_heaplen != 0 && scalar_mag(cur_row[col]) >
794 scalar_mag(cur_row[U_vals_heap.front()])) {
796 add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp);
799 U_tmp_idx[row_i].push_back(col);
800 U_tmpv[row_i].push_back(cur_row[col]);
801 unorm[row_i] += scalar_mag(cur_row[col]);
803 pattern[col] = UNUSED;
806 for (size_type j = 0; j < U_vals_heaplen; ++j) {
807 U_tmp_idx[row_i].push_back(U_vals_heap[j]);
808 U_tmpv[row_i].push_back(cur_row[U_vals_heap[j]]);
809 unorm[row_i] += scalar_mag(cur_row[U_vals_heap[j]]);
812 unorm[row_i] /= (orig_U_len + U_vals_heaplen);
814#ifdef IFPACK2_WRITE_ILUT_FACTORS
815 for (
int ii = 0; ii < U_tmp_idx[row_i].size(); ++ii) {
816 ofsU << row_i <<
" " << U_tmp_idx[row_i][ii] <<
" "
817 << U_tmpv[row_i][ii] << std::endl;
828 Array<size_t> nnzPerRow(myNumRows);
834 L_solver_->setMatrix(Teuchos::null);
835 U_solver_->setMatrix(Teuchos::null);
838 nnzPerRow[row_i] = L_tmp_idx[row_i].size();
841 L_ = rcp(
new crs_matrix_type(A_local_->getRowMap(), A_local_->getColMap(),
845 L_->insertLocalValues(row_i, L_tmp_idx[row_i](), L_tmpv[row_i]());
851 nnzPerRow[row_i] = U_tmp_idx[row_i].size();
854 U_ = rcp(
new crs_matrix_type(A_local_->getRowMap(), A_local_->getColMap(),
858 U_->insertLocalValues(row_i, U_tmp_idx[row_i](), U_tmpv[row_i]());
863 L_solver_->setMatrix(L_);
864 L_solver_->initialize();
865 L_solver_->compute();
867 U_solver_->setMatrix(U_);
868 U_solver_->initialize();
869 U_solver_->compute();
875 if (this->isComputed()) {
876 Kokkos::resize(L_rowmap_, L_rowmap_orig_.size());
877 Kokkos::resize(U_rowmap_, U_rowmap_orig_.size());
878 Kokkos::deep_copy(L_rowmap_, L_rowmap_orig_);
879 Kokkos::deep_copy(U_rowmap_, U_rowmap_orig_);
882 RCP<const crs_matrix_type> A_local_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_local_);
884 if (A_local_crs.is_null()) {
886 Array<size_t> entriesPerRow(numRows);
888 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
890 RCP<crs_matrix_type> A_local_crs_nc =
892 A_local_->getColMap(),
895 nonconst_local_inds_host_view_type indices(
"indices", A_local_->getLocalMaxNumRowEntries());
896 nonconst_values_host_view_type values(
"values", A_local_->getLocalMaxNumRowEntries());
898 size_t numEntries = 0;
899 A_local_->getLocalRowCopy(i, indices, values, numEntries);
900 A_local_crs_nc->insertLocalValues(i, numEntries,
reinterpret_cast<scalar_type*
>(values.data()), indices.data());
902 A_local_crs_nc->fillComplete(A_local_->getDomainMap(), A_local_->getRangeMap());
903 A_local_crs = rcp_const_cast<const crs_matrix_type>(A_local_crs_nc);
905 auto lclMtx = A_local_crs->getLocalMatrixDevice();
906 A_local_rowmap_ = lclMtx.graph.row_map;
907 A_local_entries_ = lclMtx.graph.entries;
908 A_local_values_ = lclMtx.values;
912 auto par_ilut_handle = KernelHandle_->get_par_ilut_handle();
913 auto nnzL = par_ilut_handle->get_nnzL();
914 static_graph_entries_t L_entries_ = static_graph_entries_t(
"L_entries", nnzL);
915 local_matrix_values_t L_values_ = local_matrix_values_t(
"L_values", nnzL);
917 auto nnzU = par_ilut_handle->get_nnzU();
918 static_graph_entries_t U_entries_ = static_graph_entries_t(
"U_entries", nnzU);
919 local_matrix_values_t U_values_ = local_matrix_values_t(
"U_values", nnzU);
921 KokkosSparse::Experimental::par_ilut_numeric(KernelHandle_.getRawPtr(),
922 A_local_rowmap_, A_local_entries_, A_local_values_,
923 L_rowmap_, L_entries_, L_values_, U_rowmap_, U_entries_, U_values_);
925 auto L_kokkosCrsGraph = local_graph_device_type(L_entries_, L_rowmap_);
926 auto U_kokkosCrsGraph = local_graph_device_type(U_entries_, U_rowmap_);
928 local_matrix_device_type L_localCrsMatrix_device;
929 L_localCrsMatrix_device = local_matrix_device_type(
"L_Factor_localmatrix",
930 A_local_->getLocalNumRows(),
935 A_local_crs->getRowMap(),
936 A_local_crs->getColMap(),
937 A_local_crs->getDomainMap(),
938 A_local_crs->getRangeMap(),
939 A_local_crs->getGraph()->getImporter(),
940 A_local_crs->getGraph()->getExporter()));
942 local_matrix_device_type U_localCrsMatrix_device;
943 U_localCrsMatrix_device = local_matrix_device_type(
"U_Factor_localmatrix",
944 A_local_->getLocalNumRows(),
949 A_local_crs->getRowMap(),
950 A_local_crs->getColMap(),
951 A_local_crs->getDomainMap(),
952 A_local_crs->getRangeMap(),
953 A_local_crs->getGraph()->getImporter(),
954 A_local_crs->getGraph()->getExporter()));
956 L_solver_->setMatrix(L_);
957 L_solver_->compute();
958 U_solver_->setMatrix(U_);
959 U_solver_->compute();
963 ComputeTime_ += (timer.wallTime() - startTime);