101 if (data_.symbolic_ != NULL) {
102 ::KLU2::klu_free_symbolic<klu2_dtype, local_ordinal_type>
103 (&(data_.symbolic_), &(data_.common_)) ;
106 if ( single_proc_optimization() ) {
107 host_ordinal_type_array host_row_ptr_view;
108 host_ordinal_type_array host_cols_view;
109 this->matrixA_->returnRowPtr_kokkos_view(host_row_ptr_view);
110 this->matrixA_->returnColInd_kokkos_view(host_cols_view);
111 data_.symbolic_ = ::KLU2::klu_analyze<klu2_dtype, local_ordinal_type>
112 ((local_ordinal_type)this->globalNumCols_, host_row_ptr_view.data(),
113 host_cols_view.data(), &(data_.common_)) ;
117 data_.symbolic_ = ::KLU2::klu_analyze<klu2_dtype, local_ordinal_type>
118 ((local_ordinal_type)this->globalNumCols_, host_col_ptr_view_.data(),
119 host_rows_view_.data(), &(data_.common_)) ;
141#ifdef HAVE_AMESOS2_TIMERS
142 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
145 if (data_.numeric_ != NULL) {
146 ::KLU2::klu_free_numeric<klu2_dtype, local_ordinal_type>
147 (&(data_.numeric_), &(data_.common_));
150 if ( single_proc_optimization() ) {
151 host_ordinal_type_array host_row_ptr_view;
152 host_ordinal_type_array host_cols_view;
153 this->matrixA_->returnRowPtr_kokkos_view(host_row_ptr_view);
154 this->matrixA_->returnColInd_kokkos_view(host_cols_view);
155 this->matrixA_->returnValues_kokkos_view(host_nzvals_view_);
156 klu2_dtype * pValues = function_map::convert_scalar(host_nzvals_view_.data());
157 data_.numeric_ = ::KLU2::klu_factor<klu2_dtype, local_ordinal_type>
158 (host_row_ptr_view.data(), host_cols_view.data(), pValues,
159 data_.symbolic_, &(data_.common_));
162 klu2_dtype * pValues = function_map::convert_scalar(host_nzvals_view_.data());
163 data_.numeric_ = ::KLU2::klu_factor<klu2_dtype, local_ordinal_type>
164 (host_col_ptr_view_.data(), host_rows_view_.data(), pValues,
165 data_.symbolic_, &(data_.common_));
173 if(data_.numeric_ ==
nullptr) {
175 if(debug_level_ > 0) {
176 std::cout <<
" ** Amesos2::KLU2::numericFactorization failed with status = ";
177 if(data_.common_.status == KLU_OK)
178 std::cout <<
"KLU_OK **\n";
179 else if (data_.common_.status == KLU_SINGULAR)
180 std::cout <<
"KLU_SINGULAR **\n";
181 else if (data_.common_.status == KLU_OUT_OF_MEMORY)
182 std::cout <<
"KLU_OUT_OF_MEMORY **\n";
183 else if (data_.common_.status == KLU_INVALID)
184 std::cout <<
"KLU_INVALID **\n";
185 else if (data_.common_.status == KLU_TOO_LARGE)
186 std::cout <<
"KLU_TOO_LARGE **\n";
193 this->setNnzLU( as<size_t>((data_.numeric_)->lnz) + as<size_t>((data_.numeric_)->unz) );
200 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
202 TEUCHOS_TEST_FOR_EXCEPTION(info > 0, std::runtime_error,
203 "KLU2 numeric factorization failed(info="+std::to_string(info)+
")");
217 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
218 const size_t nrhs = X->getGlobalNumVectors();
219 if (debug_level_ > 0) {
220 if (this->root_) std::cout <<
"\n == Amesos2_KLU2::solve_impl ==" << std::endl;
221 if (debug_level_ == 1) {
224 Teuchos::RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
225 if (!is_null(B->getMap())) B->getMap()->describe(*fancy, Teuchos::VERB_EXTREME);
226 std::cout << std::endl;
227 B->describe(*fancy, Teuchos::VERB_EXTREME);
233 bool use_gather = use_gather_;
234 use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1);
235 use_gather = (use_gather && (std::is_same<vector_scalar_type, float>::value ||
236 std::is_same<vector_scalar_type, double>::value));
238#ifdef HAVE_AMESOS2_TIMERS
239 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
241 const bool initialize_data =
true;
242 const bool do_not_initialize_data =
false;
243 if ( single_proc_optimization() && nrhs == 1 ) {
245 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
246 host_solve_array_t>::do_get(initialize_data, B, bValues_, as<size_t>(ld_rhs));
248 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
249 host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_, as<size_t>(ld_rhs));
253 int rval = B->gather(bValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
256 X->gather(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
265 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
266 host_solve_array_t>::do_get(initialize_data, B, bValues_,
269 this->rowIndexBase_);
271 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
272 host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_,
275 this->rowIndexBase_);
285 Kokkos::deep_copy(xValues_, bValues_);
293 klu2_dtype * pxValues = function_map::convert_scalar(xValues_.data());
294 klu2_dtype * pbValues = function_map::convert_scalar(bValues_.data());
298 TEUCHOS_TEST_FOR_EXCEPTION(pbValues ==
nullptr,
299 std::runtime_error,
"Amesos2 Runtime Error: b_vector returned null ");
301 TEUCHOS_TEST_FOR_EXCEPTION(pxValues ==
nullptr,
302 std::runtime_error,
"Amesos2 Runtime Error: x_vector returned null ");
305 if ( single_proc_optimization() && nrhs == 1 ) {
306#ifdef HAVE_AMESOS2_TIMERS
307 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
315 ::KLU2::klu_tsolve2<klu2_dtype, local_ordinal_type>
316 (data_.symbolic_, data_.numeric_,
317 (local_ordinal_type)this->globalNumCols_,
318 (local_ordinal_type)nrhs,
319 pbValues, pxValues, &(data_.common_)) ;
322 ::KLU2::klu_solve2<klu2_dtype, local_ordinal_type>
323 (data_.symbolic_, data_.numeric_,
324 (local_ordinal_type)this->globalNumCols_,
325 (local_ordinal_type)nrhs,
326 pbValues, pxValues, &(data_.common_)) ;
337#ifdef HAVE_AMESOS2_TIMERS
338 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
345 if ( single_proc_optimization() ) {
346 ::KLU2::klu_tsolve<klu2_dtype, local_ordinal_type>
347 (data_.symbolic_, data_.numeric_,
348 (local_ordinal_type)this->globalNumCols_,
349 (local_ordinal_type)nrhs,
350 pxValues, &(data_.common_)) ;
353 ::KLU2::klu_solve<klu2_dtype, local_ordinal_type>
354 (data_.symbolic_, data_.numeric_,
355 (local_ordinal_type)this->globalNumCols_,
356 (local_ordinal_type)nrhs,
357 pxValues, &(data_.common_)) ;
365 if ( single_proc_optimization() ) {
366 ::KLU2::klu_solve<klu2_dtype, local_ordinal_type>
367 (data_.symbolic_, data_.numeric_,
368 (local_ordinal_type)this->globalNumCols_,
369 (local_ordinal_type)nrhs,
370 pxValues, &(data_.common_)) ;
373 ::KLU2::klu_tsolve<klu2_dtype, local_ordinal_type>
374 (data_.symbolic_, data_.numeric_,
375 (local_ordinal_type)this->globalNumCols_,
376 (local_ordinal_type)nrhs,
377 pxValues, &(data_.common_)) ;
386#ifdef HAVE_AMESOS2_TIMERS
387 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
390 int rval = X->scatter(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
392 if (rval != 0) use_gather =
false;
395 Util::put_1d_data_helper_kokkos_view<
399 this->rowIndexBase_);
402 if (debug_level_ > 0) {
403 if (debug_level_ == 1) {
406 Teuchos::RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
407 if (!is_null(X->getMap())) X->getMap()->describe(*fancy, Teuchos::VERB_EXTREME);
408 std::cout << std::endl;
409 X->describe(*fancy, Teuchos::VERB_EXTREME);
499#ifdef HAVE_AMESOS2_TIMERS
500 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
503 if(current_phase == SOLVE)
return(
false);
504 if (debug_level_ > 0 && current_phase == NUMFACT) {
506 std::cout <<
"\n == Amesos2_KLU2::loadA_impl";
507 if (current_phase == PREORDERING) std::cout <<
"(PreOrder)";
508 if (current_phase == SYMBFACT) std::cout <<
"(SymFact)";
509 if (current_phase == NUMFACT) std::cout <<
"(NumFact)";
510 std::cout <<
" ==" << std::endl;
512 Teuchos::RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
513 this->matrixA_->describe(*fancy, (debug_level_ == 1 ? Teuchos::VERB_LOW : Teuchos::VERB_EXTREME));
516 if ( single_proc_optimization() ) {
523 if (host_nzvals_view_.extent(0) != this->globalNumNonZeros_)
524 Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
525 if (host_rows_view_.extent(0) != this->globalNumNonZeros_)
526 Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
527 if (host_col_ptr_view_.extent(0) != (this->globalNumRows_ + 1))
528 Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
530 local_ordinal_type nnz_ret = -1;
531 bool use_gather = use_gather_;
532 use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1);
533 use_gather = (use_gather && (std::is_same<scalar_type, float>::value || std::is_same<scalar_type, double>::value));
535#ifdef HAVE_AMESOS2_TIMERS
536 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
539 bool column_major =
true;
540 if (!is_contiguous_) {
541 auto contig_mat = this->matrixA_->reindex(this->contig_rowmap_, this->contig_colmap_, current_phase);
542 nnz_ret = contig_mat->gather(host_nzvals_view_, host_rows_view_, host_col_ptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
543 this->transpose_map, this->nzvals_t, column_major, current_phase);
545 nnz_ret = this->matrixA_->gather(host_nzvals_view_, host_rows_view_, host_col_ptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
546 this->transpose_map, this->nzvals_t, column_major, current_phase);
550 if (nnz_ret < 0) use_gather =
false;
555 ::do_get(this->matrixA_.ptr(), host_nzvals_view_, host_rows_view_, host_col_ptr_view_, nnz_ret,
558 this->rowIndexBase_);
563 if (use_gather || this->root_) {
564 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<local_ordinal_type>(this->globalNumNonZeros_),
566 "Amesos2_KLU2 loadA_impl: Did not get the expected number of non-zero vals("
567 +std::to_string(nnz_ret)+
" vs "+std::to_string(this->globalNumNonZeros_)+
")");