36 Teuchos::RCP<Vector> X,
37 Teuchos::RCP<const Vector> B)
42 , force_symbfact_(false)
43 , is_contiguous_(true)
49 using Teuchos::MpiComm;
50 using Teuchos::outArg;
51 using Teuchos::ParameterList;
52 using Teuchos::parameterList;
55 using Teuchos::rcp_dynamic_cast;
56 using Teuchos::REDUCE_SUM;
57 using Teuchos::reduceAll;
58 typedef global_ordinal_type GO;
64 RCP<const Comm<int> > comm = this->
getComm ();
66 numProcs = comm->getSize ();
68 SLUD::int_t nprow, npcol;
74 RCP<const Comm<int> > matComm = this->
matrixA_->getComm ();
75 TEUCHOS_TEST_FOR_EXCEPTION(
76 matComm.is_null (), std::logic_error,
"Amesos2::Superlustdist "
77 "constructor: The matrix's communicator is null!");
78 RCP<const MpiComm<int> > matMpiComm =
79 rcp_dynamic_cast<const MpiComm<int> > (matComm);
84 TEUCHOS_TEST_FOR_EXCEPTION(
85 matMpiComm.is_null (), std::logic_error,
"Amesos2::Superlustdist "
86 "constructor: The matrix's communicator is not an MpiComm!");
87 TEUCHOS_TEST_FOR_EXCEPTION(
88 matMpiComm->getRawMpiComm ().is_null (), std::logic_error,
"Amesos2::"
89 "Superlustdist constructor: The matrix's communicator claims to be a "
90 "Teuchos::MpiComm<int>, but its getRawPtrComm() method returns "
91 "Teuchos::null! This means that the underlying MPI_Comm doesn't even "
92 "exist, which likely implies that the Teuchos::MpiComm was constructed "
93 "incorrectly. It means something different than if the MPI_Comm were "
95 MPI_Comm rawMpiComm = (* (matMpiComm->getRawMpiComm ())) ();
96 data_.mat_comm = rawMpiComm;
102 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
111 SLUD::set_default_options_dist(&data_.options);
113 RCP<ParameterList> default_params =
118 data_.options.Fact = SLUD::DOFACT;
119 data_.equed = SLUD::NOEQUIL;
120 data_.options.SolveInitialized = SLUD::NO;
121 data_.options.RefineInitialized = SLUD::NO;
122 data_.rowequ =
false;
123 data_.colequ =
false;
126 data_.largediag_mc64_job = 4;
136 data_.symb_comm = MPI_COMM_NULL;
141 data_.domains = (int) ( pow(2.0, floor(log10((
double)nprow*npcol)/log10(2.0))) );
143 const int color = (
myRank < data_.domains) ? 0 : MPI_UNDEFINED;
144 MPI_Comm_split (data_.mat_comm, color,
myRank, &(data_.symb_comm));
156 int myProcParticipates = 0;
157 if (
myRank < nprow * npcol) {
159 myProcParticipates = 1;
164 int numParticipatingProcs = 0;
165 reduceAll<int, int> (*comm, REDUCE_SUM, myProcParticipates,
166 outArg (numParticipatingProcs));
167 TEUCHOS_TEST_FOR_EXCEPTION(
168 this->globalNumRows_ != 0 && numParticipatingProcs == 0,
169 std::logic_error,
"Amesos2::Superludist constructor: The matrix has "
170 << this->globalNumRows_ <<
" > 0 global row(s), but no processes in the "
171 "communicator want to participate in its factorization! nprow = "
172 << nprow <<
" and npcol = " << npcol <<
".");
175 size_t myNumRows = 0;
178 const GO quotient = (numParticipatingProcs == 0) ?
static_cast<GO
> (0) :
179 GNR /
static_cast<GO
> (numParticipatingProcs);
181 GNR - quotient *
static_cast<GO
> (numParticipatingProcs);
182 const GO lclNumRows = (
static_cast<GO
> (
myRank) < remainder) ?
183 (quotient +
static_cast<GO
> (1)) : quotient;
184 myNumRows =
static_cast<size_t> (lclNumRows);
188 const GO indexBase = this->
matrixA_->getRowMap ()->getIndexBase ();
190 rcp (
new map_type (this->globalNumRows_, myNumRows, indexBase, comm));
191 superlu_contig_rowmap_ = Teuchos::rcp (
new map_type (0, 0, indexBase, comm));
192 superlu_contig_colmap_ = Teuchos::rcp (
new map_type (0, 0, indexBase, comm));
198 data_.A.Store = NULL;
199 function_map::LUstructInit(this->globalNumRows_, this->globalNumCols_, &(data_.lu));
200 SLUD::PStatInit(&(data_.stat));
203 data_.scale_perm.perm_r = data_.perm_r.getRawPtr();
204 data_.scale_perm.perm_c = data_.perm_c.getRawPtr();
220 if ( this->status_.getNumPreOrder() > 0 ){
222#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
223 SUPERLU_FREE( data_.sizes );
224 SUPERLU_FREE( data_.fstVtxSep );
227 free( data_.fstVtxSep );
233 if( data_.A.Store != NULL ){
234 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
238 if ( this->status_.getNumNumericFact() > 0 ){
239 function_map::Destroy_LU(this->globalNumRows_, &(data_.grid), &(data_.lu));
241 function_map::LUstructFree(&(data_.lu));
247 if ( this->status_.symbolicFactorizationDone() &&
248 !this->status_.numericFactorizationDone() ){
249 if ( data_.pslu_freeable.xlsub != NULL ){
250#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
251 SUPERLU_FREE( data_.pslu_freeable.xlsub );
252 SUPERLU_FREE( data_.pslu_freeable.lsub );
254 free( data_.pslu_freeable.xlsub );
255 free( data_.pslu_freeable.lsub );
258 if ( data_.pslu_freeable.xusub != NULL ){
259#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
260 SUPERLU_FREE( data_.pslu_freeable.xusub );
261 SUPERLU_FREE( data_.pslu_freeable.usub );
263 free( data_.pslu_freeable.xusub );
264 free( data_.pslu_freeable.usub );
267 if ( data_.pslu_freeable.supno_loc != NULL ){
268#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
269 SUPERLU_FREE( data_.pslu_freeable.supno_loc );
270 SUPERLU_FREE( data_.pslu_freeable.xsup_beg_loc );
271 SUPERLU_FREE( data_.pslu_freeable.xsup_end_loc );
273 free( data_.pslu_freeable.supno_loc );
274 free( data_.pslu_freeable.xsup_beg_loc );
275 free( data_.pslu_freeable.xsup_end_loc );
278#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
279 SUPERLU_FREE( data_.pslu_freeable.globToLoc );
281 free( data_.pslu_freeable.globToLoc );
285 SLUD::PStatFree( &(data_.stat) ) ;
290 if ( data_.options.SolveInitialized == SLUD::YES )
291 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
295 SLUD::superlu_gridexit(&(data_.grid));
299 if ( data_.symb_comm != MPI_COMM_NULL ) MPI_Comm_free(&(data_.symb_comm));
306 int job = data_.largediag_mc64_job;
309 data_.R1.resize(data_.A.nrow);
310 data_.C1.resize(data_.A.ncol);
313 SLUD::NCformat *GAstore = (SLUD::NCformat*) GA.Store;
314 SLUD::int_t* colptr = GAstore->colptr;
315 SLUD::int_t* rowind = GAstore->rowind;
316 SLUD::int_t nnz = GAstore->nnz;
317 slu_type *a_GA = (slu_type *) GAstore->nzval;
318 MPI_Datatype mpi_dtype = Teuchos::Details::MpiTypeTraits<magnitude_type>::getType();
319 MPI_Datatype mpi_itype = Teuchos::Details::MpiTypeTraits<SLUD::int_t>::getType();
322 if ( !data_.grid.iam ) {
323 iinfo = function_map::ldperm_dist(job, data_.A.nrow, nnz, colptr, rowind, a_GA,
324 data_.perm_r.getRawPtr(), data_.R1.getRawPtr(), data_.C1.getRawPtr());
326 MPI_Bcast( &iinfo, 1, MPI_INT, 0, data_.grid.comm );
328 MPI_Bcast( data_.perm_r.getRawPtr(), data_.A.nrow, mpi_itype, 0, data_.grid.comm );
329 if ( job == 5 && data_.options.Equil ) {
330 MPI_Bcast( data_.R1.getRawPtr(), data_.A.nrow, mpi_dtype, 0, data_.grid.comm );
331 MPI_Bcast( data_.C1.getRawPtr(), data_.A.ncol, mpi_dtype, 0, data_.grid.comm );
335 MPI_Bcast( &iinfo, 1, mpi_int_t, 0, data_.grid.comm );
337 MPI_Bcast( data_.perm_r.getRawPtr(), data_.A.nrow, mpi_itype, 0, data_.grid.comm );
338 if ( job == 5 && data_.options.Equil ) {
339 MPI_Bcast( data_.R1.getRawPtr(), data_.A.nrow, mpi_dtype, 0, data_.grid.comm );
340 MPI_Bcast( data_.C1.getRawPtr(), data_.A.ncol, mpi_dtype, 0, data_.grid.comm );
344 TEUCHOS_TEST_FOR_EXCEPTION( iinfo != 0,
346 "SuperLU_DIST pre-ordering failed to compute row perm with "
347 << iinfo << std::endl);
351 for (SLUD::int_t i = 0; i < data_.A.nrow; ++i) data_.R1[i] = exp(data_.R1[i]);
352 for (SLUD::int_t i = 0; i < data_.A.ncol; ++i) data_.C1[i] = exp(data_.C1[i]);
361 if (debug_level_ > 0 && myRank == 0) {
362 std::cout <<
" * Superludist::preOrdering ( size(int_t)= " <<
sizeof(SLUD::int_t)
363 <<
", size(info_t) = * " <<
sizeof(SLUD::info_t)
364 <<
", size(perm_t) = " <<
sizeof(SLUD::perm_int_t) <<
") * " <<std::endl;
366 if (data_.options.RowPerm == SLUD::NOROWPERM) {
367 SLUD::int_t slu_rows_ub = Teuchos::as<SLUD::int_t>(this->globalNumRows_);
368 for( SLUD::int_t i = 0; i < slu_rows_ub; ++i ) data_.perm_r[i] = i;
370 else if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
371 if (!force_symbfact_)
373 return (EXIT_SUCCESS + 1);
383 if( this->status_.getNumPreOrder() > 0 ){
384#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
385 SUPERLU_FREE( data_.sizes );
386 SUPERLU_FREE( data_.fstVtxSep );
389 free( data_.fstVtxSep );
394#ifdef HAVE_AMESOS2_TIMERS
395 Teuchos::TimeMonitor preOrderTime( this->timers_.preOrderTime_ );
397 info = SLUD::get_perm_c_parmetis( &(data_.A),
398 data_.perm_r.getRawPtr(), data_.perm_c.getRawPtr(),
399 data_.grid.nprow * data_.grid.npcol, data_.domains,
400 &(data_.sizes), &(data_.fstVtxSep),
401 &(data_.grid), &(data_.symb_comm) );
403 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
405 "SuperLU_DIST pre-ordering ran out of memory after allocating "
406 << info <<
" bytes of memory" );
423 if (debug_level_ > 0 && myRank == 0) {
424 std::cout <<
" * Superludist::symbolicFactorization * " << std::endl;
426 if (!force_symbfact_) {
427 if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
429 return (EXIT_SUCCESS + 1);
437#ifdef HAVE_AMESOS2_TIMERS
438 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
441#if (SUPERLU_DIST_MAJOR_VERSION > 7)
442 info = SLUD::symbfact_dist(&(data_.options), (data_.grid.nprow) * (data_.grid.npcol),
443 data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
444 data_.perm_r.getRawPtr(), data_.sizes,
445 data_.fstVtxSep, &(data_.pslu_freeable),
446 &(data_.grid.comm), &(data_.symb_comm),
450 info = SLUD::symbfact_dist((data_.grid.nprow) * (data_.grid.npcol),
451 data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
452 data_.perm_r.getRawPtr(), data_.sizes,
453 data_.fstVtxSep, &(data_.pslu_freeable),
454 &(data_.grid.comm), &(data_.symb_comm),
458 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
460 "SuperLU_DIST symbolic factorization ran out of memory after"
461 " allocating " << info <<
" bytes of memory" );
463 same_symbolic_ =
false;
464 same_solve_struct_ =
false;
474 if (debug_level_ > 0 && myRank == 0) {
475 std::cout <<
" * Superludist::numericFactorization * " << std::endl;
479 SLUD::SuperMatrix GA;
480 bool need_value =
false;
483 if( data_.options.Equil == SLUD::YES ) {
484 SLUD::info_t info = 0;
487 data_.R.resize(this->globalNumRows_);
488 data_.C.resize(this->globalNumCols_);
489 function_map::gsequ_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
490 &(data_.rowcnd), &(data_.colcnd), &(data_.amax), &info, &(data_.grid));
493 function_map::laqgs_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
494 data_.rowcnd, data_.colcnd, data_.amax,
497 data_.rowequ = (data_.equed == SLUD::ROW) || (data_.equed == SLUD::BOTH);
498 data_.colequ = (data_.equed == SLUD::COL) || (data_.equed == SLUD::BOTH);
501 if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
504 SLUD::D::pdCompRow_loc_to_CompCol_global(
true, &data_.A, &data_.grid, &GA);
507 computeRowPermutationLargeDiagMC64(GA);
510 force_symbfact_ =
true;
512 symbolicFactorization_impl();
513 force_symbfact_ =
false;
517 if (data_.largediag_mc64_job == 5)
519 SLUD::NRformat_loc *Astore = (SLUD::NRformat_loc*) data_.A.Store;
520 slu_type *a = (slu_type*) Astore->nzval;
521 SLUD::int_t m_loc = Astore->m_loc;
522 SLUD::int_t fst_row = Astore->fst_row;
523 SLUD::int_t i, j, irow = fst_row, icol;
527 SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
528 for (j = 0; j < m_loc; ++j) {
529 for (i = rowptr_view_.data()[j]; i < rowptr_view_.data()[j+1]; ++i) {
530 icol = colind_view_.data()[i];
531 a[i] = mult_op(a[i], data_.R1[irow] * data_.C1[icol]);
537 if ( data_.rowequ )
for (i = 0; i < data_.A.nrow; ++i) data_.R[i] *= data_.R1[i];
538 else for (i = 0; i < data_.A.nrow; ++i) data_.R[i] = data_.R1[i];
539 if ( data_.colequ )
for (i = 0; i < data_.A.ncol; ++i) data_.C[i] *= data_.C1[i];
540 else for (i = 0; i < data_.A.ncol; ++i) data_.C[i] = data_.C1[i];
542 data_.rowequ = data_.colequ = 1;
548 size_t nnz_loc = ((SLUD::NRformat_loc*)data_.A.Store)->nnz_loc;
549 for(
size_t i = 0; i < nnz_loc; ++i ) colind_view_(i) = data_.perm_c[colind_view_(i)];
552 if( same_symbolic_ ){
557#if (SUPERLU_DIST_MAJOR_VERSION > 7)
558 data_.options.Fact = SLUD::SamePattern_SameRowPerm;
559 function_map::pdistribute(&(data_.options),
560 as<SLUD::int_t>(this->globalNumRows_),
561 &(data_.A), &(data_.scale_perm),
562 &(data_.glu_freeable), &(data_.lu),
565 function_map::pdistribute(SLUD::SamePattern_SameRowPerm,
566 as<SLUD::int_t>(this->globalNumRows_),
567 &(data_.A), &(data_.scale_perm),
568 &(data_.glu_freeable), &(data_.lu),
572#if (SUPERLU_DIST_MAJOR_VERSION > 7)
573 data_.options.Fact = SLUD::DOFACT;
574 function_map::dist_psymbtonum(&(data_.options),
575 as<SLUD::int_t>(this->globalNumRows_),
576 &(data_.A), &(data_.scale_perm),
577 &(data_.pslu_freeable), &(data_.lu),
580 function_map::dist_psymbtonum(SLUD::DOFACT,
581 as<SLUD::int_t>(this->globalNumRows_),
582 &(data_.A), &(data_.scale_perm),
583 &(data_.pslu_freeable), &(data_.lu),
589 bool notran = (data_.options.Trans == SLUD::NOTRANS);
590 magnitude_type anorm = function_map::plangs((notran ? (
char *)
"1" : (
char *)
"I"), &(data_.A), &(data_.grid));
594#ifdef HAVE_AMESOS2_TIMERS
595 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
597 function_map::gstrf(&(data_.options), this->globalNumRows_,
598 this->globalNumCols_, anorm, &(data_.lu),
599 &(data_.grid), &(data_.stat), &info);
603 TEUCHOS_TEST_FOR_EXCEPTION( info > 0,
605 "L and U factors have been computed but U("
606 << info <<
"," << info <<
") is exactly zero "
607 "(i.e. U is singular)");
611 SLUD::Destroy_CompCol_Matrix_dist(&GA);
617 data_.options.Fact = SLUD::FACTORED;
618 same_symbolic_ =
true;
633 const size_t local_len_rhs = superlu_rowmap_->getLocalNumElements();
634 const global_size_type nrhs = X->getGlobalNumVectors();
635 const global_ordinal_type first_global_row_b = superlu_contig_rowmap_->getMinGlobalIndex();
638 bvals_.resize(nrhs * local_len_rhs);
639 xvals_.resize(nrhs * local_len_rhs);
645#ifdef HAVE_AMESOS2_TIMERS
646 Teuchos::TimeMonitor convTimer(this->timers_.vecConvTime_);
655#ifdef HAVE_AMESOS2_TIMERS
656 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
661 copy_helper::do_get(B,
664 Teuchos::ptrInArg(*superlu_rowmap_));
685 if( !same_solve_struct_ ){
686 if( data_.options.SolveInitialized == SLUD::YES ){
687 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
689 function_map::SolveInit(&(data_.options), &(data_.A), data_.perm_r.getRawPtr(),
690 data_.perm_c.getRawPtr(), as<SLUD::int_t>(nrhs), &(data_.lu),
691 &(data_.grid), &(data_.solve_struct));
695 same_solve_struct_ =
true;
699 if (data_.options.Equil == SLUD::YES && data_.rowequ) {
700 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
701 SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
702 for(global_size_type j = 0; j < nrhs; ++j) {
703 for(
size_t i = 0; i < local_len_rhs; ++i) {
704 bvals_[i + j*ld] = mult_op(bvals_[i + j*ld], data_.R[first_global_row_b + i]);
712#ifdef HAVE_AMESOS2_TIMERS
713 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
716#if (SUPERLU_DIST_MAJOR_VERSION > 7)
717 function_map::gstrs(&(data_.options), as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
718 &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
719 as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
720 as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
721 &(data_.solve_struct), &(data_.stat), &ierr);
723 function_map::gstrs(as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
724 &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
725 as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
726 as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
727 &(data_.solve_struct), &(data_.stat), &ierr);
731 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
733 "Argument " << -ierr <<
" to gstrs had an illegal value" );
748#ifdef HAVE_AMESOS2_TIMERS
749 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
751 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
752 function_map::permute_Dense_Matrix(as<SLUD::int_t>(first_global_row_b),
753 as<SLUD::int_t>(local_len_rhs),
754 data_.solve_struct.row_to_proc,
755 data_.solve_struct.inv_perm_c,
756 bvals_.getRawPtr(), ld,
757 xvals_.getRawPtr(), ld,
763 if (data_.options.Equil == SLUD::YES && data_.colequ) {
764 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
765 SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
766 for(global_size_type j = 0; j < nrhs; ++j) {
767 for(
size_t i = 0; i < local_len_rhs; ++i) {
768 xvals_[i + j*ld] = mult_op(xvals_[i + j*ld], data_.C[first_global_row_b + i]);
776#ifdef HAVE_AMESOS2_TIMERS
777 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
780 put_helper::do_put(X,
783 Teuchos::ptrInArg(*superlu_rowmap_));
805 using Teuchos::getIntegralValue;
806 using Teuchos::ParameterEntryValidator;
808 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
810 if( parameterList->isParameter(
"npcol") || parameterList->isParameter(
"nprow") ){
811 TEUCHOS_TEST_FOR_EXCEPTION( !(parameterList->isParameter(
"nprow") &&
812 parameterList->isParameter(
"npcol")),
813 std::invalid_argument,
814 "nprow and npcol must be set together" );
816 SLUD::int_t nprow = parameterList->template get<SLUD::int_t>(
"nprow");
817 SLUD::int_t npcol = parameterList->template get<SLUD::int_t>(
"npcol");
819 TEUCHOS_TEST_FOR_EXCEPTION( nprow * npcol > this->getComm()->getSize(),
820 std::invalid_argument,
821 "nprow and npcol combination invalid" );
823 if( (npcol != data_.grid.npcol) || (nprow != data_.grid.nprow) ){
825 SLUD::superlu_gridexit(&(data_.grid));
827 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
831 TEUCHOS_TEST_FOR_EXCEPTION( this->control_.useTranspose_,
832 std::invalid_argument,
833 "SuperLU_DIST does not support solving the tranpose system" );
835 data_.options.Trans = SLUD::NOTRANS;
838 bool equil = parameterList->get<
bool>(
"Equil",
false);
839 data_.options.Equil = equil ? SLUD::YES : SLUD::NO;
841 if( parameterList->isParameter(
"RowPerm") ){
842 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry(
"RowPerm").validator();
843 parameterList->getEntry(
"RowPerm").setValidator(rowperm_validator);
845 data_.options.RowPerm = getIntegralValue<SLUD::rowperm_t>(*parameterList,
"RowPerm");
848 if( parameterList->isParameter(
"LargeDiag_MC64-Options") ){
849 data_.largediag_mc64_job = parameterList->template get<int>(
"LargeDiag_MC64-Options");
852 if( parameterList->isParameter(
"ColPerm") ){
853 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry(
"ColPerm").validator();
854 parameterList->getEntry(
"ColPerm").setValidator(colperm_validator);
856 data_.options.ColPerm = getIntegralValue<SLUD::colperm_t>(*parameterList,
"ColPerm");
865 data_.options.IterRefine = SLUD::NOREFINE;
867 bool replace_tiny = parameterList->get<
bool>(
"ReplaceTinyPivot",
true);
868 data_.options.ReplaceTinyPivot = replace_tiny ? SLUD::YES : SLUD::NO;
870 if( parameterList->isParameter(
"IsContiguous") ){
871 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous");
873 if( parameterList->isParameter(
"DebugLevel") ){
874 debug_level_ = parameterList->get<
int>(
"DebugLevel");
884 using Teuchos::tuple;
885 using Teuchos::ParameterList;
886 using Teuchos::EnhancedNumberValidator;
887 using Teuchos::setStringToIntegralParameter;
888 using Teuchos::setIntParameter;
889 using Teuchos::stringToIntegralParameterEntryValidator;
891 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
893 if( is_null(valid_params) ){
894 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
896 Teuchos::RCP<EnhancedNumberValidator<SLUD::int_t> > col_row_validator
897 = Teuchos::rcp(
new EnhancedNumberValidator<SLUD::int_t>() );
898 col_row_validator->setMin(1);
900 pl->set(
"npcol", data_.grid.npcol,
901 "Number of columns in the processor grid. "
902 "Must be set with nprow", col_row_validator);
903 pl->set(
"nprow", data_.grid.nprow,
904 "Number of rows in the SuperLU_DIST processor grid. "
905 "Must be set together with npcol", col_row_validator);
908 setStringToIntegralParameter<SLUD::trans_t>(
"Trans",
"NOTRANS",
909 "Solve for the transpose system or not",
910 tuple<string>(
"NOTRANS"),
911 tuple<string>(
"Do not solve with transpose"),
912 tuple<SLUD::trans_t>(SLUD::NOTRANS),
916 pl->set(
"Equil",
false,
"Whether to equilibrate the system before solve");
929 pl->set(
"ReplaceTinyPivot",
true,
930 "Specifies whether to replace tiny diagonals during LU factorization");
933 setStringToIntegralParameter<SLUD::rowperm_t>(
"RowPerm",
"NOROWPERM",
934 "Specifies how to permute the rows of the "
935 "matrix for sparsity preservation",
936 tuple<string>(
"NOROWPERM",
"LargeDiag_MC64"),
937 tuple<string>(
"Natural ordering",
938 "Duff/Koster algorithm"),
939 tuple<SLUD::rowperm_t>(SLUD::NOROWPERM,
940 SLUD::LargeDiag_MC64),
943 setIntParameter(
"LargeDiag_MC64-Options", 4,
"Options for RowPerm-LargeDiag_MC64", pl.getRawPtr());
946 setStringToIntegralParameter<SLUD::colperm_t>(
"ColPerm",
"PARMETIS",
947 "Specifies how to permute the columns of the "
948 "matrix for sparsity preservation",
949 tuple<string>(
"NATURAL",
"PARMETIS"),
950 tuple<string>(
"Natural ordering",
951 "ParMETIS ordering on A^T + A"),
952 tuple<SLUD::colperm_t>(SLUD::NATURAL,
956 pl->set(
"IsContiguous",
true,
"Whether GIDs contiguous");
957 pl->set(
"DebugLevel", 0,
"Message levels for debuging");
995 using Teuchos::Array;
996 using Teuchos::ArrayView;
997 using Teuchos::ptrInArg;
1001 const int nprow = data_.grid.nprow;
1002 const int npcol = data_.grid.npcol;
1004#ifdef HAVE_AMESOS2_TIMERS
1005 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
1009 if( data_.A.Store != NULL ){
1010 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
1011 data_.A.Store = NULL;
1016 int_t l_nnz, l_rows, g_rows, g_cols, fst_global_row;
1017 if (!is_contiguous_ && numProcs == nprow*npcol) {
1018#ifdef HAVE_AMESOS2_TIMERS
1019 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
1022 superlu_rowmap_ = this->matrixA_->getRowMap();
1023 Teuchos::RCP<const MatrixAdapter<Matrix> > contig_mat = this->matrixA_->reindex(superlu_contig_rowmap_, superlu_contig_colmap_, current_phase);
1024 l_nnz = as<int_t>(contig_mat->getLocalNNZ());
1025 l_rows = as<int_t>(contig_mat->getLocalNumRows());
1026 g_rows = as<int_t>(contig_mat->getGlobalNumRows());
1028 fst_global_row = as<int_t>(superlu_contig_rowmap_->getMinGlobalIndex());
1031 if (current_phase == PREORDERING)
1033 Kokkos::resize(nzvals_temp_, l_nnz);
1034 Kokkos::resize(nzvals_view_, l_nnz);
1035 Kokkos::resize(colind_view_, l_nnz);
1036 Kokkos::resize(rowptr_view_, l_rows + 1);
1039 host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get(
1041 nzvals_temp_, colind_view_, rowptr_view_,
1043 ptrInArg(*(contig_mat->getRowMap())),
1046 Kokkos::deep_copy(nzvals_view_, nzvals_temp_);
1048#ifdef HAVE_AMESOS2_TIMERS
1049 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
1051 Teuchos::RCP<const MatrixAdapter<Matrix> > redist_mat
1052 = this->matrixA_->get(ptrInArg(*superlu_rowmap_));
1055 superlu_contig_rowmap_ = superlu_rowmap_;
1057 l_nnz = as<int_t>(redist_mat->getLocalNNZ());
1058 l_rows = as<int_t>(redist_mat->getLocalNumRows());
1059 g_rows = as<int_t>(redist_mat->getGlobalNumRows());
1061 fst_global_row = as<int_t>(superlu_rowmap_->getMinGlobalIndex());
1063 Kokkos::resize(nzvals_view_, l_nnz);
1064 Kokkos::resize(colind_view_, l_nnz);
1065 Kokkos::resize(rowptr_view_, l_rows + 1);
1068 host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get(
1070 nzvals_view_, colind_view_, rowptr_view_,
1072 ptrInArg(*superlu_rowmap_),
1078 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != l_nnz,
1080 "Did not get the expected number of non-zero vals ("
1081 +std::to_string(nnz_ret)+
" vs "+std::to_string(l_nnz)+
")");
1084 SLUD::Dtype_t dtype = type_map::dtype;
1087 function_map::create_CompRowLoc_Matrix(&(data_.A),
1089 l_nnz, l_rows, fst_global_row,
1090 nzvals_view_.data(),
1091 colind_view_.data(),
1092 rowptr_view_.data(),
1094 dtype, SLUD::SLU_GE);