Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_Superludist_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Amesos2: Templated Direct Sparse Solver Package
4//
5// Copyright 2011 NTESS and the Amesos2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
18#ifndef AMESOS2_SUPERLUDIST_DEF_HPP
19#define AMESOS2_SUPERLUDIST_DEF_HPP
20
21#include <Teuchos_Tuple.hpp>
22#include <Teuchos_StandardParameterEntryValidators.hpp>
23#include <Teuchos_DefaultMpiComm.hpp>
24#include <Teuchos_Details_MpiTypeTraits.hpp>
25
28#include "Amesos2_Util.hpp"
29
30
31namespace Amesos2 {
32
33
34 template <class Matrix, class Vector>
35 Superludist<Matrix,Vector>::Superludist(Teuchos::RCP<const Matrix> A,
36 Teuchos::RCP<Vector> X,
37 Teuchos::RCP<const Vector> B)
38 : SolverCore<Amesos2::Superludist,Matrix,Vector>(A, X, B)
39 , bvals_()
40 , xvals_()
41 , in_grid_(false)
42 , force_symbfact_(false)
43 , is_contiguous_(true)
44 {
45 using Teuchos::Comm;
46 // It's OK to depend on MpiComm explicitly here, because
47 // SuperLU_DIST requires MPI anyway.
48 using Teuchos::MpiComm;
49 using Teuchos::outArg;
50 using Teuchos::ParameterList;
51 using Teuchos::parameterList;
52 using Teuchos::RCP;
53 using Teuchos::rcp;
54 using Teuchos::rcp_dynamic_cast;
55 using Teuchos::REDUCE_SUM;
56 using Teuchos::reduceAll;
57 typedef global_ordinal_type GO;
58
60 // Set up the SuperLU_DIST processor grid //
62
63 RCP<const Comm<int> > comm = this->getComm ();
64 const int myRank = comm->getRank ();
65 const int numProcs = comm->getSize ();
66
67 SLUD::int_t nprow, npcol;
68 get_default_grid_size (numProcs, nprow, npcol);
69 {
70 // FIXME (mfh 16 Dec 2014) getComm() just returns
71 // matrixA_->getComm(), so it's not clear why we need to ask for
72 // the matrix's communicator separately here.
73 RCP<const Comm<int> > matComm = this->matrixA_->getComm ();
74 TEUCHOS_TEST_FOR_EXCEPTION(
75 matComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
76 "constructor: The matrix's communicator is null!");
77 RCP<const MpiComm<int> > matMpiComm =
78 rcp_dynamic_cast<const MpiComm<int> > (matComm);
79 // FIXME (mfh 16 Dec 2014) If the matrix's communicator is a
80 // SerialComm, we probably could just use MPI_COMM_SELF here.
81 // I'm not sure if SuperLU_DIST is smart enough to handle that
82 // case, though.
83 TEUCHOS_TEST_FOR_EXCEPTION(
84 matMpiComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
85 "constructor: The matrix's communicator is not an MpiComm!");
86 TEUCHOS_TEST_FOR_EXCEPTION(
87 matMpiComm->getRawMpiComm ().is_null (), std::logic_error, "Amesos2::"
88 "Superlustdist constructor: The matrix's communicator claims to be a "
89 "Teuchos::MpiComm<int>, but its getRawPtrComm() method returns "
90 "Teuchos::null! This means that the underlying MPI_Comm doesn't even "
91 "exist, which likely implies that the Teuchos::MpiComm was constructed "
92 "incorrectly. It means something different than if the MPI_Comm were "
93 "MPI_COMM_NULL.");
94 MPI_Comm rawMpiComm = (* (matMpiComm->getRawMpiComm ())) ();
95 data_.mat_comm = rawMpiComm;
96 // This looks a bit like ScaLAPACK's grid initialization (which
97 // technically takes place in the BLACS, not in ScaLAPACK
98 // proper). See http://netlib.org/scalapack/slug/node34.html.
99 // The main difference is that SuperLU_DIST depends explicitly
100 // on MPI, while the BLACS hides its communication protocol.
101 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
102 }
103
105 // Set some default parameters. //
106 // //
107 // Must do this after grid has been created in //
108 // case user specifies the nprow and npcol parameters //
110 SLUD::set_default_options_dist(&data_.options);
111
112 RCP<ParameterList> default_params =
113 parameterList (* (this->getValidParameters ()));
114 this->setParameters (default_params);
115
116 // Set some internal options
117 data_.options.Fact = SLUD::DOFACT;
118 data_.equed = SLUD::NOEQUIL; // No equilibration has yet been performed
119 data_.options.SolveInitialized = SLUD::NO;
120 data_.options.RefineInitialized = SLUD::NO;
121 data_.rowequ = false;
122 data_.colequ = false;
123 data_.perm_r.resize(this->globalNumRows_);
124 data_.perm_c.resize(this->globalNumCols_);
125 data_.largediag_mc64_job = 4;
126 for (global_size_type i = 0; i < this->globalNumRows_; i++)
127 data_.perm_r[i] = i;
128 for (global_size_type i = 0; i < this->globalNumCols_; i++)
129 data_.perm_c[i] = i;
130
132 // Set up a communicator for the parallel column ordering and //
133 // parallel symbolic factorization. //
135 data_.symb_comm = MPI_COMM_NULL;
136
137 // domains is the next power of 2 less than nprow*npcol. This
138 // value will be used for creating an MPI communicator for the
139 // pre-ordering and symbolic factorization methods.
140 data_.domains = (int) ( pow(2.0, floor(log10((double)nprow*npcol)/log10(2.0))) );
141
142 const int color = (myRank < data_.domains) ? 0 : MPI_UNDEFINED;
143 MPI_Comm_split (data_.mat_comm, color, myRank, &(data_.symb_comm));
144
146 // Set up a row Map that only includes processes that are in the
147 // SuperLU process grid. This will be used for redistributing A.
149
150 // mfh 16 Dec 2014: We could use createWeightedContigMapWithNode
151 // with myProcParticipates as the weight, but that costs an extra
152 // all-reduce.
153
154 // Set to 1 if I am in the grid, and I get some of the matrix rows.
155 int myProcParticipates = 0;
156 if (myRank < nprow * npcol) {
157 in_grid_ = true;
158 myProcParticipates = 1;
159 }
160
161 // Compute how many processes in the communicator belong to the
162 // process grid.
163 int numParticipatingProcs = 0;
164 reduceAll<int, int> (*comm, REDUCE_SUM, myProcParticipates,
165 outArg (numParticipatingProcs));
166 TEUCHOS_TEST_FOR_EXCEPTION(
167 this->globalNumRows_ != 0 && numParticipatingProcs == 0,
168 std::logic_error, "Amesos2::Superludist constructor: The matrix has "
169 << this->globalNumRows_ << " > 0 global row(s), but no processes in the "
170 "communicator want to participate in its factorization! nprow = "
171 << nprow << " and npcol = " << npcol << ".");
172
173 // Divide up the rows among the participating processes.
174 size_t myNumRows = 0;
175 {
176 const GO GNR = static_cast<GO> (this->globalNumRows_);
177 const GO quotient = (numParticipatingProcs == 0) ? static_cast<GO> (0) :
178 GNR / static_cast<GO> (numParticipatingProcs);
179 const GO remainder =
180 GNR - quotient * static_cast<GO> (numParticipatingProcs);
181 const GO lclNumRows = (static_cast<GO> (myRank) < remainder) ?
182 (quotient + static_cast<GO> (1)) : quotient;
183 myNumRows = static_cast<size_t> (lclNumRows);
184 }
185
186 // TODO: might only need to initialize if parallel symbolic factorization is requested.
187 const GO indexBase = this->matrixA_->getRowMap ()->getIndexBase ();
189 rcp (new map_type (this->globalNumRows_, myNumRows, indexBase, comm));
190 superlu_contig_rowmap_ = Teuchos::rcp (new map_type (0, 0, indexBase, comm));
191 superlu_contig_colmap_ = Teuchos::rcp (new map_type (0, 0, indexBase, comm));
192
194 // Do some other initialization //
196
197 data_.A.Store = NULL;
198 function_map::LUstructInit(this->globalNumRows_, this->globalNumCols_, &(data_.lu));
199 SLUD::PStatInit(&(data_.stat));
200 // We do not use ScalePermstructInit because we will use our own
201 // arrays for storing perm_r and perm_c
202 data_.scale_perm.perm_r = data_.perm_r.getRawPtr();
203 data_.scale_perm.perm_c = data_.perm_c.getRawPtr();
204 }
205
206
207 template <class Matrix, class Vector>
209 {
210 /* Free SuperLU_DIST data_types
211 * - Matrices
212 * - Vectors
213 * - Stat object
214 * - ScalePerm, LUstruct, grid, and solve objects
215 *
216 * Note: the function definitions are the same regardless whether
217 * complex or real, so we arbitrarily use the D namespace
218 */
219 if ( this->status_.getNumPreOrder() > 0 ){
221#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
222 SUPERLU_FREE( data_.sizes );
223 SUPERLU_FREE( data_.fstVtxSep );
224#else
225 free( data_.sizes );
226 free( data_.fstVtxSep );
227#endif
228 }
229
230 // Cleanup old matrix store memory if it's non-NULL. Our
231 // Teuchos::Array's will destroy rowind, colptr, and nzval for us
232 if( data_.A.Store != NULL ){
233 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
234 }
235
236 // LU data is initialized in numericFactorization_impl()
237 if ( this->status_.getNumNumericFact() > 0 ){
238 function_map::Destroy_LU(this->globalNumRows_, &(data_.grid), &(data_.lu));
239 }
240 function_map::LUstructFree(&(data_.lu));
241
242 // If a symbolic factorization is ever performed without a
243 // follow-up numericfactorization, there are some arrays in the
244 // Pslu_freeable struct which will never be free'd by
245 // SuperLU_DIST.
246 if ( this->status_.symbolicFactorizationDone() &&
247 !this->status_.numericFactorizationDone() ){
248 if ( data_.pslu_freeable.xlsub != NULL ){
249#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
250 SUPERLU_FREE( data_.pslu_freeable.xlsub );
251 SUPERLU_FREE( data_.pslu_freeable.lsub );
252#else
253 free( data_.pslu_freeable.xlsub );
254 free( data_.pslu_freeable.lsub );
255#endif
256 }
257 if ( data_.pslu_freeable.xusub != NULL ){
258#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
259 SUPERLU_FREE( data_.pslu_freeable.xusub );
260 SUPERLU_FREE( data_.pslu_freeable.usub );
261#else
262 free( data_.pslu_freeable.xusub );
263 free( data_.pslu_freeable.usub );
264#endif
265 }
266 if ( data_.pslu_freeable.supno_loc != NULL ){
267#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
268 SUPERLU_FREE( data_.pslu_freeable.supno_loc );
269 SUPERLU_FREE( data_.pslu_freeable.xsup_beg_loc );
270 SUPERLU_FREE( data_.pslu_freeable.xsup_end_loc );
271#else
272 free( data_.pslu_freeable.supno_loc );
273 free( data_.pslu_freeable.xsup_beg_loc );
274 free( data_.pslu_freeable.xsup_end_loc );
275#endif
276 }
277#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
278 SUPERLU_FREE( data_.pslu_freeable.globToLoc );
279#else
280 free( data_.pslu_freeable.globToLoc );
281#endif
282 }
283
284 SLUD::PStatFree( &(data_.stat) ) ;
285
286 // Teuchos::Arrays will free R, C, perm_r, and perm_c
287 // SLUD::D::ScalePermstructFree(&(data_.scale_perm));
288
289 if ( data_.options.SolveInitialized == SLUD::YES )
290 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
291
292 // gridexit of an older version frees SuperLU_MPI_DOUBLE_COMPLE,
293 // which could cause an issue if there are still active instances of superludist?
294 SLUD::superlu_gridexit(&(data_.grid)); // TODO: are there any
295 // cases where grid
296 // wouldn't be initialized?
297
298 if ( data_.symb_comm != MPI_COMM_NULL ) MPI_Comm_free(&(data_.symb_comm));
299 }
300
301 template<class Matrix, class Vector>
302 void
304 {
305 int job = data_.largediag_mc64_job;
306 if (job == 5)
307 {
308 data_.R1.resize(data_.A.nrow);
309 data_.C1.resize(data_.A.ncol);
310 }
311
312 SLUD::NCformat *GAstore = (SLUD::NCformat*) GA.Store;
313 SLUD::int_t* colptr = GAstore->colptr;
314 SLUD::int_t* rowind = GAstore->rowind;
315 SLUD::int_t nnz = GAstore->nnz;
316 slu_type *a_GA = (slu_type *) GAstore->nzval;
317 MPI_Datatype mpi_dtype = Teuchos::Details::MpiTypeTraits<magnitude_type>::getType();
318 MPI_Datatype mpi_itype = Teuchos::Details::MpiTypeTraits<SLUD::int_t>::getType();
319
320 int iinfo = 0;
321 if ( !data_.grid.iam ) { /* Process 0 finds a row permutation */
322 iinfo = function_map::ldperm_dist(job, data_.A.nrow, nnz, colptr, rowind, a_GA,
323 data_.perm_r.getRawPtr(), data_.R1.getRawPtr(), data_.C1.getRawPtr());
324
325 MPI_Bcast( &iinfo, 1, MPI_INT, 0, data_.grid.comm );
326 if ( iinfo == 0 ) {
327 MPI_Bcast( data_.perm_r.getRawPtr(), data_.A.nrow, mpi_itype, 0, data_.grid.comm );
328 if ( job == 5 && data_.options.Equil ) {
329 MPI_Bcast( data_.R1.getRawPtr(), data_.A.nrow, mpi_dtype, 0, data_.grid.comm );
330 MPI_Bcast( data_.C1.getRawPtr(), data_.A.ncol, mpi_dtype, 0, data_.grid.comm );
331 }
332 }
333 } else {
334 MPI_Bcast( &iinfo, 1, mpi_int_t, 0, data_.grid.comm );
335 if ( iinfo == 0 ) {
336 MPI_Bcast( data_.perm_r.getRawPtr(), data_.A.nrow, mpi_itype, 0, data_.grid.comm );
337 if ( job == 5 && data_.options.Equil ) {
338 MPI_Bcast( data_.R1.getRawPtr(), data_.A.nrow, mpi_dtype, 0, data_.grid.comm );
339 MPI_Bcast( data_.C1.getRawPtr(), data_.A.ncol, mpi_dtype, 0, data_.grid.comm );
340 }
341 }
342 }
343 TEUCHOS_TEST_FOR_EXCEPTION( iinfo != 0,
344 std::runtime_error,
345 "SuperLU_DIST pre-ordering failed to compute row perm with "
346 << iinfo << std::endl);
347
348 if (job == 5)
349 {
350 for (SLUD::int_t i = 0; i < data_.A.nrow; ++i) data_.R1[i] = exp(data_.R1[i]);
351 for (SLUD::int_t i = 0; i < data_.A.ncol; ++i) data_.C1[i] = exp(data_.C1[i]);
352 }
353 }
354
355
356 template<class Matrix, class Vector>
357 int
359 {
360 if (data_.options.RowPerm == SLUD::NOROWPERM) {
361 SLUD::int_t slu_rows_ub = Teuchos::as<SLUD::int_t>(this->globalNumRows_);
362 for( SLUD::int_t i = 0; i < slu_rows_ub; ++i ) data_.perm_r[i] = i;
363 }
364 else if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
365 if (!force_symbfact_)
366 // defer to numerical factorization because row permutation requires the matrix values
367 return (EXIT_SUCCESS + 1);
368 }
369 // loadA_impl(); // Refresh matrix values
370
371 if( in_grid_ ){
372 // If this function has been called at least once, then the
373 // sizes, and fstVtxSep arrays were allocated in
374 // get_perm_c_parmetis. Delete them before calling that
375 // function again. These arrays will also be dealloc'd in the
376 // deconstructor.
377 if( this->status_.getNumPreOrder() > 0 ){
378#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
379 SUPERLU_FREE( data_.sizes );
380 SUPERLU_FREE( data_.fstVtxSep );
381#else
382 free( data_.sizes );
383 free( data_.fstVtxSep );
384#endif
385 }
386 float info = 0.0;
387 {
388#ifdef HAVE_AMESOS2_TIMERS
389 Teuchos::TimeMonitor preOrderTime( this->timers_.preOrderTime_ );
390#endif
391 info = SLUD::get_perm_c_parmetis( &(data_.A),
392 data_.perm_r.getRawPtr(), data_.perm_c.getRawPtr(),
393 data_.grid.nprow * data_.grid.npcol, data_.domains,
394 &(data_.sizes), &(data_.fstVtxSep),
395 &(data_.grid), &(data_.symb_comm) );
396 }
397 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
398 std::runtime_error,
399 "SuperLU_DIST pre-ordering ran out of memory after allocating "
400 << info << " bytes of memory" );
401 }
402
403 // Ordering will be applied directly before numeric factorization,
404 // after we have a chance to get updated coefficients from the
405 // matrix
406
407 return EXIT_SUCCESS;
408 }
409
410
411
412 template <class Matrix, class Vector>
413 int
415 {
416 // loadA_impl(); // Refresh matrix values
417 if (!force_symbfact_) {
418 if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
419 // defer to numerical factorization because row permutation requires the matrix values
420 return (EXIT_SUCCESS + 1);
421 }
422 }
423
424 if( in_grid_ ){
425
426 float info = 0.0;
427 {
428#ifdef HAVE_AMESOS2_TIMERS
429 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
430#endif
431
432#if (SUPERLU_DIST_MAJOR_VERSION > 7)
433 info = SLUD::symbfact_dist(&(data_.options), (data_.grid.nprow) * (data_.grid.npcol),
434 data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
435 data_.perm_r.getRawPtr(), data_.sizes,
436 data_.fstVtxSep, &(data_.pslu_freeable),
437 &(data_.grid.comm), &(data_.symb_comm),
438 &(data_.mem_usage));
439
440#else
441 info = SLUD::symbfact_dist((data_.grid.nprow) * (data_.grid.npcol),
442 data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
443 data_.perm_r.getRawPtr(), data_.sizes,
444 data_.fstVtxSep, &(data_.pslu_freeable),
445 &(data_.grid.comm), &(data_.symb_comm),
446 &(data_.mem_usage));
447#endif
448 }
449 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
450 std::runtime_error,
451 "SuperLU_DIST symbolic factorization ran out of memory after"
452 " allocating " << info << " bytes of memory" );
453 }
454 same_symbolic_ = false;
455 same_solve_struct_ = false;
456
457 return EXIT_SUCCESS;
458 }
459
460
461 template <class Matrix, class Vector>
462 int
464 using Teuchos::as;
465
466 // loadA_impl(); // Refresh the matrix values
467 SLUD::SuperMatrix GA; /* Global A in NC format */
468 bool need_value = false;
469
470 if( in_grid_ ) {
471 if( data_.options.Equil == SLUD::YES ) {
472 SLUD::int_t info = 0;
473
474 // Compute scaling
475 data_.R.resize(this->globalNumRows_);
476 data_.C.resize(this->globalNumCols_);
477 function_map::gsequ_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
478 &(data_.rowcnd), &(data_.colcnd), &(data_.amax), &info, &(data_.grid));
479
480 // Apply the scalings
481 function_map::laqgs_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
482 data_.rowcnd, data_.colcnd, data_.amax,
483 &(data_.equed));
484
485 data_.rowequ = (data_.equed == SLUD::ROW) || (data_.equed == SLUD::BOTH);
486 data_.colequ = (data_.equed == SLUD::COL) || (data_.equed == SLUD::BOTH);
487
488 // Compute and apply the row permutation
489 if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
490 // Create a column-order copy of A
491 need_value = true;
492 SLUD::D::pdCompRow_loc_to_CompCol_global(true, &data_.A, &data_.grid, &GA);
493
494 // Compute row permutation
495 computeRowPermutationLargeDiagMC64(GA);
496
497 // Here we do symbolic factorization
498 force_symbfact_ = true;
499 preOrdering_impl();
500 symbolicFactorization_impl();
501 force_symbfact_ = false;
502
503 // Apply row-permutation scaling for job=5
504 // Here we do it manually to bypass the threshold check in laqgs_loc
505 if (data_.largediag_mc64_job == 5)
506 {
507 SLUD::NRformat_loc *Astore = (SLUD::NRformat_loc*) data_.A.Store;
508 slu_type *a = (slu_type*) Astore->nzval;
509 SLUD::int_t m_loc = Astore->m_loc;
510 SLUD::int_t fst_row = Astore->fst_row;
511 SLUD::int_t i, j, irow = fst_row, icol;
512
513 /* Scale the distributed matrix further.
514 A <-- diag(R1)*A*diag(C1) */
515 SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
516 for (j = 0; j < m_loc; ++j) {
517 for (i = rowptr_view_.data()[j]; i < rowptr_view_.data()[j+1]; ++i) {
518 icol = colind_view_.data()[i];
519 a[i] = mult_op(a[i], data_.R1[irow] * data_.C1[icol]);
520 }
521 ++irow;
522 }
523
524 /* Multiply together the scaling factors */
525 if ( data_.rowequ ) for (i = 0; i < data_.A.nrow; ++i) data_.R[i] *= data_.R1[i];
526 else for (i = 0; i < data_.A.nrow; ++i) data_.R[i] = data_.R1[i];
527 if ( data_.colequ ) for (i = 0; i < data_.A.ncol; ++i) data_.C[i] *= data_.C1[i];
528 else for (i = 0; i < data_.A.ncol; ++i) data_.C[i] = data_.C1[i];
529
530 data_.rowequ = data_.colequ = 1;
531 }
532 }
533 }
534
535 // Apply the column ordering, so that AC is the column-permuted A, and compute etree
536 size_t nnz_loc = ((SLUD::NRformat_loc*)data_.A.Store)->nnz_loc;
537 for( size_t i = 0; i < nnz_loc; ++i ) colind_view_(i) = data_.perm_c[colind_view_(i)];
538
539 // Distribute data from the symbolic factorization
540 if( same_symbolic_ ){
541 // Note: with the SamePattern_SameRowPerm options, it does not
542 // matter that the glu_freeable member has never been
543 // initialized, because it is never accessed. It is a
544 // placeholder arg. The real work is done in data_.lu
545#if (SUPERLU_DIST_MAJOR_VERSION > 7)
546 data_.options.Fact = SLUD::SamePattern_SameRowPerm;
547 function_map::pdistribute(&(data_.options),
548 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
549 &(data_.A), &(data_.scale_perm),
550 &(data_.glu_freeable), &(data_.lu),
551 &(data_.grid));
552#else
553 function_map::pdistribute(SLUD::SamePattern_SameRowPerm,
554 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
555 &(data_.A), &(data_.scale_perm),
556 &(data_.glu_freeable), &(data_.lu),
557 &(data_.grid));
558#endif
559 } else {
560#if (SUPERLU_DIST_MAJOR_VERSION > 7)
561 data_.options.Fact = SLUD::DOFACT;
562 function_map::dist_psymbtonum(&(data_.options),
563 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
564 &(data_.A), &(data_.scale_perm),
565 &(data_.pslu_freeable), &(data_.lu),
566 &(data_.grid));
567#else
568 function_map::dist_psymbtonum(SLUD::DOFACT,
569 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
570 &(data_.A), &(data_.scale_perm),
571 &(data_.pslu_freeable), &(data_.lu),
572 &(data_.grid));
573#endif
574 }
575
576 // Retrieve the normI of A (required by gstrf).
577 bool notran = (data_.options.Trans == SLUD::NOTRANS);
578 magnitude_type anorm = function_map::plangs((notran ? (char *)"1" : (char *)"I"), &(data_.A), &(data_.grid));
579
580 int info = 0;
581 {
582#ifdef HAVE_AMESOS2_TIMERS
583 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
584#endif
585 function_map::gstrf(&(data_.options), this->globalNumRows_,
586 this->globalNumCols_, anorm, &(data_.lu),
587 &(data_.grid), &(data_.stat), &info);
588 }
589
590 // Check output
591 TEUCHOS_TEST_FOR_EXCEPTION( info > 0,
592 std::runtime_error,
593 "L and U factors have been computed but U("
594 << info << "," << info << ") is exactly zero "
595 "(i.e. U is singular)");
596 }
597
598 if (need_value)
599 SLUD::Destroy_CompCol_Matrix_dist(&GA);
600
601 // The other option, that info_st < 0, denotes invalid parameters
602 // to the function, but we'll assume for now that that won't
603 // happen.
604
605 data_.options.Fact = SLUD::FACTORED;
606 same_symbolic_ = true;
607
608 return EXIT_SUCCESS;
609 }
610
611
612 template <class Matrix, class Vector>
613 int
615 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
616 {
617 using Teuchos::as;
618
619 // local_len_rhs is how many of the multivector rows belong to
620 // this processor in the SuperLU_DIST processor grid.
621 const size_t local_len_rhs = superlu_rowmap_->getLocalNumElements();
622 const global_size_type nrhs = X->getGlobalNumVectors();
623 const global_ordinal_type first_global_row_b = superlu_contig_rowmap_->getMinGlobalIndex();
624
625 // make sure our multivector storage is sized appropriately
626 bvals_.resize(nrhs * local_len_rhs);
627 xvals_.resize(nrhs * local_len_rhs);
628
629 // We assume the global length of the two vectors have already been
630 // checked for compatibility
631
632 { // get the values from B
633#ifdef HAVE_AMESOS2_TIMERS
634 Teuchos::TimeMonitor convTimer(this->timers_.vecConvTime_);
635#endif
636 {
637 // The input dense matrix for B should be distributed in the
638 // same manner as the superlu_dist matrix. That is, if a
639 // processor has m_loc rows of A, then it should also have
640 // m_loc rows of B (and the same rows). We accomplish this by
641 // distributing the multivector rows with the same Map that
642 // the matrix A's rows are distributed.
643#ifdef HAVE_AMESOS2_TIMERS
644 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
645#endif
646 // get grid-distributed mv data. The multivector data will be
647 // distributed across the processes in the SuperLU_DIST grid.
648 typedef Util::get_1d_copy_helper<MultiVecAdapter<Vector>,slu_type> copy_helper;
649 copy_helper::do_get(B,
650 bvals_(),
651 local_len_rhs,
652 Teuchos::ptrInArg(*superlu_rowmap_));
653 }
654 } // end block for conversion time
655
656 if( in_grid_ ){
657 // if( data_.options.trans == SLUD::NOTRANS ){
658 // if( data_.rowequ ){ // row equilibration has been done on AC
659 // // scale bxvals_ by diag(R)
660 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
661 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
662 // }
663 // } else if( data_.colequ ){ // column equilibration has been done on AC
664 // // scale bxvals_ by diag(C)
665 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
666 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
667 // }
668
669 // Initialize the SOLVEstruct_t.
670 //
671 // We are able to reuse the solve struct if we have not changed
672 // the sparsity pattern of L and U since the last solve
673 if( !same_solve_struct_ ){
674 if( data_.options.SolveInitialized == SLUD::YES ){
675 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
676 }
677 function_map::SolveInit(&(data_.options), &(data_.A), data_.perm_r.getRawPtr(),
678 data_.perm_c.getRawPtr(), as<SLUD::int_t>(nrhs), &(data_.lu),
679 &(data_.grid), &(data_.solve_struct));
680 // Flag that we can reuse this solve_struct unless another
681 // symbolicFactorization is called between here and the next
682 // solve.
683 same_solve_struct_ = true;
684 }
685
686 // Apply row-scaling if requested
687 if (data_.options.Equil == SLUD::YES && data_.rowequ) {
688 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
689 SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
690 for(global_size_type j = 0; j < nrhs; ++j) {
691 for(size_t i = 0; i < local_len_rhs; ++i) {
692 bvals_[i + j*ld] = mult_op(bvals_[i + j*ld], data_.R[first_global_row_b + i]);
693 }
694 }
695 }
696
697 // Solve
698 int ierr = 0; // returned error code
699 {
700#ifdef HAVE_AMESOS2_TIMERS
701 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
702#endif
703
704#if (SUPERLU_DIST_MAJOR_VERSION > 7)
705 function_map::gstrs(&(data_.options), as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
706 &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
707 as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
708 as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
709 &(data_.solve_struct), &(data_.stat), &ierr);
710#else
711 function_map::gstrs(as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
712 &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
713 as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
714 as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
715 &(data_.solve_struct), &(data_.stat), &ierr);
716#endif
717 } // end block for solve time
718
719 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
720 std::runtime_error,
721 "Argument " << -ierr << " to gstrs had an illegal value" );
722
723 // "Un-scale" the solution so that it is a solution of the original system
724 // if( data_.options.trans == SLUD::NOTRANS ){
725 // if( data_.colequ ){ // column equilibration has been done on AC
726 // // scale bxvals_ by diag(C)
727 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
728 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
729 // }
730 // } else if( data_.rowequ ){ // row equilibration has been done on AC
731 // // scale bxvals_ by diag(R)
732 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
733 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
734 // }
735 { // permute B to a solution of the original system
736#ifdef HAVE_AMESOS2_TIMERS
737 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
738#endif
739 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
740 function_map::permute_Dense_Matrix(as<SLUD::int_t>(first_global_row_b),
741 as<SLUD::int_t>(local_len_rhs),
742 data_.solve_struct.row_to_proc,
743 data_.solve_struct.inv_perm_c,
744 bvals_.getRawPtr(), ld,
745 xvals_.getRawPtr(), ld,
746 as<int>(nrhs),
747 &(data_.grid));
748 }
749
750 // Apply col-scaling if requested
751 if (data_.options.Equil == SLUD::YES && data_.colequ) {
752 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
753 SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
754 for(global_size_type j = 0; j < nrhs; ++j) {
755 for(size_t i = 0; i < local_len_rhs; ++i) {
756 xvals_[i + j*ld] = mult_op(xvals_[i + j*ld], data_.C[first_global_row_b + i]);
757 }
758 }
759 }
760 }
761
762 /* Update X's global values */
763 {
764#ifdef HAVE_AMESOS2_TIMERS
765 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
766#endif
767 typedef Util::put_1d_data_helper<MultiVecAdapter<Vector>,slu_type> put_helper;
768 put_helper::do_put(X,
769 xvals_(),
770 local_len_rhs,
771 Teuchos::ptrInArg(*superlu_rowmap_));
772 }
773
774 return EXIT_SUCCESS;
775 }
776
777
778 template <class Matrix, class Vector>
779 bool
781 {
782 // SuperLU_DIST requires square matrices
783 return( this->globalNumRows_ == this->globalNumCols_ );
784 }
785
786
787 template <class Matrix, class Vector>
788 void
789 Superludist<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
790 {
791 using Teuchos::as;
792 using Teuchos::RCP;
793 using Teuchos::getIntegralValue;
794 using Teuchos::ParameterEntryValidator;
795
796 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
797
798 if( parameterList->isParameter("npcol") || parameterList->isParameter("nprow") ){
799 TEUCHOS_TEST_FOR_EXCEPTION( !(parameterList->isParameter("nprow") &&
800 parameterList->isParameter("npcol")),
801 std::invalid_argument,
802 "nprow and npcol must be set together" );
803
804 SLUD::int_t nprow = parameterList->template get<SLUD::int_t>("nprow");
805 SLUD::int_t npcol = parameterList->template get<SLUD::int_t>("npcol");
806
807 TEUCHOS_TEST_FOR_EXCEPTION( nprow * npcol > this->getComm()->getSize(),
808 std::invalid_argument,
809 "nprow and npcol combination invalid" );
810
811 if( (npcol != data_.grid.npcol) || (nprow != data_.grid.nprow) ){
812 // De-allocate the default grid that was initialized in the constructor
813 SLUD::superlu_gridexit(&(data_.grid));
814 // Create a new grid
815 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
816 } // else our grid has not changed size since the last initialization
817 }
818
819 TEUCHOS_TEST_FOR_EXCEPTION( this->control_.useTranspose_,
820 std::invalid_argument,
821 "SuperLU_DIST does not support solving the tranpose system" );
822
823 data_.options.Trans = SLUD::NOTRANS; // should always be set this way;
824
825 // Equilbration option
826 bool equil = parameterList->get<bool>("Equil", false);
827 data_.options.Equil = equil ? SLUD::YES : SLUD::NO;
828
829 if( parameterList->isParameter("RowPerm") ){
830 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
831 parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
832
833 data_.options.RowPerm = getIntegralValue<SLUD::rowperm_t>(*parameterList, "RowPerm");
834 }
835
836 if( parameterList->isParameter("LargeDiag_MC64-Options") ){
837 data_.largediag_mc64_job = parameterList->template get<int>("LargeDiag_MC64-Options");
838 }
839
840 if( parameterList->isParameter("ColPerm") ){
841 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
842 parameterList->getEntry("ColPerm").setValidator(colperm_validator);
843
844 data_.options.ColPerm = getIntegralValue<SLUD::colperm_t>(*parameterList, "ColPerm");
845 }
846
847 // TODO: Uncomment when supported
848 // if( parameterList->isParameter("IterRefine") ){
849 // RCP<const ParameterEntryValidator> iter_refine_validator = valid_params->getEntry("IterRefine").validator();
850 // parameterList->getEntry("IterRefine").setValidator(iter_refine_validator);
851 // data_.options.IterRefine = getIntegralValue<SLUD::IterRefine_t>(*parameterList, "IterRefine");
852 // }
853 data_.options.IterRefine = SLUD::NOREFINE;
854
855 bool replace_tiny = parameterList->get<bool>("ReplaceTinyPivot", true);
856 data_.options.ReplaceTinyPivot = replace_tiny ? SLUD::YES : SLUD::NO;
857
858 if( parameterList->isParameter("IsContiguous") ){
859 is_contiguous_ = parameterList->get<bool>("IsContiguous");
860 }
861 }
862
863
864 template <class Matrix, class Vector>
865 Teuchos::RCP<const Teuchos::ParameterList>
867 {
868 using std::string;
869 using Teuchos::tuple;
870 using Teuchos::ParameterList;
871 using Teuchos::EnhancedNumberValidator;
872 using Teuchos::setStringToIntegralParameter;
873 using Teuchos::setIntParameter;
874 using Teuchos::stringToIntegralParameterEntryValidator;
875
876 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
877
878 if( is_null(valid_params) ){
879 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
880
881 Teuchos::RCP<EnhancedNumberValidator<SLUD::int_t> > col_row_validator
882 = Teuchos::rcp( new EnhancedNumberValidator<SLUD::int_t>() );
883 col_row_validator->setMin(1);
884
885 pl->set("npcol", data_.grid.npcol,
886 "Number of columns in the processor grid. "
887 "Must be set with nprow", col_row_validator);
888 pl->set("nprow", data_.grid.nprow,
889 "Number of rows in the SuperLU_DIST processor grid. "
890 "Must be set together with npcol", col_row_validator);
891
892 // validator will catch any value besides NOTRANS
893 setStringToIntegralParameter<SLUD::trans_t>("Trans", "NOTRANS",
894 "Solve for the transpose system or not",
895 tuple<string>("NOTRANS"),
896 tuple<string>("Do not solve with transpose"),
897 tuple<SLUD::trans_t>(SLUD::NOTRANS),
898 pl.getRawPtr());
899
900 // Equillbration
901 pl->set("Equil", false, "Whether to equilibrate the system before solve");
902
903 // TODO: uncomment when supported
904 // setStringToIntegralParameter<SLUD::IterRefine_t>("IterRefine", "NOREFINE",
905 // "Type of iterative refinement to use",
906 // tuple<string>("NOREFINE", "DOUBLE"),
907 // tuple<string>("Do not use iterative refinement",
908 // "Do double iterative refinement"),
909 // tuple<SLUD::IterRefine_t>(SLUD::NOREFINE,
910 // SLUD::DOUBLE),
911 // pl.getRawPtr());
912
913 // Tiny pivot handling
914 pl->set("ReplaceTinyPivot", true,
915 "Specifies whether to replace tiny diagonals during LU factorization");
916
917 // Row permutation
918 setStringToIntegralParameter<SLUD::rowperm_t>("RowPerm", "NOROWPERM",
919 "Specifies how to permute the rows of the "
920 "matrix for sparsity preservation",
921 tuple<string>("NOROWPERM", "LargeDiag_MC64"),
922 tuple<string>("Natural ordering",
923 "Duff/Koster algorithm"),
924 tuple<SLUD::rowperm_t>(SLUD::NOROWPERM,
925 SLUD::LargeDiag_MC64),
926 pl.getRawPtr());
927
928 setIntParameter("LargeDiag_MC64-Options", 4, "Options for RowPerm-LargeDiag_MC64", pl.getRawPtr());
929
930 // Column permutation
931 setStringToIntegralParameter<SLUD::colperm_t>("ColPerm", "PARMETIS",
932 "Specifies how to permute the columns of the "
933 "matrix for sparsity preservation",
934 tuple<string>("NATURAL", "PARMETIS"),
935 tuple<string>("Natural ordering",
936 "ParMETIS ordering on A^T + A"),
937 tuple<SLUD::colperm_t>(SLUD::NATURAL,
938 SLUD::PARMETIS),
939 pl.getRawPtr());
940
941 pl->set("IsContiguous", true, "Whether GIDs contiguous");
942
943 valid_params = pl;
944 }
945
946 return valid_params;
947 }
948
949
950 template <class Matrix, class Vector>
951 void
953 SLUD::int_t& nprow,
954 SLUD::int_t& npcol) const {
955 TEUCHOS_TEST_FOR_EXCEPTION( nprocs < 1,
956 std::invalid_argument,
957 "Number of MPI processes must be at least 1" );
958 SLUD::int_t c, r = 1;
959 while( r*r <= nprocs ) r++;
960 nprow = npcol = --r; // fall back to square grid
961 c = nprocs / r;
962 while( (r--)*c != nprocs ){
963 c = nprocs / r; // note integer division
964 }
965 ++r;
966 // prefer the square grid over a single row (which will only happen
967 // in the case of a prime nprocs
968 if( r > 1 || nprocs < 9){ // nprocs < 9 is a heuristic for the small cases
969 nprow = r;
970 npcol = c;
971 }
972 }
973
974
975 template <class Matrix, class Vector>
976 bool
978 // Extract the necessary information from mat and call SLU function
979 using Teuchos::Array;
980 using Teuchos::ArrayView;
981 using Teuchos::ptrInArg;
982 using Teuchos::as;
983
984 using SLUD::int_t;
985 const int numProcs = this->getComm()->getSize();
986 const int nprow = data_.grid.nprow;
987 const int npcol = data_.grid.npcol;
988
989#ifdef HAVE_AMESOS2_TIMERS
990 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
991#endif
992
993 // Cleanup old store memory if it's non-NULL
994 if( data_.A.Store != NULL ){
995 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
996 data_.A.Store = NULL;
997 }
998
999
1000 // is_contiguous : input is contiguous
1001 int_t nnz_ret = 0;
1002 int_t l_nnz, l_rows, g_rows, g_cols, fst_global_row;
1003 if (!is_contiguous_ && numProcs == nprow*npcol) {
1004#ifdef HAVE_AMESOS2_TIMERS
1005 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
1006#endif
1007 // reinex GIDs
1008 superlu_rowmap_ = this->matrixA_->getRowMap(); // use original map to redistribute vectors in solve
1009 Teuchos::RCP<const MatrixAdapter<Matrix> > contig_mat = this->matrixA_->reindex(superlu_contig_rowmap_, superlu_contig_colmap_, current_phase);
1010 l_nnz = as<int_t>(contig_mat->getLocalNNZ());
1011 l_rows = as<int_t>(contig_mat->getLocalNumRows());
1012 g_rows = as<int_t>(contig_mat->getGlobalNumRows());
1013 g_cols = g_rows; // we deal with square matrices
1014 fst_global_row = as<int_t>(superlu_contig_rowmap_->getMinGlobalIndex());
1015
1016 // fill arrays
1017 if (current_phase == PREORDERING)
1018 {
1019 Kokkos::resize(nzvals_temp_, l_nnz);
1020 Kokkos::resize(nzvals_view_, l_nnz);
1021 Kokkos::resize(colind_view_, l_nnz);
1022 Kokkos::resize(rowptr_view_, l_rows + 1);
1023 }
1025 host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get(
1026 contig_mat.ptr(),
1027 nzvals_temp_, colind_view_, rowptr_view_,
1028 nnz_ret,
1029 ptrInArg(*(contig_mat->getRowMap())),
1032 Kokkos::deep_copy(nzvals_view_, nzvals_temp_);
1033 } else {
1034#ifdef HAVE_AMESOS2_TIMERS
1035 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
1036#endif
1037 Teuchos::RCP<const MatrixAdapter<Matrix> > redist_mat
1038 = this->matrixA_->get(ptrInArg(*superlu_rowmap_));
1039
1040 // ssame as A's target
1041 superlu_contig_rowmap_ = superlu_rowmap_;
1042
1043 l_nnz = as<int_t>(redist_mat->getLocalNNZ());
1044 l_rows = as<int_t>(redist_mat->getLocalNumRows());
1045 g_rows = as<int_t>(redist_mat->getGlobalNumRows());
1046 g_cols = g_rows; // we deal with square matrices
1047 fst_global_row = as<int_t>(superlu_rowmap_->getMinGlobalIndex());
1048
1049 Kokkos::resize(nzvals_view_, l_nnz);
1050 Kokkos::resize(colind_view_, l_nnz);
1051 Kokkos::resize(rowptr_view_, l_rows + 1);
1052 {
1054 host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get(
1055 redist_mat.ptr(),
1056 nzvals_view_, colind_view_, rowptr_view_,
1057 nnz_ret,
1058 ptrInArg(*superlu_rowmap_),
1059 ROOTED,
1060 ARBITRARY);
1061 }
1062 }
1063
1064 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != l_nnz,
1065 std::runtime_error,
1066 "Did not get the expected number of non-zero vals ("
1067 +std::to_string(nnz_ret)+" vs "+std::to_string(l_nnz)+")");
1068
1069 // Get the SLU data type for this type of matrix
1070 SLUD::Dtype_t dtype = type_map::dtype;
1071
1072 if( in_grid_ ){
1073 function_map::create_CompRowLoc_Matrix(&(data_.A),
1074 g_rows, g_cols,
1075 l_nnz, l_rows, fst_global_row,
1076 nzvals_view_.data(),
1077 colind_view_.data(),
1078 rowptr_view_.data(),
1079 SLUD::SLU_NR_loc,
1080 dtype, SLUD::SLU_GE);
1081 }
1082
1083 return true;
1084}
1085
1086
1087 template<class Matrix, class Vector>
1088 const char* Superludist<Matrix,Vector>::name = "SuperLU_DIST";
1089
1090
1091} // end namespace Amesos2
1092
1093#endif // AMESOS2_SUPERLUDIST_DEF_HPP
Provides definition of SuperLU_DIST types as well as conversions and type traits.
@ DISTRIBUTED_NO_OVERLAP
Definition Amesos2_TypeDecl.hpp:91
@ ROOTED
Definition Amesos2_TypeDecl.hpp:93
@ ARBITRARY
Definition Amesos2_TypeDecl.hpp:109
@ SORTED_INDICES
Definition Amesos2_TypeDecl.hpp:108
Utility functions for Amesos2.
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers.
Definition Amesos2_SolverCore_decl.hpp:72
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Return a const parameter list of all of the valid parameters that this->setParameterList(....
Definition Amesos2_SolverCore_def.hpp:545
super_type & setParameters(const Teuchos::RCP< Teuchos::ParameterList > &parameterList) override
Set/update internal variables and solver options.
Definition Amesos2_SolverCore_def.hpp:513
Teuchos::RCP< const MatrixAdapter< Matrix > > matrixA_
The LHS operator.
Definition Amesos2_SolverCore_decl.hpp:421
global_size_type globalNumCols_
Number of global columns in matrixA_.
Definition Amesos2_SolverCore_decl.hpp:445
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns a pointer to the Teuchos::Comm communicator with this operator.
Definition Amesos2_SolverCore_decl.hpp:329
global_size_type globalNumRows_
Number of global rows in matrixA_.
Definition Amesos2_SolverCore_decl.hpp:442
Amesos2 interface to the distributed memory version of SuperLU.
Definition Amesos2_Superludist_decl.hpp:57
bool in_grid_
true if this processor is in SuperLU_DISTS's 2D process grid
Definition Amesos2_Superludist_decl.hpp:306
~Superludist()
Destructor.
Definition Amesos2_Superludist_def.hpp:208
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition Amesos2_Superludist_def.hpp:780
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition Amesos2_Superludist_def.hpp:789
int numericFactorization_impl()
SuperLU_DIST specific numeric factorization.
Definition Amesos2_Superludist_def.hpp:463
Teuchos::RCP< const map_type > superlu_rowmap_
Maps rows of the matrix to processors in the SuperLU_DIST processor grid.
Definition Amesos2_Superludist_decl.hpp:312
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal solver structures.
Definition Amesos2_Superludist_def.hpp:977
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition Amesos2_Superludist_def.hpp:358
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
SuperLU_DIST specific solve.
Definition Amesos2_Superludist_def.hpp:614
void get_default_grid_size(int nprocs, SLUD::int_t &nprow, SLUD::int_t &npcol) const
Definition Amesos2_Superludist_def.hpp:952
Superludist(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition Amesos2_Superludist_def.hpp:35
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using SuperLU_DIST.
Definition Amesos2_Superludist_def.hpp:414
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition Amesos2_Superludist_def.hpp:866
void computeRowPermutationLargeDiagMC64(SLUD::SuperMatrix &GA)
Compute the row permutation for option LargeDiag-MC64.
Definition Amesos2_Superludist_def.hpp:303
EPhase
Used to indicate a phase in the direct solution.
Definition Amesos2_TypeDecl.hpp:31
A templated MultiVector class adapter for Amesos2.
Definition Amesos2_MultiVecAdapter_decl.hpp:142
Helper class for getting 1-D copies of multivectors.
Definition Amesos2_MultiVecAdapter_decl.hpp:233
Similar to get_ccs_helper , but used to get a CRS representation of the given matrix.
Definition Amesos2_Util.hpp:644
Helper class for putting 1-D data arrays into multivectors.
Definition Amesos2_MultiVecAdapter_decl.hpp:339