Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_Superludist_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Amesos2: Templated Direct Sparse Solver Package
4//
5// Copyright 2011 NTESS and the Amesos2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
18#ifndef AMESOS2_SUPERLUDIST_DEF_HPP
19#define AMESOS2_SUPERLUDIST_DEF_HPP
20
21#include <Teuchos_Tuple.hpp>
22#include <Teuchos_StandardParameterEntryValidators.hpp>
23#include <Teuchos_DefaultMpiComm.hpp>
24#include <Teuchos_Details_MpiTypeTraits.hpp>
25
28#include "Amesos2_Util.hpp"
29
30
31namespace Amesos2 {
32
33
34 template <class Matrix, class Vector>
35 Superludist<Matrix,Vector>::Superludist(Teuchos::RCP<const Matrix> A,
36 Teuchos::RCP<Vector> X,
37 Teuchos::RCP<const Vector> B)
38 : SolverCore<Amesos2::Superludist,Matrix,Vector>(A, X, B)
39 , bvals_()
40 , xvals_()
41 , in_grid_(false)
42 , force_symbfact_(false)
43 , is_contiguous_(true)
44 , debug_level_(0)
45 {
46 using Teuchos::Comm;
47 // It's OK to depend on MpiComm explicitly here, because
48 // SuperLU_DIST requires MPI anyway.
49 using Teuchos::MpiComm;
50 using Teuchos::outArg;
51 using Teuchos::ParameterList;
52 using Teuchos::parameterList;
53 using Teuchos::RCP;
54 using Teuchos::rcp;
55 using Teuchos::rcp_dynamic_cast;
56 using Teuchos::REDUCE_SUM;
57 using Teuchos::reduceAll;
58 typedef global_ordinal_type GO;
59
61 // Set up the SuperLU_DIST processor grid //
63
64 RCP<const Comm<int> > comm = this->getComm ();
65 myRank = comm->getRank ();
66 numProcs = comm->getSize ();
67
68 SLUD::int_t nprow, npcol;
69 get_default_grid_size (numProcs, nprow, npcol);
70 {
71 // FIXME (mfh 16 Dec 2014) getComm() just returns
72 // matrixA_->getComm(), so it's not clear why we need to ask for
73 // the matrix's communicator separately here.
74 RCP<const Comm<int> > matComm = this->matrixA_->getComm ();
75 TEUCHOS_TEST_FOR_EXCEPTION(
76 matComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
77 "constructor: The matrix's communicator is null!");
78 RCP<const MpiComm<int> > matMpiComm =
79 rcp_dynamic_cast<const MpiComm<int> > (matComm);
80 // FIXME (mfh 16 Dec 2014) If the matrix's communicator is a
81 // SerialComm, we probably could just use MPI_COMM_SELF here.
82 // I'm not sure if SuperLU_DIST is smart enough to handle that
83 // case, though.
84 TEUCHOS_TEST_FOR_EXCEPTION(
85 matMpiComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
86 "constructor: The matrix's communicator is not an MpiComm!");
87 TEUCHOS_TEST_FOR_EXCEPTION(
88 matMpiComm->getRawMpiComm ().is_null (), std::logic_error, "Amesos2::"
89 "Superlustdist constructor: The matrix's communicator claims to be a "
90 "Teuchos::MpiComm<int>, but its getRawPtrComm() method returns "
91 "Teuchos::null! This means that the underlying MPI_Comm doesn't even "
92 "exist, which likely implies that the Teuchos::MpiComm was constructed "
93 "incorrectly. It means something different than if the MPI_Comm were "
94 "MPI_COMM_NULL.");
95 MPI_Comm rawMpiComm = (* (matMpiComm->getRawMpiComm ())) ();
96 data_.mat_comm = rawMpiComm;
97 // This looks a bit like ScaLAPACK's grid initialization (which
98 // technically takes place in the BLACS, not in ScaLAPACK
99 // proper). See http://netlib.org/scalapack/slug/node34.html.
100 // The main difference is that SuperLU_DIST depends explicitly
101 // on MPI, while the BLACS hides its communication protocol.
102 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
103 }
104
106 // Set some default parameters. //
107 // //
108 // Must do this after grid has been created in //
109 // case user specifies the nprow and npcol parameters //
111 SLUD::set_default_options_dist(&data_.options);
112
113 RCP<ParameterList> default_params =
114 parameterList (* (this->getValidParameters ()));
115 this->setParameters (default_params);
116
117 // Set some internal options
118 data_.options.Fact = SLUD::DOFACT;
119 data_.equed = SLUD::NOEQUIL; // No equilibration has yet been performed
120 data_.options.SolveInitialized = SLUD::NO;
121 data_.options.RefineInitialized = SLUD::NO;
122 data_.rowequ = false;
123 data_.colequ = false;
124 data_.perm_r.resize(this->globalNumRows_);
125 data_.perm_c.resize(this->globalNumCols_);
126 data_.largediag_mc64_job = 4;
127 for (global_size_type i = 0; i < this->globalNumRows_; i++)
128 data_.perm_r[i] = i;
129 for (global_size_type i = 0; i < this->globalNumCols_; i++)
130 data_.perm_c[i] = i;
131
133 // Set up a communicator for the parallel column ordering and //
134 // parallel symbolic factorization. //
136 data_.symb_comm = MPI_COMM_NULL;
137
138 // domains is the next power of 2 less than nprow*npcol. This
139 // value will be used for creating an MPI communicator for the
140 // pre-ordering and symbolic factorization methods.
141 data_.domains = (int) ( pow(2.0, floor(log10((double)nprow*npcol)/log10(2.0))) );
142
143 const int color = (myRank < data_.domains) ? 0 : MPI_UNDEFINED;
144 MPI_Comm_split (data_.mat_comm, color, myRank, &(data_.symb_comm));
145
147 // Set up a row Map that only includes processes that are in the
148 // SuperLU process grid. This will be used for redistributing A.
150
151 // mfh 16 Dec 2014: We could use createWeightedContigMapWithNode
152 // with myProcParticipates as the weight, but that costs an extra
153 // all-reduce.
154
155 // Set to 1 if I am in the grid, and I get some of the matrix rows.
156 int myProcParticipates = 0;
157 if (myRank < nprow * npcol) {
158 in_grid_ = true;
159 myProcParticipates = 1;
160 }
161
162 // Compute how many processes in the communicator belong to the
163 // process grid.
164 int numParticipatingProcs = 0;
165 reduceAll<int, int> (*comm, REDUCE_SUM, myProcParticipates,
166 outArg (numParticipatingProcs));
167 TEUCHOS_TEST_FOR_EXCEPTION(
168 this->globalNumRows_ != 0 && numParticipatingProcs == 0,
169 std::logic_error, "Amesos2::Superludist constructor: The matrix has "
170 << this->globalNumRows_ << " > 0 global row(s), but no processes in the "
171 "communicator want to participate in its factorization! nprow = "
172 << nprow << " and npcol = " << npcol << ".");
173
174 // Divide up the rows among the participating processes.
175 size_t myNumRows = 0;
176 {
177 const GO GNR = static_cast<GO> (this->globalNumRows_);
178 const GO quotient = (numParticipatingProcs == 0) ? static_cast<GO> (0) :
179 GNR / static_cast<GO> (numParticipatingProcs);
180 const GO remainder =
181 GNR - quotient * static_cast<GO> (numParticipatingProcs);
182 const GO lclNumRows = (static_cast<GO> (myRank) < remainder) ?
183 (quotient + static_cast<GO> (1)) : quotient;
184 myNumRows = static_cast<size_t> (lclNumRows);
185 }
186
187 // TODO: might only need to initialize if parallel symbolic factorization is requested.
188 const GO indexBase = this->matrixA_->getRowMap ()->getIndexBase ();
190 rcp (new map_type (this->globalNumRows_, myNumRows, indexBase, comm));
191 superlu_contig_rowmap_ = Teuchos::rcp (new map_type (0, 0, indexBase, comm));
192 superlu_contig_colmap_ = Teuchos::rcp (new map_type (0, 0, indexBase, comm));
193
195 // Do some other initialization //
197
198 data_.A.Store = NULL;
199 function_map::LUstructInit(this->globalNumRows_, this->globalNumCols_, &(data_.lu));
200 SLUD::PStatInit(&(data_.stat));
201 // We do not use ScalePermstructInit because we will use our own
202 // arrays for storing perm_r and perm_c
203 data_.scale_perm.perm_r = data_.perm_r.getRawPtr();
204 data_.scale_perm.perm_c = data_.perm_c.getRawPtr();
205 }
206
207
208 template <class Matrix, class Vector>
210 {
211 /* Free SuperLU_DIST data_types
212 * - Matrices
213 * - Vectors
214 * - Stat object
215 * - ScalePerm, LUstruct, grid, and solve objects
216 *
217 * Note: the function definitions are the same regardless whether
218 * complex or real, so we arbitrarily use the D namespace
219 */
220 if ( this->status_.getNumPreOrder() > 0 ){
222#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
223 SUPERLU_FREE( data_.sizes );
224 SUPERLU_FREE( data_.fstVtxSep );
225#else
226 free( data_.sizes );
227 free( data_.fstVtxSep );
228#endif
229 }
230
231 // Cleanup old matrix store memory if it's non-NULL. Our
232 // Teuchos::Array's will destroy rowind, colptr, and nzval for us
233 if( data_.A.Store != NULL ){
234 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
235 }
236
237 // LU data is initialized in numericFactorization_impl()
238 if ( this->status_.getNumNumericFact() > 0 ){
239 function_map::Destroy_LU(this->globalNumRows_, &(data_.grid), &(data_.lu));
240 }
241 function_map::LUstructFree(&(data_.lu));
242
243 // If a symbolic factorization is ever performed without a
244 // follow-up numericfactorization, there are some arrays in the
245 // Pslu_freeable struct which will never be free'd by
246 // SuperLU_DIST.
247 if ( this->status_.symbolicFactorizationDone() &&
248 !this->status_.numericFactorizationDone() ){
249 if ( data_.pslu_freeable.xlsub != NULL ){
250#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
251 SUPERLU_FREE( data_.pslu_freeable.xlsub );
252 SUPERLU_FREE( data_.pslu_freeable.lsub );
253#else
254 free( data_.pslu_freeable.xlsub );
255 free( data_.pslu_freeable.lsub );
256#endif
257 }
258 if ( data_.pslu_freeable.xusub != NULL ){
259#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
260 SUPERLU_FREE( data_.pslu_freeable.xusub );
261 SUPERLU_FREE( data_.pslu_freeable.usub );
262#else
263 free( data_.pslu_freeable.xusub );
264 free( data_.pslu_freeable.usub );
265#endif
266 }
267 if ( data_.pslu_freeable.supno_loc != NULL ){
268#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
269 SUPERLU_FREE( data_.pslu_freeable.supno_loc );
270 SUPERLU_FREE( data_.pslu_freeable.xsup_beg_loc );
271 SUPERLU_FREE( data_.pslu_freeable.xsup_end_loc );
272#else
273 free( data_.pslu_freeable.supno_loc );
274 free( data_.pslu_freeable.xsup_beg_loc );
275 free( data_.pslu_freeable.xsup_end_loc );
276#endif
277 }
278#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
279 SUPERLU_FREE( data_.pslu_freeable.globToLoc );
280#else
281 free( data_.pslu_freeable.globToLoc );
282#endif
283 }
284
285 SLUD::PStatFree( &(data_.stat) ) ;
286
287 // Teuchos::Arrays will free R, C, perm_r, and perm_c
288 // SLUD::D::ScalePermstructFree(&(data_.scale_perm));
289
290 if ( data_.options.SolveInitialized == SLUD::YES )
291 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
292
293 // gridexit of an older version frees SuperLU_MPI_DOUBLE_COMPLE,
294 // which could cause an issue if there are still active instances of superludist?
295 SLUD::superlu_gridexit(&(data_.grid)); // TODO: are there any
296 // cases where grid
297 // wouldn't be initialized?
298
299 if ( data_.symb_comm != MPI_COMM_NULL ) MPI_Comm_free(&(data_.symb_comm));
300 }
301
302 template<class Matrix, class Vector>
303 void
305 {
306 int job = data_.largediag_mc64_job;
307 if (job == 5)
308 {
309 data_.R1.resize(data_.A.nrow);
310 data_.C1.resize(data_.A.ncol);
311 }
312
313 SLUD::NCformat *GAstore = (SLUD::NCformat*) GA.Store;
314 SLUD::int_t* colptr = GAstore->colptr;
315 SLUD::int_t* rowind = GAstore->rowind;
316 SLUD::int_t nnz = GAstore->nnz;
317 slu_type *a_GA = (slu_type *) GAstore->nzval;
318 MPI_Datatype mpi_dtype = Teuchos::Details::MpiTypeTraits<magnitude_type>::getType();
319 MPI_Datatype mpi_itype = Teuchos::Details::MpiTypeTraits<SLUD::int_t>::getType();
320
321 int iinfo = 0;
322 if ( !data_.grid.iam ) { /* Process 0 finds a row permutation */
323 iinfo = function_map::ldperm_dist(job, data_.A.nrow, nnz, colptr, rowind, a_GA,
324 data_.perm_r.getRawPtr(), data_.R1.getRawPtr(), data_.C1.getRawPtr());
325
326 MPI_Bcast( &iinfo, 1, MPI_INT, 0, data_.grid.comm );
327 if ( iinfo == 0 ) {
328 MPI_Bcast( data_.perm_r.getRawPtr(), data_.A.nrow, mpi_itype, 0, data_.grid.comm );
329 if ( job == 5 && data_.options.Equil ) {
330 MPI_Bcast( data_.R1.getRawPtr(), data_.A.nrow, mpi_dtype, 0, data_.grid.comm );
331 MPI_Bcast( data_.C1.getRawPtr(), data_.A.ncol, mpi_dtype, 0, data_.grid.comm );
332 }
333 }
334 } else {
335 MPI_Bcast( &iinfo, 1, mpi_int_t, 0, data_.grid.comm );
336 if ( iinfo == 0 ) {
337 MPI_Bcast( data_.perm_r.getRawPtr(), data_.A.nrow, mpi_itype, 0, data_.grid.comm );
338 if ( job == 5 && data_.options.Equil ) {
339 MPI_Bcast( data_.R1.getRawPtr(), data_.A.nrow, mpi_dtype, 0, data_.grid.comm );
340 MPI_Bcast( data_.C1.getRawPtr(), data_.A.ncol, mpi_dtype, 0, data_.grid.comm );
341 }
342 }
343 }
344 TEUCHOS_TEST_FOR_EXCEPTION( iinfo != 0,
345 std::runtime_error,
346 "SuperLU_DIST pre-ordering failed to compute row perm with "
347 << iinfo << std::endl);
348
349 if (job == 5)
350 {
351 for (SLUD::int_t i = 0; i < data_.A.nrow; ++i) data_.R1[i] = exp(data_.R1[i]);
352 for (SLUD::int_t i = 0; i < data_.A.ncol; ++i) data_.C1[i] = exp(data_.C1[i]);
353 }
354 }
355
356
357 template<class Matrix, class Vector>
358 int
360 {
361 if (debug_level_ > 0 && myRank == 0) {
362 std::cout << " * Superludist::preOrdering ( size(int_t)= " << sizeof(SLUD::int_t)
363 << ", size(info_t) = * " << sizeof(SLUD::info_t)
364 << ", size(perm_t) = " << sizeof(SLUD::perm_int_t) << ") * " <<std::endl;
365 }
366 if (data_.options.RowPerm == SLUD::NOROWPERM) {
367 SLUD::int_t slu_rows_ub = Teuchos::as<SLUD::int_t>(this->globalNumRows_);
368 for( SLUD::int_t i = 0; i < slu_rows_ub; ++i ) data_.perm_r[i] = i;
369 }
370 else if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
371 if (!force_symbfact_)
372 // defer to numerical factorization because row permutation requires the matrix values
373 return (EXIT_SUCCESS + 1);
374 }
375 // loadA_impl(); // Refresh matrix values
376
377 if( in_grid_ ){
378 // If this function has been called at least once, then the
379 // sizes, and fstVtxSep arrays were allocated in
380 // get_perm_c_parmetis. Delete them before calling that
381 // function again. These arrays will also be dealloc'd in the
382 // deconstructor.
383 if( this->status_.getNumPreOrder() > 0 ){
384#if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
385 SUPERLU_FREE( data_.sizes );
386 SUPERLU_FREE( data_.fstVtxSep );
387#else
388 free( data_.sizes );
389 free( data_.fstVtxSep );
390#endif
391 }
392 float info = 0.0;
393 {
394#ifdef HAVE_AMESOS2_TIMERS
395 Teuchos::TimeMonitor preOrderTime( this->timers_.preOrderTime_ );
396#endif
397 info = SLUD::get_perm_c_parmetis( &(data_.A),
398 data_.perm_r.getRawPtr(), data_.perm_c.getRawPtr(),
399 data_.grid.nprow * data_.grid.npcol, data_.domains,
400 &(data_.sizes), &(data_.fstVtxSep),
401 &(data_.grid), &(data_.symb_comm) );
402 }
403 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
404 std::runtime_error,
405 "SuperLU_DIST pre-ordering ran out of memory after allocating "
406 << info << " bytes of memory" );
407 }
408
409 // Ordering will be applied directly before numeric factorization,
410 // after we have a chance to get updated coefficients from the
411 // matrix
412
413 return EXIT_SUCCESS;
414 }
415
416
417
418 template <class Matrix, class Vector>
419 int
421 {
422 // loadA_impl(); // Refresh matrix values
423 if (debug_level_ > 0 && myRank == 0) {
424 std::cout << " * Superludist::symbolicFactorization * " << std::endl;
425 }
426 if (!force_symbfact_) {
427 if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
428 // defer to numerical factorization because row permutation requires the matrix values
429 return (EXIT_SUCCESS + 1);
430 }
431 }
432
433 if( in_grid_ ){
434
435 float info = 0.0;
436 {
437#ifdef HAVE_AMESOS2_TIMERS
438 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
439#endif
440
441#if (SUPERLU_DIST_MAJOR_VERSION > 7)
442 info = SLUD::symbfact_dist(&(data_.options), (data_.grid.nprow) * (data_.grid.npcol),
443 data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
444 data_.perm_r.getRawPtr(), data_.sizes,
445 data_.fstVtxSep, &(data_.pslu_freeable),
446 &(data_.grid.comm), &(data_.symb_comm),
447 &(data_.mem_usage));
448
449#else
450 info = SLUD::symbfact_dist((data_.grid.nprow) * (data_.grid.npcol),
451 data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
452 data_.perm_r.getRawPtr(), data_.sizes,
453 data_.fstVtxSep, &(data_.pslu_freeable),
454 &(data_.grid.comm), &(data_.symb_comm),
455 &(data_.mem_usage));
456#endif
457 }
458 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
459 std::runtime_error,
460 "SuperLU_DIST symbolic factorization ran out of memory after"
461 " allocating " << info << " bytes of memory" );
462 }
463 same_symbolic_ = false;
464 same_solve_struct_ = false;
465
466 return EXIT_SUCCESS;
467 }
468
469
470 template <class Matrix, class Vector>
471 int
473 using Teuchos::as;
474 if (debug_level_ > 0 && myRank == 0) {
475 std::cout << " * Superludist::numericFactorization * " << std::endl;
476 }
477
478 // loadA_impl(); // Refresh the matrix values
479 SLUD::SuperMatrix GA; /* Global A in NC format */
480 bool need_value = false;
481
482 if( in_grid_ ) {
483 if( data_.options.Equil == SLUD::YES ) {
484 SLUD::info_t info = 0;
485
486 // Compute scaling
487 data_.R.resize(this->globalNumRows_);
488 data_.C.resize(this->globalNumCols_);
489 function_map::gsequ_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
490 &(data_.rowcnd), &(data_.colcnd), &(data_.amax), &info, &(data_.grid));
491
492 // Apply the scalings
493 function_map::laqgs_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
494 data_.rowcnd, data_.colcnd, data_.amax,
495 &(data_.equed));
496
497 data_.rowequ = (data_.equed == SLUD::ROW) || (data_.equed == SLUD::BOTH);
498 data_.colequ = (data_.equed == SLUD::COL) || (data_.equed == SLUD::BOTH);
499
500 // Compute and apply the row permutation
501 if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
502 // Create a column-order copy of A
503 need_value = true;
504 SLUD::D::pdCompRow_loc_to_CompCol_global(true, &data_.A, &data_.grid, &GA);
505
506 // Compute row permutation
507 computeRowPermutationLargeDiagMC64(GA);
508
509 // Here we do symbolic factorization
510 force_symbfact_ = true;
511 preOrdering_impl();
512 symbolicFactorization_impl();
513 force_symbfact_ = false;
514
515 // Apply row-permutation scaling for job=5
516 // Here we do it manually to bypass the threshold check in laqgs_loc
517 if (data_.largediag_mc64_job == 5)
518 {
519 SLUD::NRformat_loc *Astore = (SLUD::NRformat_loc*) data_.A.Store;
520 slu_type *a = (slu_type*) Astore->nzval;
521 SLUD::int_t m_loc = Astore->m_loc;
522 SLUD::int_t fst_row = Astore->fst_row;
523 SLUD::int_t i, j, irow = fst_row, icol;
524
525 /* Scale the distributed matrix further.
526 A <-- diag(R1)*A*diag(C1) */
527 SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
528 for (j = 0; j < m_loc; ++j) {
529 for (i = rowptr_view_.data()[j]; i < rowptr_view_.data()[j+1]; ++i) {
530 icol = colind_view_.data()[i];
531 a[i] = mult_op(a[i], data_.R1[irow] * data_.C1[icol]);
532 }
533 ++irow;
534 }
535
536 /* Multiply together the scaling factors */
537 if ( data_.rowequ ) for (i = 0; i < data_.A.nrow; ++i) data_.R[i] *= data_.R1[i];
538 else for (i = 0; i < data_.A.nrow; ++i) data_.R[i] = data_.R1[i];
539 if ( data_.colequ ) for (i = 0; i < data_.A.ncol; ++i) data_.C[i] *= data_.C1[i];
540 else for (i = 0; i < data_.A.ncol; ++i) data_.C[i] = data_.C1[i];
541
542 data_.rowequ = data_.colequ = 1;
543 }
544 }
545 }
546
547 // Apply the column ordering, so that AC is the column-permuted A, and compute etree
548 size_t nnz_loc = ((SLUD::NRformat_loc*)data_.A.Store)->nnz_loc;
549 for( size_t i = 0; i < nnz_loc; ++i ) colind_view_(i) = data_.perm_c[colind_view_(i)];
550
551 // Distribute data from the symbolic factorization
552 if( same_symbolic_ ){
553 // Note: with the SamePattern_SameRowPerm options, it does not
554 // matter that the glu_freeable member has never been
555 // initialized, because it is never accessed. It is a
556 // placeholder arg. The real work is done in data_.lu
557#if (SUPERLU_DIST_MAJOR_VERSION > 7)
558 data_.options.Fact = SLUD::SamePattern_SameRowPerm;
559 function_map::pdistribute(&(data_.options),
560 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
561 &(data_.A), &(data_.scale_perm),
562 &(data_.glu_freeable), &(data_.lu),
563 &(data_.grid));
564#else
565 function_map::pdistribute(SLUD::SamePattern_SameRowPerm,
566 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
567 &(data_.A), &(data_.scale_perm),
568 &(data_.glu_freeable), &(data_.lu),
569 &(data_.grid));
570#endif
571 } else {
572#if (SUPERLU_DIST_MAJOR_VERSION > 7)
573 data_.options.Fact = SLUD::DOFACT;
574 function_map::dist_psymbtonum(&(data_.options),
575 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
576 &(data_.A), &(data_.scale_perm),
577 &(data_.pslu_freeable), &(data_.lu),
578 &(data_.grid));
579#else
580 function_map::dist_psymbtonum(SLUD::DOFACT,
581 as<SLUD::int_t>(this->globalNumRows_), // aka "n"
582 &(data_.A), &(data_.scale_perm),
583 &(data_.pslu_freeable), &(data_.lu),
584 &(data_.grid));
585#endif
586 }
587
588 // Retrieve the normI of A (required by gstrf).
589 bool notran = (data_.options.Trans == SLUD::NOTRANS);
590 magnitude_type anorm = function_map::plangs((notran ? (char *)"1" : (char *)"I"), &(data_.A), &(data_.grid));
591
592 int info = 0;
593 {
594#ifdef HAVE_AMESOS2_TIMERS
595 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
596#endif
597 function_map::gstrf(&(data_.options), this->globalNumRows_,
598 this->globalNumCols_, anorm, &(data_.lu),
599 &(data_.grid), &(data_.stat), &info);
600 }
601
602 // Check output
603 TEUCHOS_TEST_FOR_EXCEPTION( info > 0,
604 std::runtime_error,
605 "L and U factors have been computed but U("
606 << info << "," << info << ") is exactly zero "
607 "(i.e. U is singular)");
608 }
609
610 if (need_value)
611 SLUD::Destroy_CompCol_Matrix_dist(&GA);
612
613 // The other option, that info_st < 0, denotes invalid parameters
614 // to the function, but we'll assume for now that that won't
615 // happen.
616
617 data_.options.Fact = SLUD::FACTORED;
618 same_symbolic_ = true;
619
620 return EXIT_SUCCESS;
621 }
622
623
624 template <class Matrix, class Vector>
625 int
627 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
628 {
629 using Teuchos::as;
630
631 // local_len_rhs is how many of the multivector rows belong to
632 // this processor in the SuperLU_DIST processor grid.
633 const size_t local_len_rhs = superlu_rowmap_->getLocalNumElements();
634 const global_size_type nrhs = X->getGlobalNumVectors();
635 const global_ordinal_type first_global_row_b = superlu_contig_rowmap_->getMinGlobalIndex();
636
637 // make sure our multivector storage is sized appropriately
638 bvals_.resize(nrhs * local_len_rhs);
639 xvals_.resize(nrhs * local_len_rhs);
640
641 // We assume the global length of the two vectors have already been
642 // checked for compatibility
643
644 { // get the values from B
645#ifdef HAVE_AMESOS2_TIMERS
646 Teuchos::TimeMonitor convTimer(this->timers_.vecConvTime_);
647#endif
648 {
649 // The input dense matrix for B should be distributed in the
650 // same manner as the superlu_dist matrix. That is, if a
651 // processor has m_loc rows of A, then it should also have
652 // m_loc rows of B (and the same rows). We accomplish this by
653 // distributing the multivector rows with the same Map that
654 // the matrix A's rows are distributed.
655#ifdef HAVE_AMESOS2_TIMERS
656 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
657#endif
658 // get grid-distributed mv data. The multivector data will be
659 // distributed across the processes in the SuperLU_DIST grid.
660 typedef Util::get_1d_copy_helper<MultiVecAdapter<Vector>,slu_type> copy_helper;
661 copy_helper::do_get(B,
662 bvals_(),
663 local_len_rhs,
664 Teuchos::ptrInArg(*superlu_rowmap_));
665 }
666 } // end block for conversion time
667
668 if( in_grid_ ){
669 // if( data_.options.trans == SLUD::NOTRANS ){
670 // if( data_.rowequ ){ // row equilibration has been done on AC
671 // // scale bxvals_ by diag(R)
672 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
673 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
674 // }
675 // } else if( data_.colequ ){ // column equilibration has been done on AC
676 // // scale bxvals_ by diag(C)
677 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
678 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
679 // }
680
681 // Initialize the SOLVEstruct_t.
682 //
683 // We are able to reuse the solve struct if we have not changed
684 // the sparsity pattern of L and U since the last solve
685 if( !same_solve_struct_ ){
686 if( data_.options.SolveInitialized == SLUD::YES ){
687 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
688 }
689 function_map::SolveInit(&(data_.options), &(data_.A), data_.perm_r.getRawPtr(),
690 data_.perm_c.getRawPtr(), as<SLUD::int_t>(nrhs), &(data_.lu),
691 &(data_.grid), &(data_.solve_struct));
692 // Flag that we can reuse this solve_struct unless another
693 // symbolicFactorization is called between here and the next
694 // solve.
695 same_solve_struct_ = true;
696 }
697
698 // Apply row-scaling if requested
699 if (data_.options.Equil == SLUD::YES && data_.rowequ) {
700 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
701 SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
702 for(global_size_type j = 0; j < nrhs; ++j) {
703 for(size_t i = 0; i < local_len_rhs; ++i) {
704 bvals_[i + j*ld] = mult_op(bvals_[i + j*ld], data_.R[first_global_row_b + i]);
705 }
706 }
707 }
708
709 // Solve
710 int ierr = 0; // returned error code
711 {
712#ifdef HAVE_AMESOS2_TIMERS
713 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
714#endif
715
716#if (SUPERLU_DIST_MAJOR_VERSION > 7)
717 function_map::gstrs(&(data_.options), as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
718 &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
719 as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
720 as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
721 &(data_.solve_struct), &(data_.stat), &ierr);
722#else
723 function_map::gstrs(as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
724 &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
725 as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
726 as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
727 &(data_.solve_struct), &(data_.stat), &ierr);
728#endif
729 } // end block for solve time
730
731 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
732 std::runtime_error,
733 "Argument " << -ierr << " to gstrs had an illegal value" );
734
735 // "Un-scale" the solution so that it is a solution of the original system
736 // if( data_.options.trans == SLUD::NOTRANS ){
737 // if( data_.colequ ){ // column equilibration has been done on AC
738 // // scale bxvals_ by diag(C)
739 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
740 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
741 // }
742 // } else if( data_.rowequ ){ // row equilibration has been done on AC
743 // // scale bxvals_ by diag(R)
744 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
745 // SLUD::slu_mt_mult<slu_type,magnitude_type>());
746 // }
747 { // permute B to a solution of the original system
748#ifdef HAVE_AMESOS2_TIMERS
749 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
750#endif
751 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
752 function_map::permute_Dense_Matrix(as<SLUD::int_t>(first_global_row_b),
753 as<SLUD::int_t>(local_len_rhs),
754 data_.solve_struct.row_to_proc,
755 data_.solve_struct.inv_perm_c,
756 bvals_.getRawPtr(), ld,
757 xvals_.getRawPtr(), ld,
758 as<int>(nrhs),
759 &(data_.grid));
760 }
761
762 // Apply col-scaling if requested
763 if (data_.options.Equil == SLUD::YES && data_.colequ) {
764 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
765 SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
766 for(global_size_type j = 0; j < nrhs; ++j) {
767 for(size_t i = 0; i < local_len_rhs; ++i) {
768 xvals_[i + j*ld] = mult_op(xvals_[i + j*ld], data_.C[first_global_row_b + i]);
769 }
770 }
771 }
772 }
773
774 /* Update X's global values */
775 {
776#ifdef HAVE_AMESOS2_TIMERS
777 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
778#endif
779 typedef Util::put_1d_data_helper<MultiVecAdapter<Vector>,slu_type> put_helper;
780 put_helper::do_put(X,
781 xvals_(),
782 local_len_rhs,
783 Teuchos::ptrInArg(*superlu_rowmap_));
784 }
785
786 return EXIT_SUCCESS;
787 }
788
789
790 template <class Matrix, class Vector>
791 bool
793 {
794 // SuperLU_DIST requires square matrices
795 return( this->globalNumRows_ == this->globalNumCols_ );
796 }
797
798
799 template <class Matrix, class Vector>
800 void
801 Superludist<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
802 {
803 using Teuchos::as;
804 using Teuchos::RCP;
805 using Teuchos::getIntegralValue;
806 using Teuchos::ParameterEntryValidator;
807
808 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
809
810 if( parameterList->isParameter("npcol") || parameterList->isParameter("nprow") ){
811 TEUCHOS_TEST_FOR_EXCEPTION( !(parameterList->isParameter("nprow") &&
812 parameterList->isParameter("npcol")),
813 std::invalid_argument,
814 "nprow and npcol must be set together" );
815
816 SLUD::int_t nprow = parameterList->template get<SLUD::int_t>("nprow");
817 SLUD::int_t npcol = parameterList->template get<SLUD::int_t>("npcol");
818
819 TEUCHOS_TEST_FOR_EXCEPTION( nprow * npcol > this->getComm()->getSize(),
820 std::invalid_argument,
821 "nprow and npcol combination invalid" );
822
823 if( (npcol != data_.grid.npcol) || (nprow != data_.grid.nprow) ){
824 // De-allocate the default grid that was initialized in the constructor
825 SLUD::superlu_gridexit(&(data_.grid));
826 // Create a new grid
827 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
828 } // else our grid has not changed size since the last initialization
829 }
830
831 TEUCHOS_TEST_FOR_EXCEPTION( this->control_.useTranspose_,
832 std::invalid_argument,
833 "SuperLU_DIST does not support solving the tranpose system" );
834
835 data_.options.Trans = SLUD::NOTRANS; // should always be set this way;
836
837 // Equilbration option
838 bool equil = parameterList->get<bool>("Equil", false);
839 data_.options.Equil = equil ? SLUD::YES : SLUD::NO;
840
841 if( parameterList->isParameter("RowPerm") ){
842 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
843 parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
844
845 data_.options.RowPerm = getIntegralValue<SLUD::rowperm_t>(*parameterList, "RowPerm");
846 }
847
848 if( parameterList->isParameter("LargeDiag_MC64-Options") ){
849 data_.largediag_mc64_job = parameterList->template get<int>("LargeDiag_MC64-Options");
850 }
851
852 if( parameterList->isParameter("ColPerm") ){
853 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
854 parameterList->getEntry("ColPerm").setValidator(colperm_validator);
855
856 data_.options.ColPerm = getIntegralValue<SLUD::colperm_t>(*parameterList, "ColPerm");
857 }
858
859 // TODO: Uncomment when supported
860 // if( parameterList->isParameter("IterRefine") ){
861 // RCP<const ParameterEntryValidator> iter_refine_validator = valid_params->getEntry("IterRefine").validator();
862 // parameterList->getEntry("IterRefine").setValidator(iter_refine_validator);
863 // data_.options.IterRefine = getIntegralValue<SLUD::IterRefine_t>(*parameterList, "IterRefine");
864 // }
865 data_.options.IterRefine = SLUD::NOREFINE;
866
867 bool replace_tiny = parameterList->get<bool>("ReplaceTinyPivot", true);
868 data_.options.ReplaceTinyPivot = replace_tiny ? SLUD::YES : SLUD::NO;
869
870 if( parameterList->isParameter("IsContiguous") ){
871 is_contiguous_ = parameterList->get<bool>("IsContiguous");
872 }
873 if( parameterList->isParameter("DebugLevel") ){
874 debug_level_ = parameterList->get<int>("DebugLevel");
875 }
876 }
877
878
879 template <class Matrix, class Vector>
880 Teuchos::RCP<const Teuchos::ParameterList>
882 {
883 using std::string;
884 using Teuchos::tuple;
885 using Teuchos::ParameterList;
886 using Teuchos::EnhancedNumberValidator;
887 using Teuchos::setStringToIntegralParameter;
888 using Teuchos::setIntParameter;
889 using Teuchos::stringToIntegralParameterEntryValidator;
890
891 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
892
893 if( is_null(valid_params) ){
894 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
895
896 Teuchos::RCP<EnhancedNumberValidator<SLUD::int_t> > col_row_validator
897 = Teuchos::rcp( new EnhancedNumberValidator<SLUD::int_t>() );
898 col_row_validator->setMin(1);
899
900 pl->set("npcol", data_.grid.npcol,
901 "Number of columns in the processor grid. "
902 "Must be set with nprow", col_row_validator);
903 pl->set("nprow", data_.grid.nprow,
904 "Number of rows in the SuperLU_DIST processor grid. "
905 "Must be set together with npcol", col_row_validator);
906
907 // validator will catch any value besides NOTRANS
908 setStringToIntegralParameter<SLUD::trans_t>("Trans", "NOTRANS",
909 "Solve for the transpose system or not",
910 tuple<string>("NOTRANS"),
911 tuple<string>("Do not solve with transpose"),
912 tuple<SLUD::trans_t>(SLUD::NOTRANS),
913 pl.getRawPtr());
914
915 // Equillbration
916 pl->set("Equil", false, "Whether to equilibrate the system before solve");
917
918 // TODO: uncomment when supported
919 // setStringToIntegralParameter<SLUD::IterRefine_t>("IterRefine", "NOREFINE",
920 // "Type of iterative refinement to use",
921 // tuple<string>("NOREFINE", "DOUBLE"),
922 // tuple<string>("Do not use iterative refinement",
923 // "Do double iterative refinement"),
924 // tuple<SLUD::IterRefine_t>(SLUD::NOREFINE,
925 // SLUD::DOUBLE),
926 // pl.getRawPtr());
927
928 // Tiny pivot handling
929 pl->set("ReplaceTinyPivot", true,
930 "Specifies whether to replace tiny diagonals during LU factorization");
931
932 // Row permutation
933 setStringToIntegralParameter<SLUD::rowperm_t>("RowPerm", "NOROWPERM",
934 "Specifies how to permute the rows of the "
935 "matrix for sparsity preservation",
936 tuple<string>("NOROWPERM", "LargeDiag_MC64"),
937 tuple<string>("Natural ordering",
938 "Duff/Koster algorithm"),
939 tuple<SLUD::rowperm_t>(SLUD::NOROWPERM,
940 SLUD::LargeDiag_MC64),
941 pl.getRawPtr());
942
943 setIntParameter("LargeDiag_MC64-Options", 4, "Options for RowPerm-LargeDiag_MC64", pl.getRawPtr());
944
945 // Column permutation
946 setStringToIntegralParameter<SLUD::colperm_t>("ColPerm", "PARMETIS",
947 "Specifies how to permute the columns of the "
948 "matrix for sparsity preservation",
949 tuple<string>("NATURAL", "PARMETIS"),
950 tuple<string>("Natural ordering",
951 "ParMETIS ordering on A^T + A"),
952 tuple<SLUD::colperm_t>(SLUD::NATURAL,
953 SLUD::PARMETIS),
954 pl.getRawPtr());
955
956 pl->set("IsContiguous", true, "Whether GIDs contiguous");
957 pl->set("DebugLevel", 0, "Message levels for debuging");
958
959 valid_params = pl;
960 }
961
962 return valid_params;
963 }
964
965
966 template <class Matrix, class Vector>
967 void
969 SLUD::int_t& nprow,
970 SLUD::int_t& npcol) const {
971 TEUCHOS_TEST_FOR_EXCEPTION( nprocs < 1,
972 std::invalid_argument,
973 "Number of MPI processes must be at least 1" );
974 SLUD::int_t c, r = 1;
975 while( r*r <= nprocs ) r++;
976 nprow = npcol = --r; // fall back to square grid
977 c = nprocs / r;
978 while( (r--)*c != nprocs ){
979 c = nprocs / r; // note integer division
980 }
981 ++r;
982 // prefer the square grid over a single row (which will only happen
983 // in the case of a prime nprocs
984 if( r > 1 || nprocs < 9){ // nprocs < 9 is a heuristic for the small cases
985 nprow = r;
986 npcol = c;
987 }
988 }
989
990
991 template <class Matrix, class Vector>
992 bool
994 // Extract the necessary information from mat and call SLU function
995 using Teuchos::Array;
996 using Teuchos::ArrayView;
997 using Teuchos::ptrInArg;
998 using Teuchos::as;
999
1000 using SLUD::int_t;
1001 const int nprow = data_.grid.nprow;
1002 const int npcol = data_.grid.npcol;
1003
1004#ifdef HAVE_AMESOS2_TIMERS
1005 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
1006#endif
1007
1008 // Cleanup old store memory if it's non-NULL
1009 if( data_.A.Store != NULL ){
1010 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
1011 data_.A.Store = NULL;
1012 }
1013
1014 // is_contiguous : input is contiguous
1015 int_t nnz_ret = 0;
1016 int_t l_nnz, l_rows, g_rows, g_cols, fst_global_row;
1017 if (!is_contiguous_ && numProcs == nprow*npcol) {
1018#ifdef HAVE_AMESOS2_TIMERS
1019 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
1020#endif
1021 // reinex GIDs
1022 superlu_rowmap_ = this->matrixA_->getRowMap(); // use original map to redistribute vectors in solve
1023 Teuchos::RCP<const MatrixAdapter<Matrix> > contig_mat = this->matrixA_->reindex(superlu_contig_rowmap_, superlu_contig_colmap_, current_phase);
1024 l_nnz = as<int_t>(contig_mat->getLocalNNZ());
1025 l_rows = as<int_t>(contig_mat->getLocalNumRows());
1026 g_rows = as<int_t>(contig_mat->getGlobalNumRows());
1027 g_cols = g_rows; // we deal with square matrices
1028 fst_global_row = as<int_t>(superlu_contig_rowmap_->getMinGlobalIndex());
1029
1030 // fill arrays
1031 if (current_phase == PREORDERING)
1032 {
1033 Kokkos::resize(nzvals_temp_, l_nnz);
1034 Kokkos::resize(nzvals_view_, l_nnz);
1035 Kokkos::resize(colind_view_, l_nnz);
1036 Kokkos::resize(rowptr_view_, l_rows + 1);
1037 }
1039 host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get(
1040 contig_mat.ptr(),
1041 nzvals_temp_, colind_view_, rowptr_view_,
1042 nnz_ret,
1043 ptrInArg(*(contig_mat->getRowMap())),
1046 Kokkos::deep_copy(nzvals_view_, nzvals_temp_);
1047 } else {
1048#ifdef HAVE_AMESOS2_TIMERS
1049 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
1050#endif
1051 Teuchos::RCP<const MatrixAdapter<Matrix> > redist_mat
1052 = this->matrixA_->get(ptrInArg(*superlu_rowmap_));
1053
1054 // same as A's target
1055 superlu_contig_rowmap_ = superlu_rowmap_;
1056
1057 l_nnz = as<int_t>(redist_mat->getLocalNNZ());
1058 l_rows = as<int_t>(redist_mat->getLocalNumRows());
1059 g_rows = as<int_t>(redist_mat->getGlobalNumRows());
1060 g_cols = g_rows; // we deal with square matrices
1061 fst_global_row = as<int_t>(superlu_rowmap_->getMinGlobalIndex());
1062
1063 Kokkos::resize(nzvals_view_, l_nnz);
1064 Kokkos::resize(colind_view_, l_nnz);
1065 Kokkos::resize(rowptr_view_, l_rows + 1);
1066 {
1068 host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get(
1069 redist_mat.ptr(),
1070 nzvals_view_, colind_view_, rowptr_view_,
1071 nnz_ret,
1072 ptrInArg(*superlu_rowmap_),
1073 ROOTED,
1074 ARBITRARY);
1075 }
1076 }
1077
1078 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != l_nnz,
1079 std::runtime_error,
1080 "Did not get the expected number of non-zero vals ("
1081 +std::to_string(nnz_ret)+" vs "+std::to_string(l_nnz)+")");
1082
1083 // Get the SLU data type for this type of matrix
1084 SLUD::Dtype_t dtype = type_map::dtype;
1085
1086 if( in_grid_ ){
1087 function_map::create_CompRowLoc_Matrix(&(data_.A),
1088 g_rows, g_cols,
1089 l_nnz, l_rows, fst_global_row,
1090 nzvals_view_.data(),
1091 colind_view_.data(),
1092 rowptr_view_.data(),
1093 SLUD::SLU_NR_loc,
1094 dtype, SLUD::SLU_GE);
1095 }
1096
1097 return true;
1098}
1099
1100
1101 template<class Matrix, class Vector>
1102 const char* Superludist<Matrix,Vector>::name = "SuperLU_DIST";
1103
1104
1105} // end namespace Amesos2
1106
1107#endif // AMESOS2_SUPERLUDIST_DEF_HPP
Provides definition of SuperLU_DIST types as well as conversions and type traits.
@ DISTRIBUTED_NO_OVERLAP
Definition Amesos2_TypeDecl.hpp:91
@ ROOTED
Definition Amesos2_TypeDecl.hpp:93
@ ARBITRARY
Definition Amesos2_TypeDecl.hpp:109
@ SORTED_INDICES
Definition Amesos2_TypeDecl.hpp:108
Utility functions for Amesos2.
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers.
Definition Amesos2_SolverCore_decl.hpp:72
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Return a const parameter list of all of the valid parameters that this->setParameterList(....
Definition Amesos2_SolverCore_def.hpp:537
super_type & setParameters(const Teuchos::RCP< Teuchos::ParameterList > &parameterList) override
Set/update internal variables and solver options.
Definition Amesos2_SolverCore_def.hpp:505
Teuchos::RCP< const MatrixAdapter< Matrix > > matrixA_
The LHS operator.
Definition Amesos2_SolverCore_decl.hpp:421
global_size_type globalNumCols_
Number of global columns in matrixA_.
Definition Amesos2_SolverCore_decl.hpp:445
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns a pointer to the Teuchos::Comm communicator with this operator.
Definition Amesos2_SolverCore_decl.hpp:329
global_size_type globalNumRows_
Number of global rows in matrixA_.
Definition Amesos2_SolverCore_decl.hpp:442
Amesos2 interface to the distributed memory version of SuperLU.
Definition Amesos2_Superludist_decl.hpp:57
~Superludist()
Destructor.
Definition Amesos2_Superludist_def.hpp:209
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition Amesos2_Superludist_def.hpp:792
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition Amesos2_Superludist_def.hpp:801
int numericFactorization_impl()
SuperLU_DIST specific numeric factorization.
Definition Amesos2_Superludist_def.hpp:472
Teuchos::RCP< const map_type > superlu_rowmap_
Maps rows of the matrix to processors in the SuperLU_DIST processor grid.
Definition Amesos2_Superludist_decl.hpp:314
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal solver structures.
Definition Amesos2_Superludist_def.hpp:993
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition Amesos2_Superludist_def.hpp:359
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
SuperLU_DIST specific solve.
Definition Amesos2_Superludist_def.hpp:626
int myRank
true if this processor is in SuperLU_DISTS's 2D process grid
Definition Amesos2_Superludist_decl.hpp:306
void get_default_grid_size(int nprocs, SLUD::int_t &nprow, SLUD::int_t &npcol) const
Definition Amesos2_Superludist_def.hpp:968
Superludist(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition Amesos2_Superludist_def.hpp:35
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using SuperLU_DIST.
Definition Amesos2_Superludist_def.hpp:420
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition Amesos2_Superludist_def.hpp:881
void computeRowPermutationLargeDiagMC64(SLUD::SuperMatrix &GA)
Compute the row permutation for option LargeDiag-MC64.
Definition Amesos2_Superludist_def.hpp:304
EPhase
Used to indicate a phase in the direct solution.
Definition Amesos2_TypeDecl.hpp:31
A templated MultiVector class adapter for Amesos2.
Definition Amesos2_MultiVecAdapter_decl.hpp:142
Helper class for getting 1-D copies of multivectors.
Definition Amesos2_MultiVecAdapter_decl.hpp:233
Similar to get_ccs_helper , but used to get a CRS representation of the given matrix.
Definition Amesos2_Util.hpp:600
Helper class for putting 1-D data arrays into multivectors.
Definition Amesos2_MultiVecAdapter_decl.hpp:339