Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_Superlu_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Amesos2: Templated Direct Sparse Solver Package
4//
5// Copyright 2011 NTESS and the Amesos2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
19#ifndef AMESOS2_SUPERLU_DEF_HPP
20#define AMESOS2_SUPERLU_DEF_HPP
21
22#include <Teuchos_Tuple.hpp>
23#include <Teuchos_ParameterList.hpp>
24#include <Teuchos_StandardParameterEntryValidators.hpp>
25
28
29#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
30// TODO: This 'using namespace SLU' is not a good thing.
31// It was added because kernels does not namespace SuperLU but Amesos2 does.
32// Declaring the namespace SLU allows us to reuse that file without duplication.
33// We need to determine a uniform system to avoid this this but for now, at least
34// this issue is only present when TRSV is enabled.
35using namespace SLU;
36#include "KokkosSparse_sptrsv_superlu.hpp"
37#endif
38
39namespace Amesos2 {
40
41
42template <class Matrix, class Vector>
44 Teuchos::RCP<const Matrix> A,
45 Teuchos::RCP<Vector> X,
46 Teuchos::RCP<const Vector> B )
47 : SolverCore<Amesos2::Superlu,Matrix,Vector>(A, X, B)
48#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
49 , sptrsv_invert_diag_(true)
50 , sptrsv_invert_offdiag_ (false)
51 , sptrsv_u_in_csr_ (true)
52 , sptrsv_merge_supernodes_ (false)
53 , sptrsv_use_spmv_ (false)
54#endif
55 , is_contiguous_(true) // default is set by params
56 , use_triangular_solves_(false) // default is set by params
57 , use_metis_(false)
58 , symmetrize_metis_(true)
59{
60 // ilu_set_default_options is called later in set parameter list if required.
61 // This is not the ideal way, but the other option to always call
62 // ilu_set_default_options here and assuming it won't have any side effect
63 // in the TPL is more dangerous. It is not a good idea to rely on external
64 // libraries' internal "features".
65 SLU::set_default_options(&(data_.options));
66 // Override some default options
67 data_.options.PrintStat = SLU::NO;
68
69 SLU::StatInit(&(data_.stat));
70
71 Kokkos::resize(data_.perm_r, this->globalNumRows_);
72 Kokkos::resize(data_.perm_c, this->globalNumCols_);
73 Kokkos::resize(data_.etree, this->globalNumCols_);
74 Kokkos::resize(data_.R, this->globalNumRows_);
75 Kokkos::resize(data_.C, this->globalNumCols_);
76
77 data_.relax = SLU::sp_ienv(2); // Query optimal relax param from superlu
78 data_.panel_size = SLU::sp_ienv(1); // Query optimal panel size
79
80 data_.equed = 'N'; // No equilibration
81 data_.rowequ = false;
82 data_.colequ = false;
83 data_.A.Store = NULL;
84 data_.L.Store = NULL;
85 data_.U.Store = NULL;
86 data_.X.Store = NULL;
87 data_.B.Store = NULL;
88
89 ILU_Flag_=false; // default: turn off ILU
90}
91
92
93template <class Matrix, class Vector>
95{
96 /* Free Superlu data_types
97 * - Matrices
98 * - Vectors
99 * - Stat object
100 */
101 SLU::StatFree( &(data_.stat) ) ;
102
103 // Storage is initialized in numericFactorization_impl()
104 if ( data_.A.Store != NULL ){
105 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
106 }
107
108 // only root allocated these SuperMatrices.
109 if ( data_.L.Store != NULL ){ // will only be true for this->root_
110 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
111 SLU::Destroy_CompCol_Matrix( &(data_.U) );
112 }
113}
114
115template <class Matrix, class Vector>
116std::string
118{
119 std::ostringstream oss;
120 oss << "SuperLU solver interface";
121 if (ILU_Flag_) {
122 oss << ", \"ILUTP\" : {";
123 oss << "drop tol = " << data_.options.ILU_DropTol;
124 oss << ", fill factor = " << data_.options.ILU_FillFactor;
125 oss << ", fill tol = " << data_.options.ILU_FillTol;
126 switch(data_.options.ILU_MILU) {
127 case SLU::SMILU_1 :
128 oss << ", MILU 1";
129 break;
130 case SLU::SMILU_2 :
131 oss << ", MILU 2";
132 break;
133 case SLU::SMILU_3 :
134 oss << ", MILU 3";
135 break;
136 case SLU::SILU :
137 default:
138 oss << ", regular ILU";
139 }
140 switch(data_.options.ILU_Norm) {
141 case SLU::ONE_NORM :
142 oss << ", 1-norm";
143 break;
144 case SLU::TWO_NORM :
145 oss << ", 2-norm";
146 break;
147 case SLU::INF_NORM :
148 default:
149 oss << ", infinity-norm";
150 }
151 oss << "}";
152 } else {
153 oss << ", direct solve";
154 }
155 return oss.str();
156 /*
157
158 // ILU parameters
159 if( parameterList->isParameter("RowPerm") ){
160 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
161 parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
162 data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
163 }
164
165
166 */
167}
168
169template<class Matrix, class Vector>
170int
172{
173 /*
174 * Get column permutation vector perm_c[], according to permc_spec:
175 * permc_spec = NATURAL: natural ordering
176 * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
177 * permc_spec = MMD_ATA: minimum degree on structure of A'*A
178 * permc_spec = COLAMD: approximate minimum degree column ordering
179 * permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
180 */
181 int permc_spec = data_.options.ColPerm;
182 if (!use_metis_ && permc_spec != SLU::MY_PERMC && this->root_ ){
183#ifdef HAVE_AMESOS2_TIMERS
184 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
185#endif
186
187 SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.data());
188 }
189
190 return(0);
191}
192
193
194template <class Matrix, class Vector>
195int
197{
198 /*
199 * SuperLU performs symbolic factorization and numeric factorization
200 * together, but does leave some options for reusing symbolic
201 * structures that have been created on previous factorizations. If
202 * our Amesos2 user calls this function, that is an indication that
203 * the symbolic structure of the matrix is no longer valid, and
204 * SuperLU should do the factorization from scratch.
205 *
206 * This can be accomplished by setting the options.Fact flag to
207 * DOFACT, as well as setting our own internal flag to false.
208 */
209#ifdef HAVE_AMESOS2_TIMERS
210 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
211#endif
212 same_symbolic_ = false;
213 data_.options.Fact = SLU::DOFACT;
214
215 if (use_metis_) {
216#ifdef HAVE_AMESOS2_TIMERS
217 Teuchos::RCP< Teuchos::Time > MetisTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for METIS");
218 Teuchos::TimeMonitor numFactTimer(*MetisTimer_);
219#endif
220 size_t n = this->globalNumRows_;
221 size_t nz = this->globalNumNonZeros_;
222 host_size_type_array host_rowind_view_ = host_rows_view_;
223 host_ordinal_type_array host_colptr_view_ = host_col_ptr_view_;
224
225 /* symmetrize the matrix (A + A') if needed */
226 if (symmetrize_metis_) {
227 int new_nz = 0;
228 int *new_col_ptr;
229 int *new_row_ind;
230
231 // NOTE: both size_type and ordinal_type are defined as int
232 // Consider using "symmetrize_graph" in KokkosKernels, if this becomes significant in time.
233 SLU::at_plus_a(n, nz, host_col_ptr_view_.data(), host_rows_view_.data(),
234 &new_nz, &new_col_ptr, &new_row_ind);
235
236 // reallocate (so that we won't overwrite the original)
237 // and copy for output
238 nz = new_nz;
239 Kokkos::resize (host_rowind_view_, nz);
240 Kokkos::realloc(host_colptr_view_, n+1);
241 for (size_t i = 0; i <= n; i++) {
242 host_colptr_view_(i) = new_col_ptr[i];
243 }
244 for (size_t i = 0; i < nz; i++) {
245 host_rowind_view_(i) = new_row_ind[i];
246 }
247
248 // free
249 SLU::SUPERLU_FREE(new_col_ptr);
250 if (new_nz > 0) {
251 SLU::SUPERLU_FREE(new_row_ind);
252 }
253 }
254
255 // reorder will convert both graph and perm/iperm to the internal METIS integer type
256 using metis_int_array_type = host_ordinal_type_array;
257 metis_int_array_type metis_perm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing("metis_perm"), n);
258 metis_int_array_type metis_iperm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing("metis_iperm"), n);
259
260 // call METIS
261 size_t new_nnz = 0;
262 Amesos2::Util::reorder(
263 host_colptr_view_, host_rowind_view_,
264 metis_perm, metis_iperm, new_nnz, false);
265
266 for (size_t i = 0; i < n; i++) {
267 data_.perm_r(i) = metis_iperm(i);
268 data_.perm_c(i) = metis_iperm(i);
269 }
270
271 // SLU will use user-specified METIS ordering
272 data_.options.ColPerm = SLU::MY_PERMC;
273 data_.options.RowPerm = SLU::MY_PERMR;
274 // Turn off Equil not to mess up METIS ordering?
275 //data_.options.Equil = SLU::NO;
276 }
277 return(0);
278}
279
280
281template <class Matrix, class Vector>
282int
284{
285 using Teuchos::as;
286
287 // Cleanup old L and U matrices if we are not reusing a symbolic
288 // factorization. Stores and other data will be allocated in gstrf.
289 // Only rank 0 has valid pointers
290 if ( !same_symbolic_ && data_.L.Store != NULL ){
291 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
292 SLU::Destroy_CompCol_Matrix( &(data_.U) );
293 data_.L.Store = NULL;
294 data_.U.Store = NULL;
295 }
296
297 if( same_symbolic_ ) data_.options.Fact = SLU::SamePattern_SameRowPerm;
298
299 int info = 0;
300 int info2 = 0; // for Equil
301 if ( this->root_ ) {
302
303#ifdef HAVE_AMESOS2_DEBUG
304 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.ncol != as<int>(this->globalNumCols_),
305 std::runtime_error,
306 "Error in converting to SuperLU SuperMatrix: wrong number of global columns." );
307 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.nrow != as<int>(this->globalNumRows_),
308 std::runtime_error,
309 "Error in converting to SuperLU SuperMatrix: wrong number of global rows." );
310#endif
311
312 if( data_.options.Equil == SLU::YES ){
313 magnitude_type rowcnd, colcnd, amax;
314
315 // calculate row and column scalings
316 function_map::gsequ(&(data_.A), data_.R.data(),
317 data_.C.data(), &rowcnd, &colcnd,
318 &amax, &info2);
319
320 if (info2 == 0) {
321 // apply row and column scalings if necessary
322 function_map::laqgs(&(data_.A), data_.R.data(),
323 data_.C.data(), rowcnd, colcnd,
324 amax, &(data_.equed));
325
326 // check what types of equilibration was actually done
327 data_.rowequ = (data_.equed == 'R') || (data_.equed == 'B');
328 data_.colequ = (data_.equed == 'C') || (data_.equed == 'B');
329 }
330 }
331
332 if (info2 == 0) {
333 // Apply the column permutation computed in preOrdering. Place the
334 // column-permuted matrix in AC
335 SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.data(),
336 data_.etree.data(), &(data_.AC));
337
338 { // Do factorization
339#ifdef HAVE_AMESOS2_TIMERS
340 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
341#endif
342
343#ifdef HAVE_AMESOS2_VERBOSE_DEBUG
344 std::cout << "Superlu:: Before numeric factorization" << std::endl;
345 std::cout << "nzvals_ : " << nzvals_.toString() << std::endl;
346 std::cout << "rowind_ : " << rowind_.toString() << std::endl;
347 std::cout << "colptr_ : " << colptr_.toString() << std::endl;
348#endif
349
350 if(ILU_Flag_==false) {
351 function_map::gstrf(&(data_.options), &(data_.AC),
352 data_.relax, data_.panel_size, data_.etree.data(),
353 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
354 &(data_.L), &(data_.U),
355#ifdef HAVE_AMESOS2_SUPERLU5_API
356 &(data_.lu),
357#endif
358 &(data_.stat), &info);
359 }
360 else {
361 function_map::gsitrf(&(data_.options), &(data_.AC),
362 data_.relax, data_.panel_size, data_.etree.data(),
363 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
364 &(data_.L), &(data_.U),
365#ifdef HAVE_AMESOS2_SUPERLU5_API
366 &(data_.lu),
367#endif
368 &(data_.stat), &info);
369 }
370
371 if (data_.options.ConditionNumber == SLU::YES) {
372 char norm[1];
373 if (data_.options.Trans == SLU::NOTRANS) {
374 *(unsigned char *)norm = '1';
375 } else {
376 *(unsigned char *)norm = 'I';
377 }
378
379 data_.anorm = function_map::langs(norm, &(data_.A));
380 function_map::gscon(norm, &(data_.L), &(data_.U),
381 data_.anorm, &(data_.rcond),
382 &(data_.stat), &info);
383 }
384 }
385 // Cleanup. AC data will be alloc'd again for next factorization (if at all)
386 SLU::Destroy_CompCol_Permuted( &(data_.AC) );
387
388 // Set the number of non-zero values in the L and U factors
389 this->setNnzLU( as<size_t>(((SLU::SCformat*)data_.L.Store)->nnz +
390 ((SLU::NCformat*)data_.U.Store)->nnz) );
391 } // end of if (info2 == 0)
392 } // end of if (this->root)
393
394 if( data_.options.Equil == SLU::YES ) { // error check for Equil
395 /* All processes should have the same error code */
396 Teuchos::broadcast(*(this->getComm()), 0, &info2);
397
398 TEUCHOS_TEST_FOR_EXCEPTION
399 (info2 < 0, std::runtime_error,
400 "SuperLU's gsequ function returned with status " << info2 << " < 0."
401 " This means that argument " << (-info2) << " given to the function"
402 " had an illegal value.");
403 if (info2 > 0) {
404 if (info2 <= data_.A.nrow) {
405 TEUCHOS_TEST_FOR_EXCEPTION
406 (true, std::runtime_error, "SuperLU's gsequ function returned with "
407 "info = " << info2 << " > 0, and info <= A.nrow = " << data_.A.nrow
408 << ". This means that row " << info2 << " of A is exactly zero.");
409 }
410 else if (info2 > data_.A.ncol) {
411 TEUCHOS_TEST_FOR_EXCEPTION
412 (true, std::runtime_error, "SuperLU's gsequ function returned with "
413 "info = " << info2 << " > 0, and info > A.ncol = " << data_.A.ncol
414 << ". This means that column " << (info2 - data_.A.nrow) << " of "
415 "A is exactly zero.");
416 }
417 else {
418 TEUCHOS_TEST_FOR_EXCEPTION
419 (true, std::runtime_error, "SuperLU's gsequ function returned "
420 "with info = " << info2 << " > 0, but its value is not in the "
421 "range permitted by the documentation. This should never happen "
422 "(it appears to be SuperLU's fault).");
423 }
424 }
425 }
426
427 /* All processes should have the same error code */
428 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
429
430 global_size_type info_st = as<global_size_type>(info);
431 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
432 std::runtime_error,
433 "Factorization complete, but matrix is singular. Division by zero eminent");
434 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
435 std::runtime_error,
436 "Memory allocation failure in Superlu factorization");
437
438 data_.options.Fact = SLU::FACTORED;
439 same_symbolic_ = true;
440
441 if(use_triangular_solves_) {
442#ifdef HAVE_AMESOS2_VERBOSE_DEBUG
443 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
444 if (this->getComm()->getRank()) {
445 std::cout << " > Metis : " << (use_metis_ ? "YES" : "NO") << std::endl;
446 std::cout << " > Equil : " << (data_.options.Equil == SLU::YES ? "YES" : "NO") << std::endl;
447 std::cout << " > Cond Number : " << (data_.options.ConditionNumber == SLU::YES ? "YES" : "NO") << std::endl;
448 std::cout << " > Invert diag : " << sptrsv_invert_diag_ << std::endl;
449 std::cout << " > Invert off-diag : " << sptrsv_invert_offdiag_ << std::endl;
450 std::cout << " > U in CSR : " << sptrsv_u_in_csr_ << std::endl;
451 std::cout << " > Merge : " << sptrsv_merge_supernodes_ << std::endl;
452 std::cout << " > Use SpMV : " << sptrsv_use_spmv_ << std::endl;
453 }
454 //std::cout << myRank << " : siize(A) " << (data_.A.nrow) << " x " << (data_.A.ncol) << std::endl;
455 //std::cout << myRank << " : nnz(A) " << ((SLU::NCformat*)data_.A.Store)->nnz << std::endl;
456 //std::cout << myRank << " : nnz(L) " << ((SLU::SCformat*)data_.L.Store)->nnz << std::endl;
457 //std::cout << myRank << " : nnz(U) " << ((SLU::SCformat*)data_.U.Store)->nnz << std::endl;
458 #endif
459#endif
460#ifdef HAVE_AMESOS2_TIMERS
461 Teuchos::RCP< Teuchos::Time > SpTrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv setup");
462 Teuchos::TimeMonitor numFactTimer(*SpTrsvTimer_);
463#endif
464 triangular_solve_factor();
465 }
466
467 return(info);
468}
469
470
471template <class Matrix, class Vector>
472int
474 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
475{
476 using Teuchos::as;
477#ifdef HAVE_AMESOS2_TIMERS
478 Teuchos::RCP< Teuchos::Time > Amesos2SolveTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for Amesos2");
479 Teuchos::TimeMonitor solveTimer(*Amesos2SolveTimer_);
480#endif
481
482 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
483 const size_t nrhs = X->getGlobalNumVectors();
484
485 bool bDidAssignX = false; // will be set below
486 bool bDidAssignB = false; // will be set below
487 { // Get values from RHS B
488#ifdef HAVE_AMESOS2_TIMERS
489 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
490#endif
491
492 // In general we may want to write directly to the x space without a copy.
493 // So we 'get' x which may be a direct view assignment to the MV.
494 const bool initialize_data = true;
495 const bool do_not_initialize_data = false;
496 if(use_triangular_solves_) { // to device
497#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
498 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
499 device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
500 as<size_t>(ld_rhs),
501 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
502 this->rowIndexBase_);
503 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
504 device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
505 as<size_t>(ld_rhs),
506 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
507 this->rowIndexBase_);
508#endif
509 }
510 else { // to host
511 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
512 host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
513 as<size_t>(ld_rhs),
514 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
515 this->rowIndexBase_);
516 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
517 host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
518 as<size_t>(ld_rhs),
519 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
520 this->rowIndexBase_);
521 }
522 }
523
524 // If equilibration was applied at numeric, then gssvx and gsisx are going to
525 // modify B, so we can't use the optimized assignment to B since we'll change
526 // the source test vector and then fail the subsequent cycle. We need a copy.
527 // If bDidAssignB is false, then we skip the copy since it was copied already.
528 if(bDidAssignB && data_.equed != 'N') {
529 if(use_triangular_solves_) { // to device
530#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
531 device_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
532 device_bValues_.extent(0), device_bValues_.extent(1));
533 Kokkos::deep_copy(copyB, device_bValues_);
534 device_bValues_ = copyB;
535#endif
536 }
537 else {
538 host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
539 host_bValues_.extent(0), host_bValues_.extent(1));
540 Kokkos::deep_copy(copyB, host_bValues_);
541 host_bValues_ = copyB;
542 }
543 }
544
545 int ierr = 0; // returned error code
546
547 magnitude_type rpg, rcond;
548 if ( this->root_ ) {
549 // Note: the values of B and X (after solution) are adjusted
550 // appropriately within gssvx for row and column scalings.
551 { // Do solve!
552#ifdef HAVE_AMESOS2_TIMERS
553 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
554#endif
555
556 if(use_triangular_solves_) {
557 triangular_solve();
558 }
559 else {
560 Kokkos::resize(data_.ferr, nrhs);
561 Kokkos::resize(data_.berr, nrhs);
562
563 {
564 #ifdef HAVE_AMESOS2_TIMERS
565 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
566 #endif
567 SLU::Dtype_t dtype = type_map::dtype;
568 int i_ld_rhs = as<int>(ld_rhs);
569 function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
570 convert_bValues_, host_bValues_, i_ld_rhs,
571 SLU::SLU_DN, dtype, SLU::SLU_GE);
572 function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
573 convert_xValues_, host_xValues_, i_ld_rhs,
574 SLU::SLU_DN, dtype, SLU::SLU_GE);
575 }
576
577 if(ILU_Flag_==false) {
578 function_map::gssvx(&(data_.options), &(data_.A),
579 data_.perm_c.data(), data_.perm_r.data(),
580 data_.etree.data(), &(data_.equed), data_.R.data(),
581 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
582 &(data_.X), &rpg, &rcond, data_.ferr.data(),
583 data_.berr.data(),
584 #ifdef HAVE_AMESOS2_SUPERLU5_API
585 &(data_.lu),
586 #endif
587 &(data_.mem_usage), &(data_.stat), &ierr);
588 }
589 else {
590 function_map::gsisx(&(data_.options), &(data_.A),
591 data_.perm_c.data(), data_.perm_r.data(),
592 data_.etree.data(), &(data_.equed), data_.R.data(),
593 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
594 &(data_.X), &rpg, &rcond,
595 #ifdef HAVE_AMESOS2_SUPERLU5_API
596 &(data_.lu),
597 #endif
598 &(data_.mem_usage), &(data_.stat), &ierr);
599 }
600
601 // Cleanup X and B stores
602 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
603 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
604 data_.X.Store = NULL;
605 data_.B.Store = NULL;
606 }
607
608 } // do solve
609
610 // convert back to Kokkos views
611 {
612#ifdef HAVE_AMESOS2_TIMERS
613 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
614#endif
615 function_map::convert_back_Dense_Matrix(convert_bValues_, host_bValues_);
616 function_map::convert_back_Dense_Matrix(convert_xValues_, host_xValues_);
617 }
618
619 // Cleanup X and B stores
620 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
621 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
622 data_.X.Store = NULL;
623 data_.B.Store = NULL;
624 }
625
626 /* All processes should have the same error code */
627 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
628
629 global_size_type ierr_st = as<global_size_type>(ierr);
630 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
631 std::invalid_argument,
632 "Argument " << -ierr << " to SuperLU xgssvx had illegal value" );
633 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st <= this->globalNumCols_,
634 std::runtime_error,
635 "Factorization complete, but U is exactly singular" );
636 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st > this->globalNumCols_ + 1,
637 std::runtime_error,
638 "SuperLU allocated " << ierr - this->globalNumCols_ << " bytes of "
639 "memory before allocation failure occured." );
640
641 /* Update X's global values */
642
643 // if bDidAssignX, then we solved straight to the adapter's X memory space without
644 // requiring additional memory allocation, so the x data is already in place.
645 if(!bDidAssignX) {
646#ifdef HAVE_AMESOS2_TIMERS
647 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
648#endif
649
650 if(use_triangular_solves_) { // to device
651#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
652 Util::put_1d_data_helper_kokkos_view<
653 MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, device_xValues_,
654 as<size_t>(ld_rhs),
655 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
656 this->rowIndexBase_);
657#endif
658 }
659 else {
660 Util::put_1d_data_helper_kokkos_view<
661 MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, host_xValues_,
662 as<size_t>(ld_rhs),
663 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
664 this->rowIndexBase_);
665 }
666 }
667
668 return(ierr);
669}
670
671
672template <class Matrix, class Vector>
673bool
675{
676 // The Superlu factorization routines can handle square as well as
677 // rectangular matrices, but Superlu can only apply the solve routines to
678 // square matrices, so we check the matrix for squareness.
679 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
680}
681
682
683template <class Matrix, class Vector>
684void
685Superlu<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
686{
687 using Teuchos::RCP;
688 using Teuchos::getIntegralValue;
689 using Teuchos::ParameterEntryValidator;
690
691 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
692
693 ILU_Flag_ = parameterList->get<bool>("ILU_Flag",false);
694 if (ILU_Flag_) {
695 SLU::ilu_set_default_options(&(data_.options));
696 // Override some default options
697 data_.options.PrintStat = SLU::NO;
698 }
699
700 data_.options.Trans = this->control_.useTranspose_ ? SLU::TRANS : SLU::NOTRANS;
701 // The SuperLU transpose option can override the Amesos2 option
702 if( parameterList->isParameter("Trans") ){
703 RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry("Trans").validator();
704 parameterList->getEntry("Trans").setValidator(trans_validator);
705
706 data_.options.Trans = getIntegralValue<SLU::trans_t>(*parameterList, "Trans");
707 }
708
709 if( parameterList->isParameter("IterRefine") ){
710 RCP<const ParameterEntryValidator> refine_validator = valid_params->getEntry("IterRefine").validator();
711 parameterList->getEntry("IterRefine").setValidator(refine_validator);
712
713 data_.options.IterRefine = getIntegralValue<SLU::IterRefine_t>(*parameterList, "IterRefine");
714 }
715
716 if( parameterList->isParameter("ColPerm") ){
717 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
718 parameterList->getEntry("ColPerm").setValidator(colperm_validator);
719
720 data_.options.ColPerm = getIntegralValue<SLU::colperm_t>(*parameterList, "ColPerm");
721 }
722
723 data_.options.DiagPivotThresh = parameterList->get<double>("DiagPivotThresh", 1.0);
724
725 bool equil = parameterList->get<bool>("Equil", true);
726 data_.options.Equil = equil ? SLU::YES : SLU::NO;
727
728 bool condNum = parameterList->get<bool>("ConditionNumber", false);
729 data_.options.ConditionNumber = condNum ? SLU::YES : SLU::NO;
730
731 bool symmetric_mode = parameterList->get<bool>("SymmetricMode", false);
732 data_.options.SymmetricMode = symmetric_mode ? SLU::YES : SLU::NO;
733
734 // ILU parameters
735 if( parameterList->isParameter("RowPerm") ){
736 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
737 parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
738 data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
739 }
740
741 /*if( parameterList->isParameter("ILU_DropRule") ) {
742 RCP<const ParameterEntryValidator> droprule_validator = valid_params->getEntry("ILU_DropRule").validator();
743 parameterList->getEntry("ILU_DropRule").setValidator(droprule_validator);
744 data_.options.ILU_DropRule = getIntegralValue<SLU::rule_t>(*parameterList, "ILU_DropRule");
745 }*/
746
747 data_.options.ILU_DropTol = parameterList->get<double>("ILU_DropTol", 0.0001);
748
749 data_.options.ILU_FillFactor = parameterList->get<double>("ILU_FillFactor", 10.0);
750
751 if( parameterList->isParameter("ILU_Norm") ) {
752 RCP<const ParameterEntryValidator> norm_validator = valid_params->getEntry("ILU_Norm").validator();
753 parameterList->getEntry("ILU_Norm").setValidator(norm_validator);
754 data_.options.ILU_Norm = getIntegralValue<SLU::norm_t>(*parameterList, "ILU_Norm");
755 }
756
757 if( parameterList->isParameter("ILU_MILU") ) {
758 RCP<const ParameterEntryValidator> milu_validator = valid_params->getEntry("ILU_MILU").validator();
759 parameterList->getEntry("ILU_MILU").setValidator(milu_validator);
760 data_.options.ILU_MILU = getIntegralValue<SLU::milu_t>(*parameterList, "ILU_MILU");
761 }
762
763 data_.options.ILU_FillTol = parameterList->get<double>("ILU_FillTol", 0.01);
764
765 is_contiguous_ = parameterList->get<bool>("IsContiguous", true);
766
767 use_metis_ = parameterList->get<bool>("UseMetis", false);
768 symmetrize_metis_ = parameterList->get<bool>("SymmetrizeMetis", true);
769
770 use_triangular_solves_ = parameterList->get<bool>("Enable_KokkosKernels_TriangularSolves", false);
771 if(use_triangular_solves_) {
772#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
773 // specify whether to invert diagonal blocks
774 sptrsv_invert_diag_ = parameterList->get<bool>("SpTRSV_Invert_Diag", true);
775 // specify whether to apply diagonal-inversion to off-diagonal blocks (optional, default is false)
776 sptrsv_invert_offdiag_ = parameterList->get<bool>("SpTRSV_Invert_OffDiag", false);
777 // specify whether to store U in CSR format
778 sptrsv_u_in_csr_ = parameterList->get<bool>("SpTRSV_U_CSR", true);
779 // specify whether to merge supernodes with similar sparsity structures
780 sptrsv_merge_supernodes_ = parameterList->get<bool>("SpTRSV_Merge_Supernodes", false);
781 // specify whether to use spmv for sptrsv
782 sptrsv_use_spmv_ = parameterList->get<bool>("SpTRSV_Use_SpMV", false);
783#else
784 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
785 "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_SUPERLU were not configured." );
786#endif
787 }
788}
789
790
791template <class Matrix, class Vector>
792Teuchos::RCP<const Teuchos::ParameterList>
794{
795 using std::string;
796 using Teuchos::tuple;
797 using Teuchos::ParameterList;
798 using Teuchos::EnhancedNumberValidator;
799 using Teuchos::setStringToIntegralParameter;
800 using Teuchos::stringToIntegralParameterEntryValidator;
801
802 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
803
804 if( is_null(valid_params) ){
805 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
806
807 setStringToIntegralParameter<SLU::trans_t>("Trans", "NOTRANS",
808 "Solve for the transpose system or not",
809 tuple<string>("TRANS","NOTRANS","CONJ"),
810 tuple<string>("Solve with transpose",
811 "Do not solve with transpose",
812 "Solve with the conjugate transpose"),
813 tuple<SLU::trans_t>(SLU::TRANS,
814 SLU::NOTRANS,
815 SLU::CONJ),
816 pl.getRawPtr());
817
818 setStringToIntegralParameter<SLU::IterRefine_t>("IterRefine", "NOREFINE",
819 "Type of iterative refinement to use",
820 tuple<string>("NOREFINE", "SLU_SINGLE", "SLU_DOUBLE"),
821 tuple<string>("Do not use iterative refinement",
822 "Do single iterative refinement",
823 "Do double iterative refinement"),
824 tuple<SLU::IterRefine_t>(SLU::NOREFINE,
825 SLU::SLU_SINGLE,
826 SLU::SLU_DOUBLE),
827 pl.getRawPtr());
828
829 // Note: MY_PERMC not yet supported
830 setStringToIntegralParameter<SLU::colperm_t>("ColPerm", "COLAMD",
831 "Specifies how to permute the columns of the "
832 "matrix for sparsity preservation",
833 tuple<string>("NATURAL", "MMD_AT_PLUS_A",
834 "MMD_ATA", "COLAMD"),
835 tuple<string>("Natural ordering",
836 "Minimum degree ordering on A^T + A",
837 "Minimum degree ordering on A^T A",
838 "Approximate minimum degree column ordering"),
839 tuple<SLU::colperm_t>(SLU::NATURAL,
840 SLU::MMD_AT_PLUS_A,
841 SLU::MMD_ATA,
842 SLU::COLAMD),
843 pl.getRawPtr());
844
845 Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
846 = Teuchos::rcp( new EnhancedNumberValidator<double>(0.0, 1.0) );
847 pl->set("DiagPivotThresh", 1.0,
848 "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
849 diag_pivot_thresh_validator); // partial pivoting
850
851 pl->set("Equil", true, "Whether to equilibrate the system before solve");
852 pl->set("ConditionNumber", false, "Whether to approximate condition number");
853
854 pl->set("SymmetricMode", false,
855 "Specifies whether to use the symmetric mode. "
856 "Gives preference to diagonal pivots and uses "
857 "an (A^T + A)-based column permutation.");
858
859 // ILU parameters
860
861 setStringToIntegralParameter<SLU::rowperm_t>("RowPerm", "LargeDiag",
862 "Type of row permutation strategy to use",
863 tuple<string>("NOROWPERM","LargeDiag","MY_PERMR"),
864 tuple<string>("Use natural ordering",
865 "Use weighted bipartite matching algorithm",
866 "Use the ordering given in perm_r input"),
867 tuple<SLU::rowperm_t>(SLU::NOROWPERM,
868#if SUPERLU_MAJOR_VERSION > 5 || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION > 2) || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION == 2 && SUPERLU_PATCH_VERSION >= 2)
869 SLU::LargeDiag_MC64,
870#else
871 SLU::LargeDiag,
872#endif
873 SLU::MY_PERMR),
874 pl.getRawPtr());
875
876 /*setStringToIntegralParameter<SLU::rule_t>("ILU_DropRule", "DROP_BASIC",
877 "Type of dropping strategy to use",
878 tuple<string>("DROP_BASIC","DROP_PROWS",
879 "DROP_COLUMN","DROP_AREA",
880 "DROP_DYNAMIC","DROP_INTERP"),
881 tuple<string>("ILUTP(t)","ILUTP(p,t)",
882 "Variant of ILUTP(p,t) for j-th column",
883 "Variant of ILUTP to control memory",
884 "Dynamically adjust threshold",
885 "Compute second dropping threshold by interpolation"),
886 tuple<SLU::rule_t>(SLU::DROP_BASIC,SLU::DROP_PROWS,SLU::DROP_COLUMN,
887 SLU::DROP_AREA,SLU::DROP_DYNAMIC,SLU::DROP_INTERP),
888 pl.getRawPtr());*/
889
890 pl->set("ILU_DropTol", 0.0001, "ILUT drop tolerance");
891
892 pl->set("ILU_FillFactor", 10.0, "ILUT fill factor");
893
894 setStringToIntegralParameter<SLU::norm_t>("ILU_Norm", "INF_NORM",
895 "Type of norm to use",
896 tuple<string>("ONE_NORM","TWO_NORM","INF_NORM"),
897 tuple<string>("1-norm","2-norm","inf-norm"),
898 tuple<SLU::norm_t>(SLU::ONE_NORM,SLU::TWO_NORM,SLU::INF_NORM),
899 pl.getRawPtr());
900
901 setStringToIntegralParameter<SLU::milu_t>("ILU_MILU", "SILU",
902 "Type of modified ILU to use",
903 tuple<string>("SILU","SMILU_1","SMILU_2","SMILU_3"),
904 tuple<string>("Regular ILU","MILU 1","MILU 2","MILU 3"),
905 tuple<SLU::milu_t>(SLU::SILU,SLU::SMILU_1,SLU::SMILU_2,
906 SLU::SMILU_3),
907 pl.getRawPtr());
908
909 pl->set("ILU_FillTol", 0.01, "ILUT fill tolerance");
910
911 pl->set("ILU_Flag", false, "ILU flag: if true, run ILU routines");
912
913 pl->set("IsContiguous", true, "Whether GIDs contiguous");
914
915 // call METIS before SuperLU
916 pl->set("UseMetis", false, "Whether to call METIS before SuperLU");
917 pl->set("SymmetrizeMetis", true, "Whether to symmetrize matrix before METIS");
918
919 pl->set("Enable_KokkosKernels_TriangularSolves", false, "Whether to use triangular solves.");
920#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
921 pl->set("SpTRSV_Invert_Diag", true, "specify whether to invert diagonal blocks for supernodal sparse-trianguular solve");
922 pl->set("SpTRSV_Invert_OffDiag", false, "specify whether to apply diagonal-inversion to off-diagonal blocks for supernodal sparse-trianguular solve");
923 pl->set("SpTRSV_U_CSR", true, "specify whether to store U in CSR forma for supernodal sparse-trianguular solve");
924 pl->set("SpTRSV_Merge_Supernodes", false, "specify whether to merge supernodes with similar sparsity structures for supernodal sparse-trianguular solve");
925 pl->set("SpTRSV_Use_SpMV", false, "specify whether to use spmv for supernodal sparse-trianguular solve");
926#endif
927
928 valid_params = pl;
929 }
930
931 return valid_params;
932}
933
934
935template <class Matrix, class Vector>
936bool
938{
939 using Teuchos::as;
940
941#ifdef HAVE_AMESOS2_TIMERS
942 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
943#endif
944
945 // SuperLU does not need the matrix at symbolicFactorization()
946 if( current_phase == SYMBFACT ) return false;
947
948 // Cleanup old store memory if it's non-NULL (should only ever be non-NULL at root_)
949 if( data_.A.Store != NULL ){
950 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
951 data_.A.Store = NULL;
952 }
953
954 // Only the root image needs storage allocated
955 if( this->root_ ){
956 Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
957 Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
958 Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
959 }
960
961 int nnz_ret = 0;
962 {
963#ifdef HAVE_AMESOS2_TIMERS
964 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
965#endif
966
967 TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
968 std::runtime_error,
969 "Row and column maps have different indexbase ");
970
972 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,
973 host_size_type_array>::do_get(this->matrixA_.ptr(),
974 host_nzvals_view_, host_rows_view_,
975 host_col_ptr_view_, nnz_ret,
976 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
977 ARBITRARY,
978 this->rowIndexBase_);
979 }
980
981 // Get the SLU data type for this type of matrix
982 SLU::Dtype_t dtype = type_map::dtype;
983
984 if( this->root_ ){
985 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
986 std::runtime_error,
987 "Did not get the expected number of non-zero vals");
988
989 function_map::template create_CompCol_Matrix<host_value_type_array>( &(data_.A),
990 this->globalNumRows_, this->globalNumCols_,
991 nnz_ret,
992 convert_nzvals_, host_nzvals_view_,
993 host_rows_view_.data(),
994 host_col_ptr_view_.data(),
995 SLU::SLU_NC, dtype, SLU::SLU_GE);
996 }
997
998 return true;
999}
1000
1001template <class Matrix, class Vector>
1002void
1004{
1005#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1006 size_t ld_rhs = this->matrixA_->getGlobalNumRows();
1007
1008 // convert etree to parents
1009 SLU::SCformat * Lstore = (SLU::SCformat*)(data_.L.Store);
1010 int nsuper = 1 + Lstore->nsuper; // # of supernodal columns
1011 Kokkos::resize(data_.parents, nsuper);
1012 for (int s = 0; s < nsuper; s++) {
1013 int j = Lstore->sup_to_col[s+1]-1; // the last column index of this supernode
1014 if (data_.etree[j] == static_cast<int>(ld_rhs)) {
1015 data_.parents(s) = -1;
1016 } else {
1017 data_.parents(s) = Lstore->col_to_sup[data_.etree[j]];
1018 }
1019 }
1020
1021 deep_copy_or_assign_view(device_trsv_perm_r_, data_.perm_r); // will use device to permute
1022 deep_copy_or_assign_view(device_trsv_perm_c_, data_.perm_c); // will use device to permute
1023 if (data_.options.Equil == SLU::YES) {
1024 deep_copy_or_assign_view(device_trsv_R_, data_.R); // will use device to scale
1025 deep_copy_or_assign_view(device_trsv_C_, data_.C); // will use device to scale
1026 }
1027
1028 bool condition_flag = false;
1029 if (data_.options.ConditionNumber == SLU::YES) {
1030 using STM = Teuchos::ScalarTraits<magnitude_type>;
1031 const magnitude_type eps = STM::eps ();
1032
1033 SCformat *Lstore2 = (SCformat*)(data_.L.Store);
1034 int nsuperL = 1 + Lstore2->nsuper;
1035 int *nb = Lstore2->sup_to_col;
1036 int max_cols = 0;
1037 for (int i = 0; i < nsuperL; i++) {
1038 if (nb[i+1] - nb[i] > max_cols) {
1039 max_cols = nb[i+1] - nb[i];
1040 }
1041 }
1042
1043 // when rcond is small, it is ill-conditioned and flag is true
1044 const magnitude_type multiply_fact (10.0); // larger the value, more likely flag is true, and no invert
1045 condition_flag = (((double)max_cols * nsuper) * eps * multiply_fact >= data_.rcond);
1046
1047#ifdef HAVE_AMESOS2_VERBOSE_DEBUG
1048 int n = data_.perm_r.extent(0);
1049 std::cout << this->getComm()->getRank()
1050 << " : anorm = " << data_.anorm << ", rcond = " << data_.rcond << ", n = " << n
1051 << ", num super cols = " << nsuper << ", max super cols = " << max_cols
1052 << " -> " << ((double)max_cols * nsuper) * eps / data_.rcond
1053 << (((double)max_cols * nsuper) * eps * multiply_fact < data_.rcond ? " (okay)" : " (warn)") << std::endl;
1054#endif
1055 }
1056
1057 // Create handles for U and U^T solves
1058 if (sptrsv_use_spmv_ && !condition_flag) {
1059 device_khL_.create_sptrsv_handle(
1060 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs, true);
1061 device_khU_.create_sptrsv_handle(
1062 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs, false);
1063 } else {
1064 device_khL_.create_sptrsv_handle(
1065 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs, true);
1066 device_khU_.create_sptrsv_handle(
1067 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs, false);
1068 }
1069
1070 // specify whether to store U in CSR format
1071 device_khU_.set_sptrsv_column_major (!sptrsv_u_in_csr_);
1072
1073 // specify whether to merge supernodes with similar sparsity structure
1074 device_khL_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1075 device_khU_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1076
1077 // invert only if flag is not on
1078 bool sptrsv_invert_diag = (!condition_flag && sptrsv_invert_diag_);
1079 bool sptrsv_invert_offdiag = (!condition_flag && sptrsv_invert_offdiag_);
1080
1081 // specify whether to invert diagonal blocks
1082 device_khL_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1083 device_khU_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1084
1085 // specify wheather to apply diagonal-inversion to off-diagonal blocks (optional, default is false)
1086 device_khL_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1087 device_khU_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1088
1089 // set etree
1090 device_khL_.set_sptrsv_etree(data_.parents.data());
1091 device_khU_.set_sptrsv_etree(data_.parents.data());
1092
1093 // set permutation
1094 device_khL_.set_sptrsv_perm(data_.perm_r.data());
1095 device_khU_.set_sptrsv_perm(data_.perm_c.data());
1096
1097 int block_size = -1; // TODO: add needed params
1098 if (block_size >= 0) {
1099 std::cout << " Block Size : " << block_size << std::endl;
1100 device_khL_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1101 device_khU_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1102 }
1103
1104 // Do symbolic analysis
1105 {
1106#ifdef HAVE_AMESOS2_TIMERS
1107 Teuchos::RCP< Teuchos::Time > SymboTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv symbolic");
1108 Teuchos::TimeMonitor numFactTimer(*SymboTimer_);
1109#endif
1110 KokkosSparse::Experimental::sptrsv_symbolic
1111 (&device_khL_, &device_khU_, data_.L, data_.U);
1112 }
1113
1114 // Do numerical compute
1115 {
1116#ifdef HAVE_AMESOS2_TIMERS
1117 Teuchos::RCP< Teuchos::Time > NumerTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv numeric");
1118 Teuchos::TimeMonitor numFactTimer(*NumerTimer_);
1119#endif
1120 KokkosSparse::Experimental::sptrsv_compute
1121 (&device_khL_, &device_khU_, data_.L, data_.U);
1122 }
1123#endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1124}
1125
1126template <class Matrix, class Vector>
1127void
1128Superlu<Matrix,Vector>::triangular_solve() const
1129{
1130#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1131 size_t ld_rhs = device_xValues_.extent(0);
1132 size_t nrhs = device_xValues_.extent(1);
1133
1134 Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
1135 Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
1136
1137 // forward pivot
1138 auto local_device_bValues = device_bValues_;
1139 auto local_device_trsv_perm_r = device_trsv_perm_r_;
1140 auto local_device_trsv_rhs = device_trsv_rhs_;
1141
1142 if (data_.rowequ) {
1143 auto local_device_trsv_R = device_trsv_R_;
1144 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1145 KOKKOS_LAMBDA(size_t j) {
1146 for(size_t k = 0; k < nrhs; ++k) {
1147 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_trsv_R(j) * local_device_bValues(j,k);
1148 }
1149 });
1150 } else {
1151 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1152 KOKKOS_LAMBDA(size_t j) {
1153 for(size_t k = 0; k < nrhs; ++k) {
1154 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_bValues(j,k);
1155 }
1156 });
1157 }
1158
1159 for(size_t k = 0; k < nrhs; ++k) { // sptrsv_solve does not batch
1160 auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
1161 auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
1162
1163 // do L solve= - numeric (only rhs is modified) on the default device/host space
1164 {
1165#ifdef HAVE_AMESOS2_TIMERS
1166 Teuchos::RCP< Teuchos::Time > SpLtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for L solve");
1167 Teuchos::TimeMonitor solveTimer(*SpLtrsvTimer_);
1168#endif
1169 KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
1170 }
1171 // do L^T solve - numeric (only rhs is modified) on the default device/host space
1172 {
1173#ifdef HAVE_AMESOS2_TIMERS
1174 Teuchos::RCP< Teuchos::Time > SpUtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for U solve");
1175 Teuchos::TimeMonitor solveTimer(*SpUtrsvTimer_);
1176#endif
1177 KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
1178 }
1179 } // end loop over rhs vectors
1180
1181 // backward pivot
1182 auto local_device_xValues = device_xValues_;
1183 auto local_device_trsv_perm_c = device_trsv_perm_c_;
1184 if (data_.colequ) {
1185 auto local_device_trsv_C = device_trsv_C_;
1186 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1187 KOKKOS_LAMBDA(size_t j) {
1188 for(size_t k = 0; k < nrhs; ++k) {
1189 local_device_xValues(j,k) = local_device_trsv_C(j) * local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1190 }
1191 });
1192 } else {
1193 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1194 KOKKOS_LAMBDA(size_t j) {
1195 for(size_t k = 0; k < nrhs; ++k) {
1196 local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1197 }
1198 });
1199 }
1200#endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1201}
1202
1203template<class Matrix, class Vector>
1204const char* Superlu<Matrix,Vector>::name = "SuperLU";
1205
1206
1207} // end namespace Amesos2
1208
1209#endif // AMESOS2_SUPERLU_DEF_HPP
Amesos2 Superlu declarations.
@ ROOTED
Definition Amesos2_TypeDecl.hpp:93
@ CONTIGUOUS_AND_ROOTED
Definition Amesos2_TypeDecl.hpp:94
@ ARBITRARY
Definition Amesos2_TypeDecl.hpp:109
A Matrix adapter interface for Amesos2.
Definition Amesos2_MatrixAdapter_decl.hpp:42
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers.
Definition Amesos2_SolverCore_decl.hpp:72
Amesos2 interface to the SuperLU package.
Definition Amesos2_Superlu_decl.hpp:43
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition Amesos2_Superlu_def.hpp:674
int numericFactorization_impl()
Superlu specific numeric factorization.
Definition Amesos2_Superlu_def.hpp:283
std::string description() const override
Returns a short description of this Solver.
Definition Amesos2_Superlu_def.hpp:117
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition Amesos2_Superlu_def.hpp:171
static const char * name
Name of this solver interface.
Definition Amesos2_Superlu_decl.hpp:50
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition Amesos2_Superlu_def.hpp:793
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Superlu specific solve.
Definition Amesos2_Superlu_def.hpp:473
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Superlu.
Definition Amesos2_Superlu_def.hpp:196
~Superlu()
Destructor.
Definition Amesos2_Superlu_def.hpp:94
Superlu(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition Amesos2_Superlu_def.hpp:43
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition Amesos2_Superlu_def.hpp:685
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition Amesos2_Superlu_def.hpp:937
EPhase
Used to indicate a phase in the direct solution.
Definition Amesos2_TypeDecl.hpp:31
A templated MultiVector class adapter for Amesos2.
Definition Amesos2_MultiVecAdapter_decl.hpp:142
A generic helper class for getting a CCS representation of a Matrix.
Definition Amesos2_Util.hpp:633