Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_ShyLUBasker_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Amesos2: Templated Direct Sparse Solver Package
4//
5// Copyright 2011 NTESS and the Amesos2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
20#ifndef AMESOS2_SHYLUBASKER_DEF_HPP
21#define AMESOS2_SHYLUBASKER_DEF_HPP
22
23#include <Teuchos_Tuple.hpp>
24#include <Teuchos_ParameterList.hpp>
25#include <Teuchos_StandardParameterEntryValidators.hpp>
26
29
30namespace Amesos2 {
31
32
33template <class Matrix, class Vector>
34ShyLUBasker<Matrix,Vector>::ShyLUBasker(
35 Teuchos::RCP<const Matrix> A,
36 Teuchos::RCP<Vector> X,
37 Teuchos::RCP<const Vector> B )
38 : SolverCore<Amesos2::ShyLUBasker,Matrix,Vector>(A, X, B)
39 , is_contiguous_(true)
40 , use_gather_(true)
41{
42
43 //Nothing
44
45 // Override some default options
46 // TODO: use data_ here to init
47#if defined(HAVE_AMESOS2_KOKKOS)
48 /*
49 static_assert(std::is_same<kokkos_exe,Kokkos::OpenMP>::value,
50 "Kokkos node type not supported by experimental ShyLUBasker Amesos2");
51 */
52 ShyLUbasker = new ::BaskerNS::BaskerTrilinosInterface<local_ordinal_type, shylubasker_dtype, Exe_Space>();
53 ShyLUbasker->Options.no_pivot = BASKER_FALSE;
54 ShyLUbasker->Options.static_delayed_pivot = 0;
55 ShyLUbasker->Options.symmetric = BASKER_FALSE;
56 ShyLUbasker->Options.realloc = BASKER_TRUE;
57 ShyLUbasker->Options.verbose = BASKER_FALSE;
58 ShyLUbasker->Options.prune = BASKER_TRUE;
59 ShyLUbasker->Options.btf_matching = 2; // use cardinary matching from Trilinos, globally
60 ShyLUbasker->Options.blk_matching = 0; // NOT use max-weight matching from Basker on each diagonal block
61 ShyLUbasker->Options.matrix_scaling = 0; // use matrix scaling on a big A block
62 ShyLUbasker->Options.min_block_size = 0; // no merging small blocks
63 ShyLUbasker->Options.amd_dom = BASKER_TRUE; // use block-wise AMD
64 ShyLUbasker->Options.use_metis = BASKER_TRUE; // use scotch/metis for ND (TODO: should METIS optional?)
65 ShyLUbasker->Options.use_nodeNDP = BASKER_TRUE; // use nodeNDP to compute ND partition
66 ShyLUbasker->Options.run_nd_on_leaves = BASKER_TRUE; // run ND on the final leaf-nodes
67 ShyLUbasker->Options.run_amd_on_leaves = BASKER_FALSE; // run AMD on the final leaf-nodes
68 ShyLUbasker->Options.transpose = BASKER_FALSE;
69 ShyLUbasker->Options.threaded_solve = BASKER_FALSE;
70 ShyLUbasker->Options.replace_zero_pivot = BASKER_TRUE;
71 ShyLUbasker->Options.replace_tiny_pivot = BASKER_FALSE;
72 ShyLUbasker->Options.verbose_matrix_out = BASKER_FALSE;
73
74 ShyLUbasker->Options.user_fill = (double)BASKER_FILL_USER;
75 ShyLUbasker->Options.use_sequential_diag_facto = BASKER_FALSE;
76#ifdef KOKKOS_ENABLE_OPENMP // TODO: check for KOKKOS_ENABLE_THREADS when ready
77 num_threads = Kokkos::OpenMP::impl_max_hardware_threads();
78#else
79 num_threads = 1;
80#endif
81 ShyLUbasker->Options.worker_threads = false;
82
83#else
84 TEUCHOS_TEST_FOR_EXCEPTION(1 != 0,
85 std::runtime_error,
86 "Amesos2_ShyLUBasker Exception: Do not have Kokkos enabled for ShyLUBasker");
87#endif
88}
89
90
91template <class Matrix, class Vector>
92ShyLUBasker<Matrix,Vector>::~ShyLUBasker( )
93{
94 /* ShyLUBasker will cleanup its own internal memory*/
95#if defined(HAVE_AMESOS2_KOKKOS)
96 ShyLUbasker->Finalize();
97 delete ShyLUbasker;
98#endif
99}
100
101template <class Matrix, class Vector>
102bool
104 return (this->root_ && (this->matrixA_->getComm()->getSize() == 1) && is_contiguous_);
105}
106
107template<class Matrix, class Vector>
108int
110{
111 /* TODO: Define what it means for ShyLUBasker
112 */
113#ifdef HAVE_AMESOS2_TIMERS
114 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
115#endif
116
117 return(0);
118}
119
120
121template <class Matrix, class Vector>
122int
124{
125
126 int info = 0;
127 if(this->root_)
128 {
129 int nthreads = num_threads;
130 if (ShyLUbasker->Options.worker_threads) {
131 if (nthreads > 1) {
132 // keep one worker-thread / subdomain (where originally subdomain = num_threads)
133 nthreads /= 2;
134 } else {
135 // turn off worker threads if one thread
136 ShyLUbasker->Options.worker_threads = false;
137 }
138 }
139 ShyLUbasker->SetThreads(nthreads);
140
141
142 // NDE: Special case
143 // Rather than going through the Amesos2 machinery to convert the matrixA_ CRS pointer data to CCS and store in Teuchos::Arrays,
144 // in this special case we pass the CRS raw pointers directly to ShyLUBasker which copies+transposes+sorts the data for CCS format
145 // loadA_impl is essentially an empty function in this case, as the raw pointers are handled here and similarly in Symbolic
146
147 if ( single_proc_optimization() ) {
148
149 host_ordinal_type_array sp_rowptr;
150 host_ordinal_type_array sp_colind;
151 // this needs to be checked during loadA_impl...
152 this->matrixA_->returnRowPtr_kokkos_view(sp_rowptr);
153 TEUCHOS_TEST_FOR_EXCEPTION(sp_rowptr.data() == nullptr,
154 std::runtime_error, "Amesos2 Runtime Error: sp_rowptr returned null ");
155 this->matrixA_->returnColInd_kokkos_view(sp_colind);
156 TEUCHOS_TEST_FOR_EXCEPTION(sp_colind.data() == nullptr,
157 std::runtime_error, "Amesos2 Runtime Error: sp_colind returned null ");
158
159 host_value_type_array hsp_values;
160 this->matrixA_->returnValues_kokkos_view(hsp_values);
161 shylubasker_dtype * sp_values = function_map::convert_scalar(hsp_values.data());
162 //shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
163 TEUCHOS_TEST_FOR_EXCEPTION(sp_values == nullptr,
164 std::runtime_error, "Amesos2 Runtime Error: sp_values returned null ");
165
166 // In this case, colptr_, rowind_, nzvals_ are invalid
167 info = ShyLUbasker->Symbolic(this->globalNumRows_,
168 this->globalNumCols_,
169 this->globalNumNonZeros_,
170 sp_rowptr.data(),
171 sp_colind.data(),
172 sp_values,
173 true); // true = _crs_transpose_needed
174
175 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
176 std::runtime_error, "Error in ShyLUBasker Symbolic");
177 }
178 else
179 { //follow original code path if conditions not met
180 // In this case, loadA_impl updates colptr_, rowind_, nzvals_
181 shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
182 info = ShyLUbasker->Symbolic(this->globalNumRows_,
183 this->globalNumCols_,
184 this->globalNumNonZeros_,
185 colptr_view_.data(),
186 rowind_view_.data(),
187 sp_values);
188
189 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
190 std::runtime_error, "Error in ShyLUBasker Symbolic");
191 }
192 } // end if (this->root_)
193 /*No symbolic factoriztion*/
194
195 /* All processes should have the same error code */
196 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
197 return(info);
198}
199
200
201template <class Matrix, class Vector>
202int
204{
205 using Teuchos::as;
206
207 int info = 0;
208 if ( this->root_ ){
209 { // Do factorization
210#ifdef HAVE_AMESOS2_TIMERS
211 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
212#endif
213
214 // NDE: Special case
215 // Rather than going through the Amesos2 machinery to convert the matrixA_ CRS pointer data to CCS and store in Teuchos::Arrays,
216 // in this special case we pass the CRS raw pointers directly to ShyLUBasker which copies+transposes+sorts the data for CCS format
217 // loadA_impl is essentially an empty function in this case, as the raw pointers are handled here and similarly in Symbolic
218
219 if ( single_proc_optimization() ) {
220
221 host_ordinal_type_array sp_rowptr;
222 host_ordinal_type_array sp_colind;
223 this->matrixA_->returnRowPtr_kokkos_view(sp_rowptr);
224 TEUCHOS_TEST_FOR_EXCEPTION(sp_rowptr.data() == nullptr,
225 std::runtime_error, "Amesos2 Runtime Error: sp_rowptr returned null ");
226 this->matrixA_->returnColInd_kokkos_view(sp_colind);
227 TEUCHOS_TEST_FOR_EXCEPTION(sp_colind.data() == nullptr,
228 std::runtime_error, "Amesos2 Runtime Error: sp_colind returned null ");
229
230 host_value_type_array hsp_values;
231 this->matrixA_->returnValues_kokkos_view(hsp_values);
232 shylubasker_dtype * sp_values = function_map::convert_scalar(hsp_values.data());
233 //shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
234
235 TEUCHOS_TEST_FOR_EXCEPTION(sp_values == nullptr,
236 std::runtime_error, "Amesos2 Runtime Error: sp_values returned null ");
237
238 // In this case, colptr_, rowind_, nzvals_ are invalid
239 info = ShyLUbasker->Factor( this->globalNumRows_,
240 this->globalNumCols_,
241 this->globalNumNonZeros_,
242 sp_rowptr.data(),
243 sp_colind.data(),
244 sp_values);
245 }
246 else
247 {
248 // In this case, loadA_impl updates colptr_, rowind_, nzvals_
249 shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
250 info = ShyLUbasker->Factor(this->globalNumRows_,
251 this->globalNumCols_,
252 this->globalNumNonZeros_,
253 colptr_view_.data(),
254 rowind_view_.data(),
255 sp_values);
256 }
257
258 //ShyLUbasker->DEBUG_PRINT();
259
260 local_ordinal_type blnnz = local_ordinal_type(0);
261 local_ordinal_type bunnz = local_ordinal_type(0);
262 ShyLUbasker->GetLnnz(blnnz); // Add exception handling?
263 ShyLUbasker->GetUnnz(bunnz);
264
265 // This is set after numeric factorization complete as pivoting can be used;
266 // In this case, a discrepancy between symbolic and numeric nnz total can occur.
267 this->setNnzLU( as<size_t>( blnnz + bunnz ) );
268
269 } // end scope for timer
270 } // end if (this->root_)
271
272 /* All processes should have the same error code */
273 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
274
275 //global_size_type info_st = as<global_size_type>(info);
276 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
277 std::runtime_error, " ShyLUBasker::numericFactorization failed.");
278
279 return(info);
280}
281
282
283template <class Matrix, class Vector>
284int
286 const Teuchos::Ptr<MultiVecAdapter<Vector> > X,
287 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
288{
289 int ierr = 0; // returned error code
290
291 using Teuchos::as;
292
293 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
294 const size_t nrhs = X->getGlobalNumVectors();
295
296 const bool ShyluBaskerTransposeRequest = this->control_.useTranspose_;
297 const bool initialize_data = true;
298 const bool do_not_initialize_data = false;
299 bool use_gather = use_gather_; // user param
300 use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1); // only with multiple MPIs
301 use_gather = (use_gather && (std::is_same<vector_scalar_type, float>::value ||
302 std::is_same<vector_scalar_type, double>::value)); // only for double or float vectors
303 {
304#ifdef HAVE_AMESOS2_TIMERS
305 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
306#endif
307 if ( single_proc_optimization() && nrhs == 1 ) {
308
309 // no msp creation
310 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
311 host_solve_array_t>::do_get(initialize_data, B, bValues_, as<size_t>(ld_rhs));
312
313 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
314 host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_, as<size_t>(ld_rhs));
315
316 } // end if ( single_proc_optimization() && nrhs == 1 )
317 else {
318 if (use_gather) {
319 int rval = B->gather(bValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
320 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
321 if (rval == 0) {
322 X->gather(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
323 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
324 } else {
325 use_gather = false;
326 }
327 }
328 if (!use_gather) {
329 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
330 host_solve_array_t>::do_get(initialize_data, B, bValues_,
331 as<size_t>(ld_rhs),
332 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
333 this->rowIndexBase_);
334
335 // See Amesos2_Tacho_def.hpp for notes on why we 'get' x here.
336 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
337 host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_,
338 as<size_t>(ld_rhs),
339 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
340 this->rowIndexBase_);
341 }
342 }
343 }
344
345 if ( this->root_ ) { // do solve
346#ifdef HAVE_AMESOS2_TIMERS
347 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
348#endif
349
350 shylubasker_dtype * pxValues = function_map::convert_scalar(xValues_.data());
351 shylubasker_dtype * pbValues = function_map::convert_scalar(bValues_.data());
352 if (!ShyluBaskerTransposeRequest)
353 ierr = ShyLUbasker->Solve(nrhs, pbValues, pxValues);
354 else
355 ierr = ShyLUbasker->Solve(nrhs, pbValues, pxValues, true);
356 }
357 /* All processes should have the same error code */
358 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
359
360 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0,
361 std::runtime_error,
362 "Encountered zero diag element at: " << ierr);
363 TEUCHOS_TEST_FOR_EXCEPTION( ierr == -1,
364 std::runtime_error,
365 "Could not alloc needed working memory for solve" );
366 {
367#ifdef HAVE_AMESOS2_TIMERS
368 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
369#endif
370 if (use_gather) {
371 int rval = X->scatter(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
372 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
373 if (rval != 0) use_gather = false;
374 }
375 if (!use_gather) {
376 Util::put_1d_data_helper_kokkos_view<
377 MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, xValues_,
378 as<size_t>(ld_rhs),
379 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
380 }
381 }
382 return(ierr);
383}
384
385
386template <class Matrix, class Vector>
387bool
389{
390 // The ShyLUBasker can only handle square for right now
391 return( this->globalNumRows_ == this->globalNumCols_ );
392}
393
394
395template <class Matrix, class Vector>
396void
397ShyLUBasker<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
398{
399 using Teuchos::RCP;
400 using Teuchos::getIntegralValue;
401 using Teuchos::ParameterEntryValidator;
402
403 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
404
405 if(parameterList->isParameter("IsContiguous"))
406 {
407 is_contiguous_ = parameterList->get<bool>("IsContiguous");
408 }
409
410 if(parameterList->isParameter("UseCustomGather"))
411 {
412 use_gather_ = parameterList->get<bool>("UseCustomGather");
413 }
414
415 if(parameterList->isParameter("num_threads"))
416 {
417 num_threads = parameterList->get<int>("num_threads");
418 }
419 if(parameterList->isParameter("worker_threads"))
420 {
421 ShyLUbasker->Options.worker_threads = parameterList->get<bool>("worker_threads");
422 }
423 if(parameterList->isParameter("pivot"))
424 {
425 ShyLUbasker->Options.no_pivot = (!parameterList->get<bool>("pivot"));
426 }
427 if(parameterList->isParameter("delayed pivot"))
428 {
429 ShyLUbasker->Options.static_delayed_pivot = (parameterList->get<int>("delayed pivot"));
430 }
431 if(parameterList->isParameter("pivot_tol"))
432 {
433 ShyLUbasker->Options.pivot_tol = parameterList->get<double>("pivot_tol");
434 }
435 if(parameterList->isParameter("symmetric"))
436 {
437 ShyLUbasker->Options.symmetric = parameterList->get<bool>("symmetric");
438 }
439 if(parameterList->isParameter("realloc"))
440 {
441 ShyLUbasker->Options.realloc = parameterList->get<bool>("realloc");
442 }
443 if(parameterList->isParameter("verbose"))
444 {
445 ShyLUbasker->Options.verbose = parameterList->get<bool>("verbose");
446 }
447 if(parameterList->isParameter("verbose_matrix"))
448 {
449 ShyLUbasker->Options.verbose_matrix_out = parameterList->get<bool>("verbose_matrix");
450 }
451 if(parameterList->isParameter("btf"))
452 {
453 ShyLUbasker->Options.btf = parameterList->get<bool>("btf");
454 }
455 if(parameterList->isParameter("use_metis"))
456 {
457 ShyLUbasker->Options.use_metis = parameterList->get<bool>("use_metis");
458 }
459 if(parameterList->isParameter("use_nodeNDP"))
460 {
461 ShyLUbasker->Options.use_nodeNDP = parameterList->get<bool>("use_nodeNDP");
462 }
463 if(parameterList->isParameter("run_nd_on_leaves"))
464 {
465 ShyLUbasker->Options.run_nd_on_leaves = parameterList->get<bool>("run_nd_on_leaves");
466 }
467 if(parameterList->isParameter("run_amd_on_leaves"))
468 {
469 ShyLUbasker->Options.run_amd_on_leaves = parameterList->get<bool>("run_amd_on_leaves");
470 }
471 if(parameterList->isParameter("amd_on_blocks"))
472 {
473 ShyLUbasker->Options.amd_dom = parameterList->get<bool>("amd_on_blocks");
474 }
475 if(parameterList->isParameter("transpose"))
476 {
477 // NDE: set transpose vs non-transpose mode as bool; track separate shylubasker objects
478 const auto transpose = parameterList->get<bool>("transpose");
479 if (transpose == true)
480 this->control_.useTranspose_ = true;
481 }
482 if(parameterList->isParameter("threaded_solve"))
483 {
484 ShyLUbasker->Options.threaded_solve = parameterList->get<bool>("threaded_solve");
485 }
486 if(parameterList->isParameter("use_sequential_diag_facto"))
487 {
488 ShyLUbasker->Options.use_sequential_diag_facto = parameterList->get<bool>("use_sequential_diag_facto");
489 }
490 if(parameterList->isParameter("user_fill"))
491 {
492 ShyLUbasker->Options.user_fill = parameterList->get<double>("user_fill");
493 }
494 if(parameterList->isParameter("prune"))
495 {
496 ShyLUbasker->Options.prune = parameterList->get<bool>("prune");
497 }
498 if(parameterList->isParameter("replace_zero_pivot"))
499 {
500 ShyLUbasker->Options.replace_zero_pivot = parameterList->get<bool>("replace_zero_pivot");
501 }
502 if(parameterList->isParameter("replace_tiny_pivot"))
503 {
504 ShyLUbasker->Options.replace_tiny_pivot = parameterList->get<bool>("replace_tiny_pivot");
505 }
506 if(parameterList->isParameter("btf_matching"))
507 {
508 ShyLUbasker->Options.btf_matching = parameterList->get<int>("btf_matching");
509 if (ShyLUbasker->Options.btf_matching == 1 || ShyLUbasker->Options.btf_matching == 2) {
510 ShyLUbasker->Options.matching = true;
511 } else {
512 ShyLUbasker->Options.matching = false;
513 }
514 }
515 if(parameterList->isParameter("blk_matching"))
516 {
517 ShyLUbasker->Options.blk_matching = parameterList->get<int>("blk_matching");
518 }
519 if(parameterList->isParameter("matrix_scaling"))
520 {
521 ShyLUbasker->Options.matrix_scaling = parameterList->get<int>("matrix_scaling");
522 }
523 if(parameterList->isParameter("min_block_size"))
524 {
525 ShyLUbasker->Options.min_block_size = parameterList->get<int>("min_block_size");
526 }
527}
528
529template <class Matrix, class Vector>
530Teuchos::RCP<const Teuchos::ParameterList>
532{
533 using Teuchos::ParameterList;
534
535 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
536
537 if( is_null(valid_params) )
538 {
539 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
540 pl->set("IsContiguous", true,
541 "Are GIDs contiguous");
542 pl->set("UseCustomGather", true,
543 "Use Matrix-gather routine");
544 pl->set("num_threads", 1,
545 "Number of threads");
546 pl->set("pivot", false,
547 "Should not pivot");
548 pl->set("delayed pivot", 0,
549 "Apply static delayed pivot on a big block");
550 pl->set("pivot_tol", .0001,
551 "Tolerance before pivot, currently not used");
552 pl->set("symmetric", false,
553 "Should Symbolic assume symmetric nonzero pattern");
554 pl->set("realloc" , false,
555 "Should realloc space if not enough");
556 pl->set("verbose", false,
557 "Information about factoring");
558 pl->set("verbose_matrix", false,
559 "Give Permuted Matrices");
560 pl->set("btf", true,
561 "Use BTF ordering");
562 pl->set("prune", false,
563 "Use prune on BTF blocks (Not Supported)");
564 pl->set("btf_matching", 2,
565 "Matching option for BTF: 0 = none, 1 = Basker, 2 = Trilinos (default), (3 = MC64 if enabled)");
566 pl->set("blk_matching", 0,
567 "Matching optioon for block: 0 = none, 1 or anything else = Basker (default), (2 = MC64 if enabled)");
568 pl->set("matrix_scaling", 0,
569 "Use matrix scaling to biig A BTF block: 0 = no-scaling, 1 = symmetric diagonal scaling, 2 = row-max, and then col-max scaling");
570 pl->set("min_block_size", 0,
571 "Size of the minimum diagonal blocks");
572 pl->set("replace_zero_pivot", true,
573 "Replace zero pivots during the numerical factorization");
574 pl->set("replace_tiny_pivot", false,
575 "Replace tiny pivots during the numerical factorization");
576 pl->set("use_metis", true,
577 "Use METIS for ND");
578 pl->set("use_nodeNDP", true,
579 "Use nodeND to compute ND partition");
580 pl->set("run_nd_on_leaves", false,
581 "Run ND on the final leaf-nodes for ND factorization");
582 pl->set("run_amd_on_leaves", false,
583 "Run AMD on the final leaf-nodes for ND factorization");
584 pl->set("amd_on_blocks", true,
585 "Run AMD on each diagonal blocks");
586 pl->set("transpose", false,
587 "Solve the transpose A");
588 pl->set("threaded_solve", false,
589 "Use threads for forward/backward solves");
590 pl->set("worker_threads", false,
591 "Use worker thread for ND factorization");
592 pl->set("use_sequential_diag_facto", false,
593 "Use sequential algorithm to factor each diagonal block");
594 pl->set("user_fill", (double)BASKER_FILL_USER,
595 "User-provided padding for the fill ratio");
596 valid_params = pl;
597 }
598 return valid_params;
599}
600
601
602template <class Matrix, class Vector>
603bool
605{
606 using Teuchos::as;
607 if(current_phase == SOLVE || current_phase == PREORDERING ) return( false );
608
609 #ifdef HAVE_AMESOS2_TIMERS
610 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
611 #endif
612
613
614 // NDE: Can clean up duplicated code with the #ifdef guards
615 if ( single_proc_optimization() ) {
616 // NDE: Nothing is done in this special case - CRS raw pointers are passed to SHYLUBASKER and transpose of copies handled there
617 // In this case, colptr_, rowind_, nzvals_ are invalid
618 }
619 else
620 {
621
622 // Only the root image needs storage allocated
623 if( this->root_ && current_phase == SYMBFACT )
624 {
625 Kokkos::resize(nzvals_view_, this->globalNumNonZeros_);
626 Kokkos::resize(rowind_view_, this->globalNumNonZeros_);
627 Kokkos::resize(colptr_view_, this->globalNumCols_ + 1); //this will be wrong for case of gapped col ids, e.g. 0,2,4,9; num_cols = 10 ([0,10)) but num GIDs = 4...
628 }
629
630 local_ordinal_type nnz_ret = -1;
631 bool use_gather = use_gather_; // user param
632 use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1); // only with multiple MPIs
633 use_gather = (use_gather && (std::is_same<scalar_type, float>::value || std::is_same<scalar_type, double>::value)); // only for double or float
634 {
635 #ifdef HAVE_AMESOS2_TIMERS
636 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
637 #endif
638 if (use_gather) {
639 bool column_major = true;
640 if (!is_contiguous_) {
641 auto contig_mat = this->matrixA_->reindex(this->contig_rowmap_, this->contig_colmap_, current_phase);
642 nnz_ret = contig_mat->gather(nzvals_view_, rowind_view_, colptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
643 this->transpose_map, this->nzvals_t, column_major, current_phase);
644 } else {
645 nnz_ret = this->matrixA_->gather(nzvals_view_, rowind_view_, colptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
646 this->transpose_map, this->nzvals_t, column_major, current_phase);
647 }
648 // gather failed (e.g., not implemened for KokkosCrsMatrix)
649 // in case of the failure, it falls back to the original "do_get"
650 if (nnz_ret < 0) use_gather = false;
651 }
652 if (!use_gather) {
654 MatrixAdapter<Matrix>, host_value_type_array, host_ordinal_type_array, host_ordinal_type_array>
655 ::do_get(this->matrixA_.ptr(), nzvals_view_, rowind_view_, colptr_view_, nnz_ret,
656 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
657 ARBITRARY,
658 this->rowIndexBase_); // copies from matrixA_ to ShyLUBasker ConcreteSolver cp, ri, nzval members
659 }
660 }
661
662 // gather return the total nnz_ret on every MPI process
663 if (use_gather || this->root_) {
664 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<local_ordinal_type>(this->globalNumNonZeros_),
665 std::runtime_error,
666 "Amesos2_ShyLUBasker loadA_impl: Did not get the expected number of non-zero vals("
667 +std::to_string(nnz_ret)+" vs "+std::to_string(this->globalNumNonZeros_)+")");
668 }
669 } //end alternative path
670 return true;
671}
672
673
674template<class Matrix, class Vector>
675const char* ShyLUBasker<Matrix,Vector>::name = "ShyLUBasker";
676
677
678} // end namespace Amesos2
679
680#endif // AMESOS2_SHYLUBASKER_DEF_HPP
Amesos2 ShyLUBasker declarations.
@ ROOTED
Definition Amesos2_TypeDecl.hpp:93
@ CONTIGUOUS_AND_ROOTED
Definition Amesos2_TypeDecl.hpp:94
@ ARBITRARY
Definition Amesos2_TypeDecl.hpp:109
A Matrix adapter interface for Amesos2.
Definition Amesos2_MatrixAdapter_decl.hpp:42
Amesos2 interface to the Baker package.
Definition Amesos2_ShyLUBasker_decl.hpp:43
EPhase
Used to indicate a phase in the direct solution.
Definition Amesos2_TypeDecl.hpp:31
void transpose(ArrayView< Scalar > vals, ArrayView< GlobalOrdinal > indices, ArrayView< GlobalSizeT > ptr, ArrayView< Scalar > trans_vals, ArrayView< GlobalOrdinal > trans_indices, ArrayView< GlobalSizeT > trans_ptr)
A templated MultiVector class adapter for Amesos2.
Definition Amesos2_MultiVecAdapter_decl.hpp:142
A generic helper class for getting a CCS representation of a Matrix.
Definition Amesos2_Util.hpp:633