Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_ShyLUBasker_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Amesos2: Templated Direct Sparse Solver Package
4//
5// Copyright 2011 NTESS and the Amesos2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
20#ifndef AMESOS2_SHYLUBASKER_DEF_HPP
21#define AMESOS2_SHYLUBASKER_DEF_HPP
22
23#include <Teuchos_Tuple.hpp>
24#include <Teuchos_ParameterList.hpp>
25#include <Teuchos_StandardParameterEntryValidators.hpp>
26
29
30namespace Amesos2 {
31
32
33template <class Matrix, class Vector>
34ShyLUBasker<Matrix,Vector>::ShyLUBasker(
35 Teuchos::RCP<const Matrix> A,
36 Teuchos::RCP<Vector> X,
37 Teuchos::RCP<const Vector> B )
38 : SolverCore<Amesos2::ShyLUBasker,Matrix,Vector>(A, X, B)
39 , schur_out_ptr(nullptr)
40 , is_contiguous_(true)
41 , use_gather_(true)
42{
43
44 //Nothing
45
46 // Override some default options
47 // TODO: use data_ here to init
48#if defined(HAVE_AMESOS2_KOKKOS)
49 /*
50 static_assert(std::is_same<kokkos_exe,Kokkos::OpenMP>::value,
51 "Kokkos node type not supported by experimental ShyLUBasker Amesos2");
52 */
53 ShyLUbasker = new ::BaskerNS::BaskerTrilinosInterface<local_ordinal_type, shylubasker_dtype, Exe_Space>();
54 ShyLUbasker->Options.no_pivot = BASKER_FALSE;
55 ShyLUbasker->Options.static_delayed_pivot = 0;
56 ShyLUbasker->Options.symmetric = BASKER_FALSE;
57 ShyLUbasker->Options.realloc = BASKER_TRUE;
58 ShyLUbasker->Options.verbose = BASKER_FALSE;
59 ShyLUbasker->Options.prune = BASKER_TRUE;
60 ShyLUbasker->Options.btf_matching = 2; // use cardinary matching from Trilinos, globally
61 ShyLUbasker->Options.blk_matching = 0; // NOT use max-weight matching from Basker on each diagonal block
62 ShyLUbasker->Options.matrix_scaling = 0; // use matrix scaling on a big A block
63 ShyLUbasker->Options.min_block_size = 0; // no merging small blocks
64 ShyLUbasker->Options.amd_dom = BASKER_TRUE; // use block-wise AMD
65 ShyLUbasker->Options.use_metis = BASKER_TRUE; // use scotch/metis for ND (TODO: should METIS optional?)
66 ShyLUbasker->Options.use_nodeNDP = BASKER_TRUE; // use nodeNDP to compute ND partition
67 ShyLUbasker->Options.run_nd_on_leaves = BASKER_TRUE; // run ND on the final leaf-nodes
68 ShyLUbasker->Options.run_amd_on_leaves = BASKER_FALSE; // run AMD on the final leaf-nodes
69 ShyLUbasker->Options.transpose = BASKER_FALSE;
70 ShyLUbasker->Options.threaded_solve = BASKER_FALSE;
71 ShyLUbasker->Options.replace_zero_pivot = BASKER_TRUE;
72 ShyLUbasker->Options.replace_tiny_pivot = BASKER_FALSE;
73 ShyLUbasker->Options.verbose_matrix_out = BASKER_FALSE;
74
75 ShyLUbasker->Options.user_fill = (double)BASKER_FILL_USER;
76 ShyLUbasker->Options.use_sequential_diag_facto = BASKER_FALSE;
77#ifdef KOKKOS_ENABLE_OPENMP // TODO: check for KOKKOS_ENABLE_THREADS when ready
78 num_threads = Kokkos::OpenMP::impl_max_hardware_threads();
79#else
80 num_threads = 1;
81#endif
82 ShyLUbasker->Options.worker_threads = false;
83 // partial factorization
84 ShyLUbasker->Options.partial_facto = 0;
85 ShyLUbasker->Options.only_forward_solve = false;
86 ShyLUbasker->Options.only_backward_solve = false;
87
88#else
89 TEUCHOS_TEST_FOR_EXCEPTION(1 != 0,
90 std::runtime_error,
91 "Amesos2_ShyLUBasker Exception: Do not have Kokkos enabled for ShyLUBasker");
92#endif
93}
94
95
96template <class Matrix, class Vector>
97ShyLUBasker<Matrix,Vector>::~ShyLUBasker( )
98{
99 /* ShyLUBasker will cleanup its own internal memory*/
100#if defined(HAVE_AMESOS2_KOKKOS)
101 ShyLUbasker->Finalize();
102 delete ShyLUbasker;
103#endif
104}
105
106template <class Matrix, class Vector>
107bool
109 return (this->root_ && (this->matrixA_->getComm()->getSize() == 1) && is_contiguous_);
110}
111
112template<class Matrix, class Vector>
113int
115{
116 /* TODO: Define what it means for ShyLUBasker
117 */
118#ifdef HAVE_AMESOS2_TIMERS
119 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
120#endif
121
122 return(0);
123}
124
125
126template <class Matrix, class Vector>
127int
129{
130
131 int info = 0;
132 if(this->root_)
133 {
134 int nthreads = num_threads;
135 if (ShyLUbasker->Options.partial_facto != 0) {
136 // User needs to provide 2x threads, to avoid dead-lock due to busy-wait
137 TEUCHOS_TEST_FOR_EXCEPTION
138 (nthreads < 2, std::runtime_error,
139 "ShyLU-Basker dense Schur option requires # of threads (2x # of leaves) to be greater than 1 (" << nthreads << ")");
140 }
141 if (ShyLUbasker->Options.worker_threads) {
142 if (nthreads > 1) {
143 // keep one worker-thread / subdomain (where originally subdomain = num_threads)
144 if (ShyLUbasker->Options.verbose && this->root_) {
145 std::cout << "Amesos2::ShyLUBasker:: reduce num threads from " << nthreads << " to " << nthreads/2
146 << " for worker threads" << std::endl;
147 }
148 nthreads /= 2;
149 } else {
150 // turn off worker threads if one thread
151 ShyLUbasker->Options.worker_threads = false;
152 }
153 }
154 ShyLUbasker->SetThreads(nthreads);
155
156
157 // NDE: Special case
158 // Rather than going through the Amesos2 machinery to convert the matrixA_ CRS pointer data to CCS and store in Teuchos::Arrays,
159 // in this special case we pass the CRS raw pointers directly to ShyLUBasker which copies+transposes+sorts the data for CCS format
160 // loadA_impl is essentially an empty function in this case, as the raw pointers are handled here and similarly in Symbolic
161
162 if ( single_proc_optimization() ) {
163
164 host_ordinal_type_array sp_rowptr;
165 host_ordinal_type_array sp_colind;
166 // this needs to be checked during loadA_impl...
167 this->matrixA_->returnRowPtr_kokkos_view(sp_rowptr);
168 TEUCHOS_TEST_FOR_EXCEPTION(sp_rowptr.data() == nullptr,
169 std::runtime_error, "Amesos2 Runtime Error: sp_rowptr returned null ");
170 this->matrixA_->returnColInd_kokkos_view(sp_colind);
171 TEUCHOS_TEST_FOR_EXCEPTION(sp_colind.data() == nullptr,
172 std::runtime_error, "Amesos2 Runtime Error: sp_colind returned null ");
173
174 host_value_type_array hsp_values;
175 this->matrixA_->returnValues_kokkos_view(hsp_values);
176 shylubasker_dtype * sp_values = function_map::convert_scalar(hsp_values.data());
177 //shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
178 TEUCHOS_TEST_FOR_EXCEPTION(sp_values == nullptr,
179 std::runtime_error, "Amesos2 Runtime Error: sp_values returned null ");
180
181 // In this case, colptr_, rowind_, nzvals_ are invalid
182 if (ShyLUbasker->Options.partial_facto != 0) {
183 shylubasker_dtype * sp_schur_out = function_map::convert_scalar(schur_out.data());
184 info = ShyLUbasker->Symbolic(this->globalNumRows_,
185 this->globalNumCols_,
186 this->globalNumNonZeros_,
187 sp_rowptr.data(),
188 sp_colind.data(),
189 sp_values,
190 schur_part.data(),
191 sp_schur_out,
192 true); // true = _crs_transpose_needed
193 } else {
194 info = ShyLUbasker->Symbolic(this->globalNumRows_,
195 this->globalNumCols_,
196 this->globalNumNonZeros_,
197 sp_rowptr.data(),
198 sp_colind.data(),
199 sp_values,
200 true); // true = _crs_transpose_needed
201 }
202 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
203 std::runtime_error, "Error in ShyLUBasker Symbolic");
204 }
205 else
206 { //follow original code path if conditions not met
207 // In this case, loadA_impl updates colptr_, rowind_, nzvals_
208 shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
209 if (ShyLUbasker->Options.partial_facto != 0) {
210 shylubasker_dtype * sp_schur_out = function_map::convert_scalar(schur_out.data());
211 info = ShyLUbasker->Symbolic(this->globalNumRows_,
212 this->globalNumCols_,
213 this->globalNumNonZeros_,
214 colptr_view_.data(),
215 rowind_view_.data(),
216 sp_values,
217 schur_part.data(),
218 sp_schur_out);
219 } else {
220 info = ShyLUbasker->Symbolic(this->globalNumRows_,
221 this->globalNumCols_,
222 this->globalNumNonZeros_,
223 colptr_view_.data(),
224 rowind_view_.data(),
225 sp_values);
226 }
227 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
228 std::runtime_error, "Error in ShyLUBasker Symbolic");
229 }
230 } // end if (this->root_)
231 /*No symbolic factoriztion*/
232
233 /* All processes should have the same error code */
234 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
235 return(info);
236}
237
238
239template <class Matrix, class Vector>
240int
242{
243 using Teuchos::as;
244
245 int info = 0;
246 if ( this->root_ ){
247 { // Do factorization
248#ifdef HAVE_AMESOS2_TIMERS
249 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
250#endif
251
252 // NDE: Special case
253 // Rather than going through the Amesos2 machinery to convert the matrixA_ CRS pointer data to CCS and store in Teuchos::Arrays,
254 // in this special case we pass the CRS raw pointers directly to ShyLUBasker which copies+transposes+sorts the data for CCS format
255 // loadA_impl is essentially an empty function in this case, as the raw pointers are handled here and similarly in Symbolic
256
257 if ( single_proc_optimization() ) {
258
259 host_ordinal_type_array sp_rowptr;
260 host_ordinal_type_array sp_colind;
261 this->matrixA_->returnRowPtr_kokkos_view(sp_rowptr);
262 TEUCHOS_TEST_FOR_EXCEPTION(sp_rowptr.data() == nullptr,
263 std::runtime_error, "Amesos2 Runtime Error: sp_rowptr returned null ");
264 this->matrixA_->returnColInd_kokkos_view(sp_colind);
265 TEUCHOS_TEST_FOR_EXCEPTION(sp_colind.data() == nullptr,
266 std::runtime_error, "Amesos2 Runtime Error: sp_colind returned null ");
267
268 host_value_type_array hsp_values;
269 this->matrixA_->returnValues_kokkos_view(hsp_values);
270 shylubasker_dtype * sp_values = function_map::convert_scalar(hsp_values.data());
271 //shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
272
273 TEUCHOS_TEST_FOR_EXCEPTION(sp_values == nullptr,
274 std::runtime_error, "Amesos2 Runtime Error: sp_values returned null ");
275
276 // In this case, colptr_, rowind_, nzvals_ are invalid
277 info = ShyLUbasker->Factor( this->globalNumRows_,
278 this->globalNumCols_,
279 this->globalNumNonZeros_,
280 sp_rowptr.data(),
281 sp_colind.data(),
282 sp_values);
283 }
284 else
285 {
286 // In this case, loadA_impl updates colptr_, rowind_, nzvals_
287 shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
288 info = ShyLUbasker->Factor(this->globalNumRows_,
289 this->globalNumCols_,
290 this->globalNumNonZeros_,
291 colptr_view_.data(),
292 rowind_view_.data(),
293 sp_values);
294 }
295 if (ShyLUbasker->Options.partial_facto != 0) {
296 if (schur_out_ptr != nullptr) {
297 // copy schur out if the output pointer is valid (assuming enough space has been allocated)
298 for (size_t i = 0; i < schur_size; i++) {
299 for (size_t j = 0; j < schur_size; j++) {
300 schur_out_ptr[i+j*schur_size] = schur_out[i+j*schur_size];
301 }
302 }
303 }
304 }
305
306 //ShyLUbasker->DEBUG_PRINT();
307
308 local_ordinal_type blnnz = local_ordinal_type(0);
309 local_ordinal_type bunnz = local_ordinal_type(0);
310 ShyLUbasker->GetLnnz(blnnz); // Add exception handling?
311 ShyLUbasker->GetUnnz(bunnz);
312
313 // This is set after numeric factorization complete as pivoting can be used;
314 // In this case, a discrepancy between symbolic and numeric nnz total can occur.
315 this->setNnzLU( as<size_t>( blnnz + bunnz ) );
316
317 } // end scope for timer
318 } // end if (this->root_)
319
320 /* All processes should have the same error code */
321 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
322
323 //global_size_type info_st = as<global_size_type>(info);
324 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
325 std::runtime_error, " ShyLUBasker::numericFactorization failed.");
326
327 return(info);
328}
329
330
331template <class Matrix, class Vector>
332int
334 const Teuchos::Ptr<MultiVecAdapter<Vector> > X,
335 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
336{
337 int ierr = 0; // returned error code
338
339 using Teuchos::as;
340
341 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
342 const size_t nrhs = X->getGlobalNumVectors();
343
344 const bool ShyluBaskerTransposeRequest = this->control_.useTranspose_;
345 const bool initialize_data = true;
346 const bool do_not_initialize_data = false;
347 bool use_gather = use_gather_; // user param
348 use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1); // only with multiple MPIs
349 use_gather = (use_gather && (std::is_same<vector_scalar_type, float>::value ||
350 std::is_same<vector_scalar_type, double>::value)); // only for double or float vectors
351 {
352#ifdef HAVE_AMESOS2_TIMERS
353 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
354#endif
355 if ( single_proc_optimization() && nrhs == 1 ) {
356
357 // no msp creation
358 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
359 host_solve_array_t>::do_get(initialize_data, B, bValues_, as<size_t>(ld_rhs));
360
361 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
362 host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_, as<size_t>(ld_rhs));
363
364 } // end if ( single_proc_optimization() && nrhs == 1 )
365 else {
366 if (use_gather) {
367 int rval = B->gather(bValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
368 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
369 if (rval == 0) {
370 X->gather(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
371 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
372 } else {
373 use_gather = false;
374 }
375 }
376 if (!use_gather) {
377 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
378 host_solve_array_t>::do_get(initialize_data, B, bValues_,
379 as<size_t>(ld_rhs),
380 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
381 this->rowIndexBase_);
382
383 // See Amesos2_Tacho_def.hpp for notes on why we 'get' x here.
384 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
385 host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_,
386 as<size_t>(ld_rhs),
387 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
388 this->rowIndexBase_);
389 }
390 }
391 }
392
393 if ( this->root_ ) { // do solve
394#ifdef HAVE_AMESOS2_TIMERS
395 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
396#endif
397
398 shylubasker_dtype * pxValues = function_map::convert_scalar(xValues_.data());
399 shylubasker_dtype * pbValues = function_map::convert_scalar(bValues_.data());
400 if (!ShyluBaskerTransposeRequest)
401 ierr = ShyLUbasker->Solve(nrhs, pbValues, pxValues);
402 else
403 ierr = ShyLUbasker->Solve(nrhs, pbValues, pxValues, true);
404 }
405 /* All processes should have the same error code */
406 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
407
408 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0,
409 std::runtime_error,
410 "Encountered zero diag element at: " << ierr);
411 TEUCHOS_TEST_FOR_EXCEPTION( ierr == -1,
412 std::runtime_error,
413 "Could not alloc needed working memory for solve" );
414 {
415#ifdef HAVE_AMESOS2_TIMERS
416 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
417#endif
418 if (use_gather) {
419 int rval = X->scatter(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
420 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
421 if (rval != 0) use_gather = false;
422 }
423 if (!use_gather) {
424 Util::put_1d_data_helper_kokkos_view<
425 MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, xValues_,
426 as<size_t>(ld_rhs),
427 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
428 }
429 }
430 return(ierr);
431}
432
433
434template <class Matrix, class Vector>
435bool
437{
438 // The ShyLUBasker can only handle square for right now
439 return( this->globalNumRows_ == this->globalNumCols_ );
440}
441
442
443template <class Matrix, class Vector>
444void
445ShyLUBasker<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
446{
447 using Teuchos::RCP;
448 using Teuchos::getIntegralValue;
449 using Teuchos::ParameterEntryValidator;
450
451 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
452
453 if(parameterList->isParameter("IsContiguous"))
454 {
455 is_contiguous_ = parameterList->get<bool>("IsContiguous");
456 }
457
458 if(parameterList->isParameter("UseCustomGather"))
459 {
460 use_gather_ = parameterList->get<bool>("UseCustomGather");
461 }
462
463 if(parameterList->isParameter("num_threads"))
464 {
465 num_threads = parameterList->get<int>("num_threads");
466 }
467 if(parameterList->isParameter("worker_threads"))
468 {
469 ShyLUbasker->Options.worker_threads = parameterList->get<bool>("worker_threads");
470 }
471 if(parameterList->isParameter("pivot"))
472 {
473 ShyLUbasker->Options.no_pivot = (!parameterList->get<bool>("pivot"));
474 }
475 if(parameterList->isParameter("delayed pivot"))
476 {
477 ShyLUbasker->Options.static_delayed_pivot = (parameterList->get<int>("delayed pivot"));
478 }
479 if(parameterList->isParameter("pivot_tol"))
480 {
481 ShyLUbasker->Options.pivot_tol = parameterList->get<double>("pivot_tol");
482 }
483 if(parameterList->isParameter("symmetric"))
484 {
485 ShyLUbasker->Options.symmetric = parameterList->get<bool>("symmetric");
486 }
487 if(parameterList->isParameter("realloc"))
488 {
489 ShyLUbasker->Options.realloc = parameterList->get<bool>("realloc");
490 }
491 if(parameterList->isParameter("verbose"))
492 {
493 ShyLUbasker->Options.verbose = parameterList->get<bool>("verbose");
494 }
495 if(parameterList->isParameter("verbose_matrix"))
496 {
497 ShyLUbasker->Options.verbose_matrix_out = parameterList->get<bool>("verbose_matrix");
498 }
499 if(parameterList->isParameter("btf"))
500 {
501 ShyLUbasker->Options.btf = parameterList->get<bool>("btf");
502 }
503 if(parameterList->isParameter("use_metis"))
504 {
505 ShyLUbasker->Options.use_metis = parameterList->get<bool>("use_metis");
506 }
507 if(parameterList->isParameter("use_nodeNDP"))
508 {
509 ShyLUbasker->Options.use_nodeNDP = parameterList->get<bool>("use_nodeNDP");
510 }
511 if(parameterList->isParameter("run_nd_on_leaves"))
512 {
513 ShyLUbasker->Options.run_nd_on_leaves = parameterList->get<bool>("run_nd_on_leaves");
514 }
515 if(parameterList->isParameter("run_amd_on_leaves"))
516 {
517 ShyLUbasker->Options.run_amd_on_leaves = parameterList->get<bool>("run_amd_on_leaves");
518 }
519 if(parameterList->isParameter("amd_on_blocks"))
520 {
521 ShyLUbasker->Options.amd_dom = parameterList->get<bool>("amd_on_blocks");
522 }
523 if(parameterList->isParameter("transpose"))
524 {
525 // NDE: set transpose vs non-transpose mode as bool; track separate shylubasker objects
526 const auto transpose = parameterList->get<bool>("transpose");
527 if (transpose == true)
528 this->control_.useTranspose_ = true;
529 }
530 if(parameterList->isParameter("threaded_solve"))
531 {
532 ShyLUbasker->Options.threaded_solve = parameterList->get<bool>("threaded_solve");
533 }
534 if(parameterList->isParameter("use_sequential_diag_facto"))
535 {
536 ShyLUbasker->Options.use_sequential_diag_facto = parameterList->get<bool>("use_sequential_diag_facto");
537 }
538 if(parameterList->isParameter("user_fill"))
539 {
540 ShyLUbasker->Options.user_fill = parameterList->get<double>("user_fill");
541 }
542 if(parameterList->isParameter("prune"))
543 {
544 ShyLUbasker->Options.prune = parameterList->get<bool>("prune");
545 }
546 if(parameterList->isParameter("replace_zero_pivot"))
547 {
548 ShyLUbasker->Options.replace_zero_pivot = parameterList->get<bool>("replace_zero_pivot");
549 }
550 if(parameterList->isParameter("replace_tiny_pivot"))
551 {
552 ShyLUbasker->Options.replace_tiny_pivot = parameterList->get<bool>("replace_tiny_pivot");
553 }
554 if(parameterList->isParameter("btf_matching"))
555 {
556 ShyLUbasker->Options.btf_matching = parameterList->get<int>("btf_matching");
557 if (ShyLUbasker->Options.btf_matching == 1 || ShyLUbasker->Options.btf_matching == 2) {
558 ShyLUbasker->Options.matching = true;
559 } else {
560 ShyLUbasker->Options.matching = false;
561 }
562 }
563 if(parameterList->isParameter("blk_matching"))
564 {
565 ShyLUbasker->Options.blk_matching = parameterList->get<int>("blk_matching");
566 }
567 if(parameterList->isParameter("matrix_scaling"))
568 {
569 ShyLUbasker->Options.matrix_scaling = parameterList->get<int>("matrix_scaling");
570 }
571 if(parameterList->isParameter("min_block_size"))
572 {
573 ShyLUbasker->Options.min_block_size = parameterList->get<int>("min_block_size");
574 }
575
576 if(parameterList->isParameter("PartialFacto"))
577 {
578 ShyLUbasker->Options.partial_facto = parameterList->get<int>("PartialFacto");
579 }
580 if(parameterList->isParameter("SchurPart"))
581 {
582 // copy schur-part to the internal view (in case the user-pointer go out of scope?)
583 auto schur_part_ptr = parameterList->get<const local_ordinal_type*>("SchurPart");
584 Kokkos::resize(schur_part, this->globalNumCols_);
585 schur_size = 0;
586 for (global_size_type i=0; i<this->globalNumCols_; i++) {
587 schur_part(i) = schur_part_ptr[i];
588 if (schur_part(i) == 1) schur_size ++;
589 }
590 // allocate internal storage to store the schur complement (user may not want it and may not provide a valid pointer?)
591 Kokkos::resize(schur_out, schur_size*schur_size);
592 }
593 if(parameterList->isParameter("SchurOut"))
594 {
595 // store schur-part to the internal view (if user wants the output, then the pointer should stay, so no need for internal view?)
596 schur_out_ptr = parameterList->get<scalar_type*>("SchurOut");
597 }
598 if(parameterList->isParameter("OnlyForwardSolve"))
599 {
600 ShyLUbasker->Options.only_forward_solve = parameterList->get<bool>("OnlyForwardSolve");
601 }
602 if(parameterList->isParameter("OnlyBackwardSolve"))
603 {
604 ShyLUbasker->Options.only_backward_solve = parameterList->get<bool>("OnlyBackwardSolve");
605 }
606}
607
608template <class Matrix, class Vector>
609Teuchos::RCP<const Teuchos::ParameterList>
611{
612 using Teuchos::ParameterList;
613
614 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
615
616 if( is_null(valid_params) )
617 {
618 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
619 pl->set("IsContiguous", true,
620 "Are GIDs contiguous");
621 pl->set("UseCustomGather", true,
622 "Use Matrix-gather routine");
623 pl->set("num_threads", 1,
624 "Number of threads");
625 pl->set("pivot", false,
626 "Should not pivot");
627 pl->set("delayed pivot", 0,
628 "Apply static delayed pivot on a big block");
629 pl->set("pivot_tol", .0001,
630 "Tolerance before pivot, currently not used");
631 pl->set("symmetric", false,
632 "Should Symbolic assume symmetric nonzero pattern");
633 pl->set("realloc" , false,
634 "Should realloc space if not enough");
635 pl->set("verbose", false,
636 "Information about factoring");
637 pl->set("verbose_matrix", false,
638 "Give Permuted Matrices");
639 pl->set("btf", true,
640 "Use BTF ordering");
641 pl->set("prune", false,
642 "Use prune on BTF blocks (Not Supported)");
643 pl->set("btf_matching", 2,
644 "Matching option for BTF: 0 = none, 1 = Basker, 2 = Trilinos (default), (3 = MC64 if enabled)");
645 pl->set("blk_matching", 0,
646 "Matching optioon for block: 0 = none, 1 or anything else = Basker (default), (2 = MC64 if enabled)");
647 pl->set("matrix_scaling", 0,
648 "Use matrix scaling to biig A BTF block: 0 = no-scaling, 1 = symmetric diagonal scaling, 2 = row-max, and then col-max scaling");
649 pl->set("min_block_size", 0,
650 "Size of the minimum diagonal blocks");
651 pl->set("replace_zero_pivot", true,
652 "Replace zero pivots during the numerical factorization");
653 pl->set("replace_tiny_pivot", false,
654 "Replace tiny pivots during the numerical factorization");
655 pl->set("use_metis", true,
656 "Use METIS for ND");
657 pl->set("use_nodeNDP", true,
658 "Use nodeND to compute ND partition");
659 pl->set("run_nd_on_leaves", false,
660 "Run ND on the final leaf-nodes for ND factorization");
661 pl->set("run_amd_on_leaves", false,
662 "Run AMD on the final leaf-nodes for ND factorization");
663 pl->set("amd_on_blocks", true,
664 "Run AMD on each diagonal blocks");
665 pl->set("transpose", false,
666 "Solve the transpose A");
667 pl->set("threaded_solve", false,
668 "Use threads for forward/backward solves");
669 pl->set("worker_threads", false,
670 "Use worker thread for ND factorization");
671 pl->set("use_sequential_diag_facto", false,
672 "Use sequential algorithm to factor each diagonal block");
673 pl->set("user_fill", (double)BASKER_FILL_USER,
674 "User-provided padding for the fill ratio");
675
676 // TODO: should these be const or not
677 scalar_type *dummy_scalar_ptr;
678 const local_ordinal_type *dummy_ordinal_ptr;
679 pl->set("PartialFacto", 0,
680 "Perform partial factorization to extract dense Schur complement (0: no, 1: form + factor Schur, 2: ony form");
681 pl->set("SchurPart", dummy_ordinal_ptr,
682 "Specify rows/columns belonging to Schur complement for partial factorization");
683 pl->set("SchurOut", dummy_scalar_ptr,
684 "Store output Schur complement from partial factorization");
685 pl->set("OnlyForwardSolve", false,
686 "Perform only the forward substitution");
687 pl->set("OnlyBackwardSolve", false,
688 "Perform only the backward substitution");
689 valid_params = pl;
690 }
691 return valid_params;
692}
693
694
695template <class Matrix, class Vector>
696bool
698{
699 using Teuchos::as;
700 if(current_phase == SOLVE || current_phase == PREORDERING ) return( false );
701
702 #ifdef HAVE_AMESOS2_TIMERS
703 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
704 #endif
705
706
707 // NDE: Can clean up duplicated code with the #ifdef guards
708 if ( single_proc_optimization() ) {
709 // NDE: Nothing is done in this special case - CRS raw pointers are passed to SHYLUBASKER and transpose of copies handled there
710 // In this case, colptr_, rowind_, nzvals_ are invalid
711 }
712 else
713 {
714 // Only the root image needs storage allocated
715 if( this->root_ && current_phase == SYMBFACT )
716 {
717 Kokkos::resize(nzvals_view_, this->globalNumNonZeros_);
718 Kokkos::resize(rowind_view_, this->globalNumNonZeros_);
719 Kokkos::resize(colptr_view_, this->globalNumCols_ + 1); //this will be wrong for case of gapped col ids, e.g. 0,2,4,9; num_cols = 10 ([0,10)) but num GIDs = 4...
720 }
721
722 local_ordinal_type nnz_ret = -1;
723 bool use_gather = use_gather_; // user param
724 use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1); // only with multiple MPIs
725 use_gather = (use_gather && (std::is_same<scalar_type, float>::value || std::is_same<scalar_type, double>::value)); // only for double or float
726 {
727 #ifdef HAVE_AMESOS2_TIMERS
728 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
729 #endif
730 if (use_gather) {
731 bool column_major = true;
732 if (!is_contiguous_) {
733 auto contig_mat = this->matrixA_->reindex(this->contig_rowmap_, this->contig_colmap_, current_phase);
734 nnz_ret = contig_mat->gather(nzvals_view_, rowind_view_, colptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
735 this->transpose_map, this->nzvals_t, column_major, current_phase);
736 } else {
737 nnz_ret = this->matrixA_->gather(nzvals_view_, rowind_view_, colptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
738 this->transpose_map, this->nzvals_t, column_major, current_phase);
739 }
740 // gather failed (e.g., not implemened for KokkosCrsMatrix)
741 // in case of the failure, it falls back to the original "do_get"
742 if (nnz_ret < 0) use_gather = false;
743 }
744 if (!use_gather) {
746 MatrixAdapter<Matrix>, host_value_type_array, host_ordinal_type_array, host_ordinal_type_array>
747 ::do_get(this->matrixA_.ptr(), nzvals_view_, rowind_view_, colptr_view_, nnz_ret,
748 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
749 ARBITRARY,
750 this->rowIndexBase_); // copies from matrixA_ to ShyLUBasker ConcreteSolver cp, ri, nzval members
751 }
752 }
753
754 // gather return the total nnz_ret on every MPI process
755 if (use_gather || this->root_) {
756 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<local_ordinal_type>(this->globalNumNonZeros_),
757 std::runtime_error,
758 "Amesos2_ShyLUBasker loadA_impl: Did not get the expected number of non-zero vals("
759 +std::to_string(nnz_ret)+" vs "+std::to_string(this->globalNumNonZeros_)+")");
760 }
761 } //end alternative path
762 return true;
763}
764
765
766template <class Matrix, class Vector>
767void
769 const Teuchos::EVerbosityLevel verbLevel) const
770{
771 out << " ShyLUBasker current parameters:" << std::endl;
772 out << " > IsContiguous = " << (is_contiguous_ ? "YES" : "NO") << std::endl;
773 out << " > UseCustomGather = " << (use_gather_ ? "YES" : "NO") << std::endl;
774 out << " > num_threads = " << num_threads << std::endl;
775 out << " > worker_threads = " << ShyLUbasker->Options.worker_threads << std::endl;
776 out << " > pivot = " << ShyLUbasker->Options.no_pivot << std::endl;
777 out << " > pivot_tol = " << ShyLUbasker->Options.pivot_tol << std::endl;
778 out << " > realloc = " << ShyLUbasker->Options.realloc << std::endl;
779 out << " > verbose = " << (ShyLUbasker->Options.verbose ? "YES" : "NO") << std::endl;
780 out << " > btf = " << (ShyLUbasker->Options.btf ? "YES" : "NO") << std::endl;
781 out << " > use_metis = " << (ShyLUbasker->Options.use_metis ? "YES" : "NO") << std::endl;
782 out << " > use_nodeNDP = " << (ShyLUbasker->Options.use_nodeNDP ? "YES" : "NO") << std::endl;
783 out << " > run_nd_on_leaves = " << (ShyLUbasker->Options.run_nd_on_leaves ? "YES" : "NO") << std::endl;
784 out << " > run_amd_on_leaves = " << (ShyLUbasker->Options.run_amd_on_leaves ? "YES" : "NO") << std::endl;
785 out << " > amd_on_blocks = " << (ShyLUbasker->Options.amd_dom ? "YES" : "NO") << std::endl;
786 out << " > transpose = " << (this->control_.useTranspose_ ? "YES" : "NO") << std::endl;
787 out << " > threaded_solve = " << (ShyLUbasker->Options.threaded_solve ? "YES" : "NO") << std::endl;
788 out << " > user_fill = " << ShyLUbasker->Options.user_fill << std::endl;
789 out << " > prune = " << (ShyLUbasker->Options.prune ? "YES" : "NO") << std::endl;
790 out << " > replace_zero_pivot = " << (ShyLUbasker->Options.replace_zero_pivot ? "YES" : "NO") << std::endl;
791 out << " > replace_tiny_pivot = " << (ShyLUbasker->Options.replace_tiny_pivot ? "YES" : "NO") << std::endl;
792 out << " > btf_matching = " << (ShyLUbasker->Options.btf_matching ? "YES" : "NO") << std::endl;
793 out << " > blk_matching = " << (ShyLUbasker->Options.blk_matching ? "YES" : "NO") << std::endl;
794 out << " > use_sequential_diag_facto = " << (ShyLUbasker->Options.use_sequential_diag_facto ? "YES" : "NO") << std::endl;
795 out << " > partial_facto = " << ShyLUbasker->Options.partial_facto << std::endl;
796 out << std::endl;
797}
798
799
800template<class Matrix, class Vector>
801const char* ShyLUBasker<Matrix,Vector>::name = "ShyLUBasker";
802
803
804} // end namespace Amesos2
805
806#endif // AMESOS2_SHYLUBASKER_DEF_HPP
Amesos2 ShyLUBasker declarations.
@ ROOTED
Definition Amesos2_TypeDecl.hpp:93
@ CONTIGUOUS_AND_ROOTED
Definition Amesos2_TypeDecl.hpp:94
@ ARBITRARY
Definition Amesos2_TypeDecl.hpp:109
A Matrix adapter interface for Amesos2.
Definition Amesos2_MatrixAdapter_decl.hpp:42
Amesos2 interface to the Baker package.
Definition Amesos2_ShyLUBasker_decl.hpp:43
EPhase
Used to indicate a phase in the direct solution.
Definition Amesos2_TypeDecl.hpp:31
void transpose(ArrayView< Scalar > vals, ArrayView< GlobalOrdinal > indices, ArrayView< GlobalSizeT > ptr, ArrayView< Scalar > trans_vals, ArrayView< GlobalOrdinal > trans_indices, ArrayView< GlobalSizeT > trans_ptr)
A templated MultiVector class adapter for Amesos2.
Definition Amesos2_MultiVecAdapter_decl.hpp:142
A generic helper class for getting a CCS representation of a Matrix.
Definition Amesos2_Util.hpp:589