Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_Cholmod_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Amesos2: Templated Direct Sparse Solver Package
4//
5// Copyright 2011 NTESS and the Amesos2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
19#ifndef AMESOS2_CHOLMOD_DEF_HPP
20#define AMESOS2_CHOLMOD_DEF_HPP
21
22#include <Teuchos_Tuple.hpp>
23#include <Teuchos_ParameterList.hpp>
24#include <Teuchos_StandardParameterEntryValidators.hpp>
25
28
29#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
30#include "KokkosSparse_sptrsv_cholmod.hpp"
31#endif
32
33namespace Amesos2 {
34
35
36template <class Matrix, class Vector>
38 Teuchos::RCP<const Matrix> A,
39 Teuchos::RCP<Vector> X,
40 Teuchos::RCP<const Vector> B )
41 : SolverCore<Amesos2::Cholmod,Matrix,Vector>(A, X, B)
42 , firstsolve(true)
43 , skip_symfact(false)
44 , map()
45 , is_contiguous_(true) // default is set by params
46 , use_triangular_solves_(false) // default is set by params
47 , use_cholmod_int_type_(false) // if true we use CHOLMOD_INT (no GPU support)
48{
49 data_.L = NULL;
50 data_.Y = NULL;
51 data_.E = NULL;
52
53 data_.c.supernodal = CHOLMOD_AUTO;
54 data_.c.quick_return_if_not_posdef = 1;
55}
56
57
58template <class Matrix, class Vector>
60{
61 if(use_cholmod_int_type_) {
62 cholmod_free_factor(&(data_.L), &(data_.c));
63 cholmod_free_dense(&(data_.Y), &data_.c);
64 cholmod_free_dense(&(data_.E), &data_.c);
65 cholmod_finish(&(data_.c));
66 }
67 else {
68 // useful to check if GPU is being used, but very verbose
69 // cholmod_l_gpu_stats(&(data_.c));
70 cholmod_l_free_factor(&(data_.L), &(data_.c));
71 cholmod_l_free_dense(&(data_.Y), &data_.c);
72 cholmod_l_free_dense(&(data_.E), &data_.c);
73 cholmod_l_finish(&(data_.c));
74 }
75}
76
77template<class Matrix, class Vector>
78int
80{
81#ifdef HAVE_AMESOS2_TIMERS
82 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
83#endif
84
85 int info = 0;
86
87 if(use_cholmod_int_type_) {
88 data_.L = cholmod_analyze(&data_.A, &(data_.c));
89 }
90 else {
91 data_.L = cholmod_l_analyze(&data_.A, &(data_.c));
92 }
93
94 info = data_.c.status;
95 skip_symfact = true;
96
97 /* All processes should have the same error code */
98 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
99
100 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
101 std::runtime_error,
102 "Amesos2 cholmod_l_analyze failure in Cholmod preOrdering_impl");
103
104 return(0);
105}
106
107
108template <class Matrix, class Vector>
109int
111{
112 int info = 0;
113
114 if(!skip_symfact) {
115#ifdef HAVE_AMESOS2_TIMERS
116 Teuchos::TimeMonitor symFactTimer(this->timers_.symFactTime_);
117#endif
118
119 if(use_cholmod_int_type_) {
120 cholmod_resymbol (&data_.A, NULL, 0, true, data_.L, &(data_.c));
121 }
122 else {
123 cholmod_l_resymbol (&data_.A, NULL, 0, true, data_.L, &(data_.c));
124 }
125
126 info = data_.c.status;
127 } else {
128 /*
129 * Symbolic factorization has already occured in preOrdering_impl,
130 * but if the user calls this routine directly later, we need to
131 * redo the symbolic factorization.
132 */
133 skip_symfact = false;
134 }
135
136 /* All processes should have the same error code */
137 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
138
139 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
140 std::runtime_error,
141 "Amesos2 cholmod_l_resymbol failure in Cholmod symbolicFactorization_impl");
142
143 if(use_triangular_solves_) {
144 triangular_solve_symbolic();
145 }
146
147 return(0);
148}
149
150
151template <class Matrix, class Vector>
152int
154{
155 int info = 0;
156
157#ifdef HAVE_AMESOS2_DEBUG
158 TEUCHOS_TEST_FOR_EXCEPTION(data_.A.ncol != Teuchos::as<size_t>(this->globalNumCols_),
159 std::runtime_error,
160 "Error in converting to cholmod_sparse: wrong number of global columns." );
161 TEUCHOS_TEST_FOR_EXCEPTION(data_.A.nrow != Teuchos::as<size_t>(this->globalNumRows_),
162 std::runtime_error,
163 "Error in converting to cholmod_sparse: wrong number of global rows." );
164#endif
165
166#ifdef HAVE_AMESOS2_TIMERS
167 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
168#endif
169
170#ifdef HAVE_AMESOS2_VERBOSE_DEBUG
171 // Add print out of views here - need host conversion first
172#endif
173
174 if(use_cholmod_int_type_) {
175 cholmod_factorize(&data_.A, data_.L, &(data_.c));
176 }
177 else {
178 cholmod_l_factorize(&data_.A, data_.L, &(data_.c));
179 }
180
181 info = data_.c.status;
182
183 /* All processes should have the same error code */
184 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
185
186 TEUCHOS_TEST_FOR_EXCEPTION(info == CHOLMOD_OUT_OF_MEMORY,
187 std::runtime_error,
188 "Amesos2 cholmod_l_factorize error code: CHOLMOD_OUT_OF_MEMORY");
189
190 TEUCHOS_TEST_FOR_EXCEPTION(info == CHOLMOD_NOT_POSDEF,
191 std::runtime_error,
192 "Amesos2 cholmod_l_factorize error code: CHOLMOD_NOT_POSDEF.");
193
194 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
195 std::runtime_error,
196 "Amesos2 cholmod_l_factorize error code:" << info);
197
198 if(use_triangular_solves_) {
199 triangular_solve_numeric();
200 }
201
202 return(info);
203}
204
205
206template <class Matrix, class Vector>
207int
209 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
210{
211 const global_size_type ld_rhs = X->getGlobalLength();
212 const size_t nrhs = X->getGlobalNumVectors();
213
214 bool bDidAssignX;
215 { // Get values from RHS B
216#ifdef HAVE_AMESOS2_TIMERS
217 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
218 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
219#endif
220
221 // In general we may want to write directly to the x space without a copy.
222 // So we 'get' x which may be a direct view assignment to the MV.
223 const bool initialize_data = true;
224 const bool do_not_initialize_data = false;
225 if(use_triangular_solves_) { // to device
226#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
227 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
228 device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
229 Teuchos::as<size_t>(ld_rhs),
230 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
231 this->rowIndexBase_);
232 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
233 device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
234 Teuchos::as<size_t>(ld_rhs),
235 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
236 this->rowIndexBase_);
237#endif
238 }
239 else { // to host
240 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
241 host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
242 Teuchos::as<size_t>(ld_rhs),
243 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
244 this->rowIndexBase_);
245 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
246 host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
247 Teuchos::as<size_t>(ld_rhs),
248 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
249 this->rowIndexBase_);
250 }
251 }
252
253 int ierr = 0; // returned error code
254
255#ifdef HAVE_AMESOS2_TIMERS
256 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
257#endif
258
259 if(use_triangular_solves_) {
260 triangular_solve();
261 }
262 else {
263 function_map::cholmod_init_dense(Teuchos::as<long>(this->globalNumRows_),
264 Teuchos::as<int>(nrhs), Teuchos::as<int>(ld_rhs), host_bValues_.data(), &data_.b);
265 function_map::cholmod_init_dense(Teuchos::as<long>(this->globalNumRows_),
266 Teuchos::as<int>(nrhs), Teuchos::as<int>(ld_rhs), host_xValues_.data(), &data_.x);
267
268 cholmod_dense *xtemp = &(data_.x);
269 if(use_cholmod_int_type_) {
270 cholmod_solve2(CHOLMOD_A, data_.L, &data_.b, NULL,
271 &(xtemp), NULL, &data_.Y, &data_.E, &data_.c);
272 }
273 else {
274 cholmod_l_solve2(CHOLMOD_A, data_.L, &data_.b, NULL,
275 &(xtemp), NULL, &data_.Y, &data_.E, &data_.c);
276 }
277 }
278
279 ierr = data_.c.status;
280
281 /* All processes should have the same error code */
282 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
283
284 TEUCHOS_TEST_FOR_EXCEPTION(ierr == -2, std::runtime_error, "Ran out of memory" );
285
286 /* Update X's global values */
287
288 // if bDidAssignX, then we solved straight to the adapter's X memory space without
289 // requiring additional memory allocation, so the x data is already in place.
290 if(!bDidAssignX) {
291#ifdef HAVE_AMESOS2_TIMERS
292 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
293#endif
294
295 if(use_triangular_solves_) { // to device
296#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
297 Util::put_1d_data_helper_kokkos_view<
298 MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, device_xValues_,
299 Teuchos::as<size_t>(ld_rhs),
300 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
301 this->rowIndexBase_);
302#endif
303 }
304 else { // to host
305 Util::put_1d_data_helper_kokkos_view<
306 MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, host_xValues_,
307 Teuchos::as<size_t>(ld_rhs),
308 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
309 this->rowIndexBase_);
310 }
311 }
312
313 return(ierr);
314}
315
316
317template <class Matrix, class Vector>
318bool
320{
321 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
322}
323
324
325template <class Matrix, class Vector>
326void
327Cholmod<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
328{
329 // Note that the ordering is very important here. We must do this sequence:
330 // (1) Read parameter for use_cholmod_int_type
331 // (2) Call cholmod_start or cholmod_l_start
332 // (3) Set additional data_.c values
333 use_cholmod_int_type_ = parameterList->get<bool>("CholmodInt", false);
334
335 if(use_cholmod_int_type_) {
336 data_.c.itype = CHOLMOD_INT;
337 cholmod_start(&data_.c);
338 }
339 else {
340 data_.c.itype = CHOLMOD_LONG;
341 cholmod_l_start(&data_.c); // long form required for CUDA
342 }
343
344 using Teuchos::RCP;
345 using Teuchos::getIntegralValue;
346 using Teuchos::ParameterEntryValidator;
347
348 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
349
350 is_contiguous_ = parameterList->get<bool>("IsContiguous", true);
351 use_triangular_solves_ = parameterList->get<bool>("Enable_KokkosKernels_TriangularSolves", false);
352
353 if(use_triangular_solves_) {
354#if not defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) || not defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
355 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
356 "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_CHOLMOD not configured." );
357#endif
358 }
359
360 data_.c.dbound = parameterList->get<double>("dbound", 0.0);
361 data_.c.prefer_upper = (parameterList->get<bool>("PreferUpper", true)) ? 1 : 0;
362 data_.c.print = parameterList->get<int>("print", 3);
363 data_.c.nmethods = parameterList->get<int>("nmethods", 0); // tries AMD and may try METIS if necessary and available
364
365 // useGPU = 1 means use GPU if possible - note this must be set after cholmod_l_start
366 // since cholmod_l_start sets it to the default value of -1.
367 // Setting to -1 means GPU is only used if env variable CHOLMOD_USE_GPU is set to 1.
368#ifdef KOKKOS_ENABLE_CUDA
369 const int default_gpu_setting = 1; // use gpu by default
370#else
371 // If building Trilinos with Cuda off, Cholmod can stil use GPU and the solver will still work.
372 // But that is likely not what was expected/wanted. If you do still want it, set the param to 1.
373 const int default_gpu_setting = 0; // use gpu by default
374#endif
375
376 data_.c.useGPU = parameterList->get<int>("useGPU", default_gpu_setting);
377
378#ifdef KOKKOS_ENABLE_CUDA
379 TEUCHOS_TEST_FOR_EXCEPTION(data_.c.useGPU != 0 && use_cholmod_int_type_, std::runtime_error,
380 "Amesos2 Cholmod solver must not use GPU (parameter useGPU = 0) if CholmodInt is turned on because Cholmod only supports GPU with long." );
381#endif
382
383 bool bSuperNodal = parameterList->get<bool>("SuperNodal", false);
384 data_.c.supernodal = bSuperNodal ? CHOLMOD_SUPERNODAL : CHOLMOD_AUTO;
385}
386
387
388template <class Matrix, class Vector>
389Teuchos::RCP<const Teuchos::ParameterList>
391{
392 using std::string;
393 using Teuchos::tuple;
394 using Teuchos::ParameterList;
395 using Teuchos::EnhancedNumberValidator;
396 using Teuchos::setStringToIntegralParameter;
397 using Teuchos::stringToIntegralParameterEntryValidator;
398
399 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
400
401 if( is_null(valid_params) ){
402 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
403
404
405 Teuchos::RCP<EnhancedNumberValidator<int> > print_validator
406 = Teuchos::rcp( new EnhancedNumberValidator<int>(0,5));
407
408 Teuchos::RCP<EnhancedNumberValidator<int> > nmethods_validator
409 = Teuchos::rcp( new EnhancedNumberValidator<int>(0,9));
410
411 pl->set("nmethods", 0, "Specifies the number of different ordering methods to try", nmethods_validator);
412
413 pl->set("print", 3, "Specifies the verbosity of the print statements", print_validator);
414
415 pl->set("dbound", 0.0,
416 "Specifies the smallest absolute value on the diagonal D for the LDL factorization");
417
418 pl->set("PreferUpper", true,
419 "Specifies whether the matrix will be "
420 "stored in upper triangular form.");
421
422 pl->set("useGPU", -1, "1: Use GPU is 1, 0: Do not use GPU, -1: ENV CHOLMOD_USE_GPU set GPU usage.");
423
424 pl->set("Enable_KokkosKernels_TriangularSolves", false, "Whether to use triangular solves.");
425
426 pl->set("CholmodInt", false, "Whether to use cholmod in int form");
427
428 pl->set("IsContiguous", true, "Whether GIDs contiguous");
429
430 pl->set("SuperNodal", false, "Whether to use super nodal");
431
432 valid_params = pl;
433 }
434
435 return valid_params;
436}
437
438
439template <class Matrix, class Vector>
440bool
442{
443#ifdef HAVE_AMESOS2_TIMERS
444 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
445#endif
446
447 // Only the root image needs storage allocated
448 Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
449 if(use_cholmod_int_type_) {
450 Kokkos::resize(host_rows_int_view_, this->globalNumNonZeros_);
451 Kokkos::resize(host_col_ptr_int_view_, this->globalNumRows_ + 1);
452 }
453 else {
454 Kokkos::resize(host_rows_long_view_, this->globalNumNonZeros_);
455 Kokkos::resize(host_col_ptr_long_view_, this->globalNumRows_ + 1);
456 }
457
458 {
459#ifdef HAVE_AMESOS2_TIMERS
460 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
461#endif
462
463 TEUCHOS_TEST_FOR_EXCEPTION(this->rowIndexBase_ != this->columnIndexBase_,
464 std::runtime_error,
465 "Row and column maps have different indexbase ");
466
467 if(use_cholmod_int_type_) {
468 int nnz_ret = 0;
470 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_int_type_array,
471 host_size_int_type_array>::do_get(this->matrixA_.ptr(),
472 host_nzvals_view_, host_rows_int_view_,
473 host_col_ptr_int_view_, nnz_ret,
474 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
475 ARBITRARY,
476 this->rowIndexBase_);
477
478 TEUCHOS_TEST_FOR_EXCEPTION(nnz_ret != Teuchos::as<long>(this->globalNumNonZeros_),
479 std::runtime_error,
480 "Did not get the expected number of non-zero vals");
481 }
482 else {
483 long nnz_ret = 0;
484
486 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_long_type_array,
487 host_size_long_type_array>::do_get(this->matrixA_.ptr(),
488 host_nzvals_view_, host_rows_long_view_,
489 host_col_ptr_long_view_, nnz_ret,
490 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
491 ARBITRARY,
492 this->rowIndexBase_);
493
494 TEUCHOS_TEST_FOR_EXCEPTION(nnz_ret != Teuchos::as<long>(this->globalNumNonZeros_),
495 std::runtime_error,
496 "Did not get the expected number of non-zero vals");
497 }
498 }
499
500 if(use_cholmod_int_type_) {
501 function_map::cholmod_init_sparse(Teuchos::as<size_t>(this->globalNumRows_),
502 Teuchos::as<size_t>(this->globalNumCols_),
503 Teuchos::as<size_t>(this->globalNumNonZeros_),
504 0,
505 host_col_ptr_int_view_.data(),
506 host_nzvals_view_.data(),
507 host_rows_int_view_.data(),
508 &(data_.A),
509 CHOLMOD_INT);
510 }
511 else {
512 function_map::cholmod_init_sparse(Teuchos::as<size_t>(this->globalNumRows_),
513 Teuchos::as<size_t>(this->globalNumCols_),
514 Teuchos::as<size_t>(this->globalNumNonZeros_),
515 0,
516 host_col_ptr_long_view_.data(),
517 host_nzvals_view_.data(),
518 host_rows_long_view_.data(),
519 &(data_.A),
520 CHOLMOD_LONG);
521 }
522
523 TEUCHOS_TEST_FOR_EXCEPTION(data_.A.stype == 0, std::runtime_error,
524 "CHOLMOD loadA_impl loaded matrix but it is not symmetric.");
525
526 return true;
527}
528
529
530template <class Matrix, class Vector>
531void
533{
534#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
535 // Create handles for U and U^T solves
536 if(use_cholmod_int_type_) {
537 device_int_khL_.create_sptrsv_handle(
538 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, true);
539 device_int_khU_.create_sptrsv_handle(
540 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, false);
541 }
542 else {
543 device_long_khL_.create_sptrsv_handle(
544 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, true);
545 device_long_khU_.create_sptrsv_handle(
546 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, false);
547 }
548
549 // extract etree and iperm from CHOLMOD
550 Kokkos::resize(host_trsv_etree_, data_.L->nsuper);
551 if(use_cholmod_int_type_) {
552 int *int_etree = static_cast<int*>(data_.c.Iwork) + 2 * data_.L->n;
553 for (size_t i = 0 ; i < data_.L->nsuper; ++i) {
554 host_trsv_etree_(i) = int_etree[i];
555 }
556 }
557 else {
558 long *long_etree = static_cast<long*>(data_.c.Iwork) + 2 * data_.L->n;
559 for (size_t i = 0 ; i < data_.L->nsuper; ++i) { // convert long to int array for trsv API
560 host_trsv_etree_(i) = static_cast<int>(long_etree[i]);
561 }
562 }
563
564 // set etree
565 if(use_cholmod_int_type_) {
566 device_int_khL_.set_sptrsv_etree(host_trsv_etree_.data());
567 device_int_khU_.set_sptrsv_etree(host_trsv_etree_.data());
568 }
569 else {
570 device_long_khL_.set_sptrsv_etree(host_trsv_etree_.data());
571 device_long_khU_.set_sptrsv_etree(host_trsv_etree_.data());
572 }
573
574 size_t ld_rhs = this->matrixA_->getGlobalNumRows();
575 Kokkos::resize(host_trsv_perm_, ld_rhs);
576 if(use_cholmod_int_type_) {
577 int *int_iperm = static_cast<int*>(data_.L->Perm);
578 for (size_t i = 0; i < ld_rhs; i++) {
579 host_trsv_perm_(int_iperm[i]) = i;
580 }
581 }
582 else {
583 long *long_iperm = static_cast<long*>(data_.L->Perm);
584 for (size_t i = 0; i < ld_rhs; i++) {
585 host_trsv_perm_(long_iperm[i]) = i;
586 }
587 }
588 deep_copy_or_assign_view(device_trsv_perm_, host_trsv_perm_); // will use device to permute
589
590 // set permutation
591 if(use_cholmod_int_type_) {
592 device_int_khL_.set_sptrsv_perm(host_trsv_perm_.data());
593 device_int_khU_.set_sptrsv_perm(host_trsv_perm_.data());
594 }
595 else {
596 device_long_khL_.set_sptrsv_perm(host_trsv_perm_.data());
597 device_long_khU_.set_sptrsv_perm(host_trsv_perm_.data());
598 }
599
600 // Do symbolic analysis
601 if(use_cholmod_int_type_) {
602 KokkosSparse::Experimental::sptrsv_symbolic<int, kernel_handle_int_type>
603 (&device_int_khL_, &device_int_khU_, data_.L, &data_.c);
604 }
605 else {
606 KokkosSparse::Experimental::sptrsv_symbolic<long, kernel_handle_long_type>
607 (&device_long_khL_, &device_long_khU_, data_.L, &data_.c);
608 }
609#endif
610}
611
612
613template <class Matrix, class Vector>
614void
615Cholmod<Matrix,Vector>::triangular_solve_numeric()
616{
617#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
618 // Do numerical compute
619 if(use_cholmod_int_type_) {
620 KokkosSparse::Experimental::sptrsv_compute<int, kernel_handle_int_type>
621 (&device_int_khL_, &device_int_khU_, data_.L, &data_.c);
622 }
623 else {
624 KokkosSparse::Experimental::sptrsv_compute<long, kernel_handle_long_type>
625 (&device_long_khL_, &device_long_khU_, data_.L, &data_.c);
626 }
627#endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
628}
629
630
631template <class Matrix, class Vector>
632void
633Cholmod<Matrix,Vector>::triangular_solve() const
634{
635#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
636 size_t ld_rhs = device_xValues_.extent(0);
637 size_t nrhs = device_xValues_.extent(1);
638
639 Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
640 Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
641
642 // forward pivot
643 auto local_device_bValues = device_bValues_;
644 auto local_device_trsv_perm = device_trsv_perm_;
645 auto local_device_trsv_rhs = device_trsv_rhs_;
646 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
647 KOKKOS_LAMBDA(size_t j) {
648 for(size_t k = 0; k < nrhs; ++k) {
649 local_device_trsv_rhs(local_device_trsv_perm(j),k) = local_device_bValues(j,k);
650 }
651 });
652
653 for(size_t k = 0; k < nrhs; ++k) { // sptrsv_solve does not batch
654 auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
655 auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
656
657 // do L solve= - numeric (only rhs is modified) on the default device/host space
658 if(use_cholmod_int_type_) {
659 KokkosSparse::Experimental::sptrsv_solve(&device_int_khL_, sub_sol, sub_rhs);
660 }
661 else {
662 KokkosSparse::Experimental::sptrsv_solve(&device_long_khL_, sub_sol, sub_rhs);
663 }
664
665 // do L^T solve - numeric (only rhs is modified) on the default device/host space
666 if(use_cholmod_int_type_) {
667 KokkosSparse::Experimental::sptrsv_solve(&device_int_khU_, sub_rhs, sub_sol);
668 }
669 else {
670 KokkosSparse::Experimental::sptrsv_solve(&device_long_khU_, sub_rhs, sub_sol);
671 }
672
673 } // end loop over rhs vectors
674
675 // backward pivot
676 auto local_device_xValues = device_xValues_;
677 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
678 KOKKOS_LAMBDA(size_t j) {
679 for(size_t k = 0; k < nrhs; ++k) {
680 local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm(j),k);
681 }
682 });
683#endif
684}
685
686
687template<class Matrix, class Vector>
688const char* Cholmod<Matrix,Vector>::name = "Cholmod";
689
690
691} // end namespace Amesos2
692
693#endif // AMESOS2_CHOLMOD_DEF_HPP
Amesos2 CHOLMOD declarations.
@ ROOTED
Definition Amesos2_TypeDecl.hpp:93
@ CONTIGUOUS_AND_ROOTED
Definition Amesos2_TypeDecl.hpp:94
@ ARBITRARY
Definition Amesos2_TypeDecl.hpp:109
Amesos2 interface to the CHOLMOD package.
Definition Amesos2_Cholmod_decl.hpp:43
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
CHOLMOD specific solve.
Definition Amesos2_Cholmod_def.hpp:208
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition Amesos2_Cholmod_def.hpp:79
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using CHOLMOD.
Definition Amesos2_Cholmod_def.hpp:110
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition Amesos2_Cholmod_def.hpp:327
~Cholmod()
Destructor.
Definition Amesos2_Cholmod_def.hpp:59
int numericFactorization_impl()
CHOLMOD specific numeric factorization.
Definition Amesos2_Cholmod_def.hpp:153
static const char * name
Name of this solver interface.
Definition Amesos2_Cholmod_decl.hpp:50
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition Amesos2_Cholmod_def.hpp:319
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition Amesos2_Cholmod_def.hpp:441
Cholmod(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition Amesos2_Cholmod_def.hpp:37
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition Amesos2_Cholmod_def.hpp:390
A Matrix adapter interface for Amesos2.
Definition Amesos2_MatrixAdapter_decl.hpp:42
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers.
Definition Amesos2_SolverCore_decl.hpp:72
EPhase
Used to indicate a phase in the direct solution.
Definition Amesos2_TypeDecl.hpp:31
A templated MultiVector class adapter for Amesos2.
Definition Amesos2_MultiVecAdapter_decl.hpp:142
A generic helper class for getting a CCS representation of a Matrix.
Definition Amesos2_Util.hpp:633