Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_Tacho_def.hpp
1// @HEADER
2// *****************************************************************************
3// Amesos2: Templated Direct Sparse Solver Package
4//
5// Copyright 2011 NTESS and the Amesos2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef AMESOS2_TACHO_DEF_HPP
11#define AMESOS2_TACHO_DEF_HPP
12
13#include <Teuchos_Tuple.hpp>
14#include <Teuchos_ParameterList.hpp>
15#include <Teuchos_StandardParameterEntryValidators.hpp>
16
18#include "Amesos2_Tacho_decl.hpp"
19#include "Amesos2_Util.hpp"
20
21namespace Amesos2 {
22
23template <class Matrix, class Vector>
25 Teuchos::RCP<const Matrix> A,
26 Teuchos::RCP<Vector> X,
27 Teuchos::RCP<const Vector> B )
28 : SolverCore<Amesos2::TachoSolver,Matrix,Vector>(A, X, B)
29{
30 data_.method = 1; // Cholesky
31 data_.variant = 2; // solver variant
32 data_.streams = 1; // # of streams
33 data_.dofs_per_node = 1; // DoFs / node
34 data_.pivot_pert = false; // Pertub small pivot
35 data_.diag_shift = false; // Shift diagonal
36 data_.verbose = false; // verbose
37 data_.small_problem_threshold_size = 1024;
38}
39
40
41template <class Matrix, class Vector>
43{
44 if ( this->root_ ) {
45 data_.solver.release();
46 }
47}
48
49template <class Matrix, class Vector>
50std::string
52{
53 std::ostringstream oss;
54 oss << "Tacho solver interface";
55 return oss.str();
56}
57
58template<class Matrix, class Vector>
59int
64
65template <class Matrix, class Vector>
66int
68{
69#ifdef HAVE_AMESOS2_TIMERS
70 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
71#endif
72
73 int status = 0;
74 if ( this->root_ ) {
75 if(do_optimization()) {
76 this->matrixA_->returnRowPtr_kokkos_view(host_row_ptr_view_);
77 this->matrixA_->returnColInd_kokkos_view(host_cols_view_);
78 }
79
80 if (data_.diag_shift) {
81 data_.solver.shiftDiagonal();
82 }
83 data_.solver.setSolutionMethod(data_.method);
84 data_.solver.setLevelSetOptionAlgorithmVariant(data_.variant);
85 data_.solver.setSmallProblemThresholdsize(data_.small_problem_threshold_size);
86 data_.solver.setVerbose(data_.verbose);
87 data_.solver.setLevelSetOptionNumStreams(data_.streams);
88 // TODO: Confirm param options
89 // data_.solver.setMaxNumberOfSuperblocks(data_.max_num_superblocks);
90
91 // Symbolic factorization currently must be done on host
92 if (data_.dofs_per_node > 1) {
93 data_.solver.analyze(this->globalNumCols_, data_.dofs_per_node, host_row_ptr_view_, host_cols_view_);
94 } else {
95 data_.solver.analyze(this->globalNumCols_, host_row_ptr_view_, host_cols_view_);
96 }
97 data_.solver.initialize();
98 }
99 return status;
100}
101
102
103template <class Matrix, class Vector>
104int
106{
107#ifdef HAVE_AMESOS2_TIMERS
108 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
109#endif
110
111 int status = 0;
112 if ( this->root_ ) {
113 if(do_optimization()) {
114 // instead of holding onto the device poinster
115 // this->matrixA_->returnValues_kokkos_view(device_nzvals_view_);
116 // make an explicit copy
117 device_value_type_array device_nzvals_temp;
118 this->matrixA_->returnValues_kokkos_view(device_nzvals_temp);
119 Kokkos::deep_copy(device_nzvals_view_, device_nzvals_temp);
120 }
121 if (data_.pivot_pert) {
122 data_.solver.useDefaultPivotTolerance();
123 } else {
124 data_.solver.useNoPivotTolerance();
125 }
126 data_.solver.factorize(device_nzvals_view_);
127 }
128 return status;
129}
130
131template <class Matrix, class Vector>
132int
134 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
135{
136 using Teuchos::as;
137
138 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
139 const size_t nrhs = X->getGlobalNumVectors();
140
141 // don't allocate b since it's handled by the copy manager and might just be
142 // be assigned, not copied anyways.
143 // also don't allocate x since we will also use do_get to allocate this if
144 // necessary. When a copy is not necessary we'll solve directly to the x
145 // values in the MV.
146 bool bDidAssignX;
147 { // Get values from RHS B
148#ifdef HAVE_AMESOS2_TIMERS
149 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
150#endif
151 const bool initialize_data = true;
152 const bool do_not_initialize_data = false;
153 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
154 device_solve_array_t>::do_get(initialize_data, B, this->bValues_,
155 as<size_t>(ld_rhs),
156 ROOTED, this->rowIndexBase_);
157 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
158 device_solve_array_t>::do_get(do_not_initialize_data, X, this->xValues_,
159 as<size_t>(ld_rhs),
160 ROOTED, this->rowIndexBase_);
161 }
162
163 int ierr = 0; // returned error code
164
165 if ( this->root_ ) { // Do solve!
166 // Bump up the workspace size if needed
167#ifdef HAVE_AMESOS2_TIMERS
168 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
169#endif
170 if (workspace_.extent(0) < this->globalNumRows_ || workspace_.extent(1) < nrhs) {
171 workspace_ = device_solve_array_t(
172 Kokkos::ViewAllocateWithoutInitializing("t"), this->globalNumRows_, nrhs);
173 }
174
175 data_.solver.solve(xValues_, bValues_, workspace_);
176
177 int status = 0; // TODO: determine what error handling will be
178 if(status != 0) {
179 ierr = status;
180 }
181 }
182
183 /* All processes should have the same error code */
184 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
185
186 TEUCHOS_TEST_FOR_EXCEPTION( ierr != 0, std::runtime_error,
187 "tacho_solve has error code: " << ierr );
188
189 /* Update X's global values */
190
191 // if bDidAssignX, then we solved straight to the adapter's X memory space without
192 // requiring additional memory allocation, so the x data is already in place.
193 if(!bDidAssignX) {
194#ifdef HAVE_AMESOS2_TIMERS
195 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
196#endif
197
198 // This will do nothing is if the target view matches the src view, which
199 // can be the case if the memory spaces match. See comments above for do_get.
200 Util::template put_1d_data_helper_kokkos_view<
201 MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, xValues_,
202 as<size_t>(ld_rhs),
203 ROOTED, this->rowIndexBase_);
204 }
205
206 return(ierr);
207}
208
209
210template <class Matrix, class Vector>
211bool
213{
214 // Tacho can only apply the solve routines to square matrices
215 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
216}
217
218
219template <class Matrix, class Vector>
220void
221TachoSolver<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
222{
223 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
224
225 // TODO: Confirm param options
226
227 // factorization type
228 auto method_name = parameterList->get<std::string> ("method", "chol");
229 if (method_name == "ldl-nopiv")
230 data_.method = 0;
231 else if (method_name == "chol")
232 data_.method = 1;
233 else if (method_name == "ldl")
234 data_.method = 2;
235 else if (method_name == "lu")
236 data_.method = 3;
237 else {
238 std::cout << "Error: not supported solution method\n";
239 }
240 // solver type
241 data_.variant = parameterList->get<int> ("variant", 2);
242 // small problem threshold
243 data_.small_problem_threshold_size = parameterList->get<int> ("small problem threshold size", 1024);
244 // verbosity
245 data_.verbose = parameterList->get<bool> ("verbose", false);
246 // # of streams
247 data_.streams = parameterList->get<int> ("num-streams", 1);
248 // DoFs / node
249 data_.dofs_per_node = parameterList->get<int> ("dofs-per-node", 1);
250 // Perturb tiny pivots
251 data_.pivot_pert = parameterList->get<bool> ("perturb-pivot", false);
252 data_.diag_shift = parameterList->get<bool> ("shift-diag", false);
253 // TODO: Confirm param options
254 // data_.num_kokkos_threads = parameterList->get<int>("kokkos-threads", 1);
255 // data_.max_num_superblocks = parameterList->get<int>("max-num-superblocks", 4);
256}
257
258
259template <class Matrix, class Vector>
260Teuchos::RCP<const Teuchos::ParameterList>
262{
263 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
264
265 if( is_null(valid_params) ){
266 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
267
268 pl->set("method", "chol", "Type of factorization, chol, ldl, or lu");
269 pl->set("variant", 2, "Type of solver variant, 0, 1, or 2");
270 pl->set("small problem threshold size", 1024, "Problem size threshold below with Tacho uses LAPACK.");
271 pl->set("verbose", false, "Verbosity");
272 pl->set("num-streams", 1, "Number of GPU streams");
273 pl->set("dofs-per-node", 1, "DoFs per node");
274 pl->set("perturb-pivot", false, "Perturb tiny pivots");
275 pl->set("shift-diag", false, "Shift diagonal entries");
276
277 // TODO: Confirm param options
278 // pl->set("kokkos-threads", 1, "Number of threads");
279 // pl->set("max-num-superblocks", 4, "Max number of superblocks");
280
281 valid_params = pl;
282 }
283
284 return valid_params;
285}
286
287template <class Matrix, class Vector>
288bool
290 return (this->root_ && (this->matrixA_->getComm()->getSize() == 1));
291}
292
293template <class Matrix, class Vector>
294bool
296{
297
298 if(current_phase == SOLVE) {
299 return(false);
300 }
301
302 if(!do_optimization()) {
303#ifdef HAVE_AMESOS2_TIMERS
304 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
305#endif
306
307 // Note views are allocated but eventually we should remove this.
308 // The internal copy manager will decide if we can assign or deep_copy
309 // and then allocate if necessary. However the GPU solvers are serial right
310 // now so I didn't complete refactoring the matrix code for the parallel
311 // case. If we added that later, we should have it hooked up to the copy
312 // manager and then these allocations can go away.
313 {
314 if( this->root_ ) {
315 if (device_nzvals_view_.extent(0) != this->globalNumNonZeros_)
316 Kokkos::resize(device_nzvals_view_, this->globalNumNonZeros_);
317 if (host_cols_view_.extent(0) != this->globalNumNonZeros_)
318 Kokkos::resize(host_cols_view_, this->globalNumNonZeros_);
319 if (host_row_ptr_view_.extent(0) != this->globalNumRows_ + 1)
320 Kokkos::resize(host_row_ptr_view_, this->globalNumRows_ + 1);
321 } else {
322 Kokkos::resize(device_nzvals_view_, 0);
323 Kokkos::resize(host_cols_view_, 0);
324 Kokkos::resize(host_row_ptr_view_, 1);
325 }
326 }
327
328 typename host_size_type_array::value_type nnz_ret = 0;
329 {
330 #ifdef HAVE_AMESOS2_TIMERS
331 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
332 #endif
333
334 TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
335 std::runtime_error,
336 "Row and column maps have different indexbase ");
337
339 device_value_type_array, host_ordinal_type_array, host_size_type_array>::do_get(
340 this->matrixA_.ptr(),
341 device_nzvals_view_,
342 host_cols_view_,
343 host_row_ptr_view_,
344 nnz_ret,
346 this->columnIndexBase_);
347 }
348 }
349 else {
350 if( this->root_ ) {
351 // instead of holding onto the device poinster (which could cause issue)
352 // make an explicit copy
353 device_nzvals_view_ = device_value_type_array(
354 Kokkos::ViewAllocateWithoutInitializing("nzvals"), this->globalNumNonZeros_);
355 }
356 }
357
358 return true;
359}
360
361
362template <class Matrix, class Vector>
363void
365 const Teuchos::EVerbosityLevel verbLevel) const
366{
367 out << " Tacho current parameters:" << std::endl;
368 out << " > method = " << data_.method;
369 if (data_.method == 0) out << " (ldl-nopiv)" << std::endl;
370 if (data_.method == 1) out << " (chol)" << std::endl;
371 if (data_.method == 2) out << " (ldl)" << std::endl;
372 if (data_.method == 3) out << " (lu)" << std::endl;
373 out << " > variant = " << data_.variant << std::endl;
374 out << " > verbose = " << data_.verbose << std::endl;
375 out << " > num-streams = " << data_.streams << std::endl;
376 out << " > dofs-per-node = " << data_.dofs_per_node << std::endl;
377 out << " > perturb-pivo = " << (data_.pivot_pert ? "YES" : "NO") << std::endl;
378 out << " > shift-diag = " << (data_.diag_shift ? "YES" : "NO") << std::endl;
379 out << " > small problem threshold size = " << data_.small_problem_threshold_size << std::endl;
380 out << std::endl;
381}
382
383
384template<class Matrix, class Vector>
385const char* TachoSolver<Matrix,Vector>::name = "Tacho";
386
387
388} // end namespace Amesos2
389
390#endif // AMESOS2_TACHO_DEF_HPP
@ ROOTED
Definition Amesos2_TypeDecl.hpp:93
@ ARBITRARY
Definition Amesos2_TypeDecl.hpp:109
Utility functions for Amesos2.
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers.
Definition Amesos2_SolverCore_decl.hpp:72
Amesos2 interface to the Tacho package.
Definition Amesos2_Tacho_decl.hpp:34
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition Amesos2_Tacho_def.hpp:212
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition Amesos2_Tacho_def.hpp:261
int numericFactorization_impl()
Tacho specific numeric factorization.
Definition Amesos2_Tacho_def.hpp:105
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Tacho specific solve.
Definition Amesos2_Tacho_def.hpp:133
~TachoSolver()
Destructor.
Definition Amesos2_Tacho_def.hpp:42
bool do_optimization() const
can we optimize size_type and ordinal_type for straight pass through
Definition Amesos2_Tacho_def.hpp:289
TachoSolver(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition Amesos2_Tacho_def.hpp:24
std::string description() const override
Returns a short description of this Solver.
Definition Amesos2_Tacho_def.hpp:51
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition Amesos2_Tacho_def.hpp:295
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Tacho.
Definition Amesos2_Tacho_def.hpp:67
void describe_impl(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const
Prints the status information about the current solver with some level of verbosity.
Definition Amesos2_Tacho_def.hpp:364
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition Amesos2_Tacho_def.hpp:60
EPhase
Used to indicate a phase in the direct solution.
Definition Amesos2_TypeDecl.hpp:31
A templated MultiVector class adapter for Amesos2.
Definition Amesos2_MultiVecAdapter_decl.hpp:142
Similar to get_ccs_helper , but used to get a CRS representation of the given matrix.
Definition Amesos2_Util.hpp:600