Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_Tacho_def.hpp
1// @HEADER
2// *****************************************************************************
3// Amesos2: Templated Direct Sparse Solver Package
4//
5// Copyright 2011 NTESS and the Amesos2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef AMESOS2_TACHO_DEF_HPP
11#define AMESOS2_TACHO_DEF_HPP
12
13#include <Teuchos_Tuple.hpp>
14#include <Teuchos_ParameterList.hpp>
15#include <Teuchos_StandardParameterEntryValidators.hpp>
16
18#include "Amesos2_Tacho_decl.hpp"
19#include "Amesos2_Util.hpp"
20
21namespace Amesos2 {
22
23template <class Matrix, class Vector>
25 Teuchos::RCP<const Matrix> A,
26 Teuchos::RCP<Vector> X,
27 Teuchos::RCP<const Vector> B )
28 : SolverCore<Amesos2::TachoSolver,Matrix,Vector>(A, X, B)
29{
30 data_.method = 1; // Cholesky
31 data_.variant = 2; // solver variant
32 data_.streams = 1; // # of streams
33 data_.dofs_per_node = 1; // DoFs / node
34 data_.pivot_pert = false; // Pertub small pivot
35 data_.diag_shift = false; // Shift diagonal
36 data_.verbose = false; // verbose
37 data_.team_on_user_stream = false; // use user stream-0 for team/batched kernels
38 data_.small_problem_threshold_size = 1024;
39}
40
41
42template <class Matrix, class Vector>
44{
45 if ( this->root_ ) {
46 data_.solver.release();
47 }
48}
49
50template <class Matrix, class Vector>
51std::string
53{
54 std::ostringstream oss;
55 oss << "Tacho solver interface";
56 return oss.str();
57}
58
59template<class Matrix, class Vector>
60int
65
66template <class Matrix, class Vector>
67int
69{
70#ifdef HAVE_AMESOS2_TIMERS
71 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
72#endif
73
74 int status = 0;
75 if ( this->root_ ) {
76 if(do_optimization()) {
77 this->matrixA_->returnRowPtr_kokkos_view(host_row_ptr_view_);
78 this->matrixA_->returnColInd_kokkos_view(host_cols_view_);
79 }
80
81 data_.solver.setSolutionMethod(data_.method);
82 data_.solver.setLevelSetOptionAlgorithmVariant(data_.variant);
83 data_.solver.setSmallProblemThresholdsize(data_.small_problem_threshold_size);
84 data_.solver.setVerbose(data_.verbose);
85 data_.solver.setLevelSetOptionNumStreams(data_.streams, data_.team_on_user_stream);
86 // TODO: Confirm param options
87 // data_.solver.setMaxNumberOfSuperblocks(data_.max_num_superblocks);
88
89 // Symbolic factorization currently must be done on host
90 if (data_.dofs_per_node > 1) {
91 data_.solver.analyze(this->globalNumCols_, data_.dofs_per_node, host_row_ptr_view_, host_cols_view_);
92 } else {
93 data_.solver.analyze(this->globalNumCols_, host_row_ptr_view_, host_cols_view_);
94 }
95 data_.solver.initialize();
96 }
97 return status;
98}
99
100
101template <class Matrix, class Vector>
102int
104{
105#ifdef HAVE_AMESOS2_TIMERS
106 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
107#endif
108
109 int status = 0;
110 if ( this->root_ ) {
111 if(do_optimization()) {
112 // instead of holding onto the device poinster
113 // this->matrixA_->returnValues_kokkos_view(device_nzvals_view_);
114 // make an explicit copy
115 device_value_type_array device_nzvals_temp;
116 this->matrixA_->returnValues_kokkos_view(device_nzvals_temp);
117 Kokkos::deep_copy(device_nzvals_view_, device_nzvals_temp);
118 }
119 if (data_.diag_shift) {
120 data_.solver.shiftDiagonal();
121 }
122 if (data_.pivot_pert) {
123 data_.solver.useDefaultPivotTolerance();
124 } else {
125 data_.solver.useNoPivotTolerance();
126 }
127 data_.solver.factorize(device_nzvals_view_);
128 }
129 return status;
130}
131
132template <class Matrix, class Vector>
133int
135 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
136{
137 using Teuchos::as;
138
139 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
140 const size_t nrhs = X->getGlobalNumVectors();
141
142 // don't allocate b since it's handled by the copy manager and might just be
143 // be assigned, not copied anyways.
144 // also don't allocate x since we will also use do_get to allocate this if
145 // necessary. When a copy is not necessary we'll solve directly to the x
146 // values in the MV.
147 bool bDidAssignX;
148 { // Get values from RHS B
149#ifdef HAVE_AMESOS2_TIMERS
150 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
151#endif
152 const bool initialize_data = true;
153 const bool do_not_initialize_data = false;
154 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
155 device_solve_array_t>::do_get(initialize_data, B, this->bValues_,
156 as<size_t>(ld_rhs),
157 ROOTED, this->rowIndexBase_);
158 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
159 device_solve_array_t>::do_get(do_not_initialize_data, X, this->xValues_,
160 as<size_t>(ld_rhs),
161 ROOTED, this->rowIndexBase_);
162 }
163
164 int ierr = 0; // returned error code
165
166 if ( this->root_ ) { // Do solve!
167 // Bump up the workspace size if needed
168#ifdef HAVE_AMESOS2_TIMERS
169 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
170#endif
171 if (workspace_.extent(0) < this->globalNumRows_ || workspace_.extent(1) < nrhs) {
172 workspace_ = device_solve_array_t(
173 Kokkos::ViewAllocateWithoutInitializing("t"), this->globalNumRows_, nrhs);
174 }
175
176 data_.solver.solve(xValues_, bValues_, workspace_);
177
178 int status = 0; // TODO: determine what error handling will be
179 if(status != 0) {
180 ierr = status;
181 }
182 }
183
184 /* All processes should have the same error code */
185 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
186
187 TEUCHOS_TEST_FOR_EXCEPTION( ierr != 0, std::runtime_error,
188 "tacho_solve has error code: " << ierr );
189
190 /* Update X's global values */
191
192 // if bDidAssignX, then we solved straight to the adapter's X memory space without
193 // requiring additional memory allocation, so the x data is already in place.
194 if(!bDidAssignX) {
195#ifdef HAVE_AMESOS2_TIMERS
196 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
197#endif
198
199 // This will do nothing is if the target view matches the src view, which
200 // can be the case if the memory spaces match. See comments above for do_get.
201 Util::template put_1d_data_helper_kokkos_view<
202 MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, xValues_,
203 as<size_t>(ld_rhs),
204 ROOTED, this->rowIndexBase_);
205 }
206
207 return(ierr);
208}
209
210
211template <class Matrix, class Vector>
212bool
214{
215 // Tacho can only apply the solve routines to square matrices
216 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
217}
218
219
220template <class Matrix, class Vector>
221void
222TachoSolver<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
223{
224 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
225
226 // TODO: Confirm param options
227
228 // factorization type
229 auto method_name = parameterList->get<std::string> ("method", "chol");
230 if (method_name == "ldl-nopiv")
231 data_.method = 0;
232 else if (method_name == "chol")
233 data_.method = 1;
234 else if (method_name == "ldl")
235 data_.method = 2;
236 else if (method_name == "lu")
237 data_.method = 3;
238 else {
239 std::cout << "Error: not supported solution method\n";
240 }
241 // solver type
242 data_.variant = parameterList->get<int> ("variant", 2);
243 // small problem threshold
244 data_.small_problem_threshold_size = parameterList->get<int> ("small problem threshold size", 1024);
245 // verbosity
246 data_.verbose = parameterList->get<bool> ("verbose", false);
247 // # of streams
248 data_.streams = parameterList->get<int> ("num-streams", 1);
249 // use user stream-0 for batched kernels
250 data_.team_on_user_stream = parameterList->get<bool> ("team-on-user-stream", false);
251 // DoFs / node
252 data_.dofs_per_node = parameterList->get<int> ("dofs-per-node", 1);
253 // Perturb tiny pivots
254 data_.pivot_pert = parameterList->get<bool> ("perturb-pivot", false);
255 data_.diag_shift = parameterList->get<bool> ("shift-diag", false);
256 // TODO: Confirm param options
257 // data_.num_kokkos_threads = parameterList->get<int>("kokkos-threads", 1);
258 // data_.max_num_superblocks = parameterList->get<int>("max-num-superblocks", 4);
259}
260
261
262template <class Matrix, class Vector>
263Teuchos::RCP<const Teuchos::ParameterList>
265{
266 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
267
268 if( is_null(valid_params) ){
269 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
270
271 pl->set("method", "chol", "Type of factorization, chol, ldl, or lu");
272 pl->set("variant", 2, "Type of solver variant, 0, 1, or 2");
273 pl->set("small problem threshold size", 1024, "Problem size threshold below with Tacho uses LAPACK.");
274 pl->set("verbose", false, "Verbosity");
275 pl->set("num-streams", 1, "Number of GPU streams");
276 pl->set("dofs-per-node", 1, "DoFs per node");
277 pl->set("perturb-pivot", false, "Perturb tiny pivots");
278 pl->set("shift-diag", false, "Shift diagonal entries");
279 pl->set("team-on-user-stream", false, "Use user stream-0 for team/batched kernels");
280
281 // TODO: Confirm param options
282 // pl->set("kokkos-threads", 1, "Number of threads");
283 // pl->set("max-num-superblocks", 4, "Max number of superblocks");
284
285 valid_params = pl;
286 }
287
288 return valid_params;
289}
290
291template <class Matrix, class Vector>
292bool
294 return (this->root_ && (this->matrixA_->getComm()->getSize() == 1));
295}
296
297template <class Matrix, class Vector>
298bool
300{
301
302 if(current_phase == SOLVE) {
303 return(false);
304 }
305
306 if(!do_optimization()) {
307#ifdef HAVE_AMESOS2_TIMERS
308 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
309#endif
310
311 // Note views are allocated but eventually we should remove this.
312 // The internal copy manager will decide if we can assign or deep_copy
313 // and then allocate if necessary. However the GPU solvers are serial right
314 // now so I didn't complete refactoring the matrix code for the parallel
315 // case. If we added that later, we should have it hooked up to the copy
316 // manager and then these allocations can go away.
317 {
318 if( this->root_ ) {
319 if (device_nzvals_view_.extent(0) != this->globalNumNonZeros_)
320 Kokkos::resize(device_nzvals_view_, this->globalNumNonZeros_);
321 if (host_cols_view_.extent(0) != this->globalNumNonZeros_)
322 Kokkos::resize(host_cols_view_, this->globalNumNonZeros_);
323 if (host_row_ptr_view_.extent(0) != this->globalNumRows_ + 1)
324 Kokkos::resize(host_row_ptr_view_, this->globalNumRows_ + 1);
325 } else {
326 Kokkos::resize(device_nzvals_view_, 0);
327 Kokkos::resize(host_cols_view_, 0);
328 Kokkos::resize(host_row_ptr_view_, 1);
329 }
330 }
331
332 typename host_size_type_array::value_type nnz_ret = 0;
333 {
334 #ifdef HAVE_AMESOS2_TIMERS
335 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
336 #endif
337
338 TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
339 std::runtime_error,
340 "Row and column maps have different indexbase ");
341
343 device_value_type_array, host_ordinal_type_array, host_size_type_array>::do_get(
344 this->matrixA_.ptr(),
345 device_nzvals_view_,
346 host_cols_view_,
347 host_row_ptr_view_,
348 nnz_ret,
350 this->columnIndexBase_);
351 }
352 }
353 else {
354 if( this->root_ ) {
355 // instead of holding onto the device poinster (which could cause issue)
356 // make an explicit copy
357 device_nzvals_view_ = device_value_type_array(
358 Kokkos::ViewAllocateWithoutInitializing("nzvals"), this->globalNumNonZeros_);
359 }
360 }
361
362 return true;
363}
364
365
366template <class Matrix, class Vector>
367void
369 const Teuchos::EVerbosityLevel verbLevel) const
370{
371 out << " Tacho current parameters:" << std::endl;
372 out << " > method = " << data_.method;
373 if (data_.method == 0) out << " (ldl-nopiv)" << std::endl;
374 if (data_.method == 1) out << " (chol)" << std::endl;
375 if (data_.method == 2) out << " (ldl)" << std::endl;
376 if (data_.method == 3) out << " (lu)" << std::endl;
377 out << " > variant = " << data_.variant << std::endl;
378 out << " > verbose = " << data_.verbose << std::endl;
379 out << " > num-streams = " << data_.streams << std::endl;
380 out << " > dofs-per-node = " << data_.dofs_per_node << std::endl;
381 out << " > perturb-pivo = " << (data_.pivot_pert ? "YES" : "NO") << std::endl;
382 out << " > shift-diag = " << (data_.diag_shift ? "YES" : "NO") << std::endl;
383 out << " > team-on-user-stream = " << (data_.team_on_user_stream ? "YES" : "NO") << std::endl;
384 out << " > small problem threshold size = " << data_.small_problem_threshold_size << std::endl;
385 out << std::endl;
386}
387
388
389template<class Matrix, class Vector>
390const char* TachoSolver<Matrix,Vector>::name = "Tacho";
391
392
393} // end namespace Amesos2
394
395#endif // AMESOS2_TACHO_DEF_HPP
@ ROOTED
Definition Amesos2_TypeDecl.hpp:93
@ ARBITRARY
Definition Amesos2_TypeDecl.hpp:109
Utility functions for Amesos2.
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers.
Definition Amesos2_SolverCore_decl.hpp:72
Amesos2 interface to the Tacho package.
Definition Amesos2_Tacho_decl.hpp:34
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition Amesos2_Tacho_def.hpp:213
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition Amesos2_Tacho_def.hpp:264
int numericFactorization_impl()
Tacho specific numeric factorization.
Definition Amesos2_Tacho_def.hpp:103
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Tacho specific solve.
Definition Amesos2_Tacho_def.hpp:134
~TachoSolver()
Destructor.
Definition Amesos2_Tacho_def.hpp:43
bool do_optimization() const
can we optimize size_type and ordinal_type for straight pass through
Definition Amesos2_Tacho_def.hpp:293
TachoSolver(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition Amesos2_Tacho_def.hpp:24
std::string description() const override
Returns a short description of this Solver.
Definition Amesos2_Tacho_def.hpp:52
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition Amesos2_Tacho_def.hpp:299
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Tacho.
Definition Amesos2_Tacho_def.hpp:68
void describe_impl(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const
Prints the status information about the current solver with some level of verbosity.
Definition Amesos2_Tacho_def.hpp:368
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition Amesos2_Tacho_def.hpp:61
EPhase
Used to indicate a phase in the direct solution.
Definition Amesos2_TypeDecl.hpp:31
A templated MultiVector class adapter for Amesos2.
Definition Amesos2_MultiVecAdapter_decl.hpp:142
Similar to get_ccs_helper , but used to get a CRS representation of the given matrix.
Definition Amesos2_Util.hpp:600