Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_Tacho_def.hpp
1// @HEADER
2// *****************************************************************************
3// Amesos2: Templated Direct Sparse Solver Package
4//
5// Copyright 2011 NTESS and the Amesos2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef AMESOS2_TACHO_DEF_HPP
11#define AMESOS2_TACHO_DEF_HPP
12
13#include <Teuchos_Tuple.hpp>
14#include <Teuchos_ParameterList.hpp>
15#include <Teuchos_StandardParameterEntryValidators.hpp>
16
18#include "Amesos2_Tacho_decl.hpp"
19#include "Amesos2_Util.hpp"
20
21namespace Amesos2 {
22
23template <class Matrix, class Vector>
25 Teuchos::RCP<const Matrix> A,
26 Teuchos::RCP<Vector> X,
27 Teuchos::RCP<const Vector> B )
28 : SolverCore<Amesos2::TachoSolver,Matrix,Vector>(A, X, B)
29{
30 data_.method = 1; // Cholesky
31 data_.variant = 2; // solver variant
32 data_.streams = 1; // # of streams
33 data_.dofs_per_node = 1; // DoFs / node
34 data_.pivot_pert = false; // Diagonal pertubation
35 data_.verbose = false; // verbose
36}
37
38
39template <class Matrix, class Vector>
41{
42 if ( this->root_ ) {
43 data_.solver.release();
44 }
45}
46
47template <class Matrix, class Vector>
48std::string
50{
51 std::ostringstream oss;
52 oss << "Tacho solver interface";
53 return oss.str();
54}
55
56template<class Matrix, class Vector>
57int
62
63template <class Matrix, class Vector>
64int
66{
67#ifdef HAVE_AMESOS2_TIMERS
68 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
69#endif
70
71 int status = 0;
72 if ( this->root_ ) {
73 if(do_optimization()) {
74 this->matrixA_->returnRowPtr_kokkos_view(host_row_ptr_view_);
75 this->matrixA_->returnColInd_kokkos_view(host_cols_view_);
76 }
77
78 data_.solver.setSolutionMethod(data_.method);
79 data_.solver.setLevelSetOptionAlgorithmVariant(data_.variant);
80 data_.solver.setSmallProblemThresholdsize(data_.small_problem_threshold_size);
81 data_.solver.setVerbose(data_.verbose);
82 data_.solver.setLevelSetOptionNumStreams(data_.streams);
83 // TODO: Confirm param options
84 // data_.solver.setMaxNumberOfSuperblocks(data_.max_num_superblocks);
85
86 // Symbolic factorization currently must be done on host
87 if (data_.dofs_per_node > 1) {
88 data_.solver.analyze(this->globalNumCols_, data_.dofs_per_node, host_row_ptr_view_, host_cols_view_);
89 } else {
90 data_.solver.analyze(this->globalNumCols_, host_row_ptr_view_, host_cols_view_);
91 }
92 data_.solver.initialize();
93 }
94 return status;
95}
96
97
98template <class Matrix, class Vector>
99int
101{
102#ifdef HAVE_AMESOS2_TIMERS
103 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
104#endif
105
106 int status = 0;
107 if ( this->root_ ) {
108 if(do_optimization()) {
109 // instead of holding onto the device poinster
110 // this->matrixA_->returnValues_kokkos_view(device_nzvals_view_);
111 // make an explicit copy
112 device_value_type_array device_nzvals_temp;
113 this->matrixA_->returnValues_kokkos_view(device_nzvals_temp);
114 Kokkos::deep_copy(device_nzvals_view_, device_nzvals_temp);
115 }
116 if (data_.pivot_pert) {
117 data_.solver.useDefaultPivotTolerance();
118 } else {
119 data_.solver.useNoPivotTolerance();
120 }
121 data_.solver.factorize(device_nzvals_view_);
122 }
123 return status;
124}
125
126template <class Matrix, class Vector>
127int
129 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
130{
131 using Teuchos::as;
132
133 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
134 const size_t nrhs = X->getGlobalNumVectors();
135
136 // don't allocate b since it's handled by the copy manager and might just be
137 // be assigned, not copied anyways.
138 // also don't allocate x since we will also use do_get to allocate this if
139 // necessary. When a copy is not necessary we'll solve directly to the x
140 // values in the MV.
141 bool bDidAssignX;
142 { // Get values from RHS B
143#ifdef HAVE_AMESOS2_TIMERS
144 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
145#endif
146 const bool initialize_data = true;
147 const bool do_not_initialize_data = false;
148 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
149 device_solve_array_t>::do_get(initialize_data, B, this->bValues_,
150 as<size_t>(ld_rhs),
151 ROOTED, this->rowIndexBase_);
152 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
153 device_solve_array_t>::do_get(do_not_initialize_data, X, this->xValues_,
154 as<size_t>(ld_rhs),
155 ROOTED, this->rowIndexBase_);
156 }
157
158 int ierr = 0; // returned error code
159
160 if ( this->root_ ) { // Do solve!
161 // Bump up the workspace size if needed
162#ifdef HAVE_AMESOS2_TIMERS
163 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
164#endif
165 if (workspace_.extent(0) < this->globalNumRows_ || workspace_.extent(1) < nrhs) {
166 workspace_ = device_solve_array_t(
167 Kokkos::ViewAllocateWithoutInitializing("t"), this->globalNumRows_, nrhs);
168 }
169
170 data_.solver.solve(xValues_, bValues_, workspace_);
171
172 int status = 0; // TODO: determine what error handling will be
173 if(status != 0) {
174 ierr = status;
175 }
176 }
177
178 /* All processes should have the same error code */
179 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
180
181 TEUCHOS_TEST_FOR_EXCEPTION( ierr != 0, std::runtime_error,
182 "tacho_solve has error code: " << ierr );
183
184 /* Update X's global values */
185
186 // if bDidAssignX, then we solved straight to the adapter's X memory space without
187 // requiring additional memory allocation, so the x data is already in place.
188 if(!bDidAssignX) {
189#ifdef HAVE_AMESOS2_TIMERS
190 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
191#endif
192
193 // This will do nothing is if the target view matches the src view, which
194 // can be the case if the memory spaces match. See comments above for do_get.
195 Util::template put_1d_data_helper_kokkos_view<
196 MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, xValues_,
197 as<size_t>(ld_rhs),
198 ROOTED, this->rowIndexBase_);
199 }
200
201 return(ierr);
202}
203
204
205template <class Matrix, class Vector>
206bool
208{
209 // Tacho can only apply the solve routines to square matrices
210 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
211}
212
213
214template <class Matrix, class Vector>
215void
216TachoSolver<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
217{
218 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
219
220 // TODO: Confirm param options
221
222 // factorization type
223 auto method_name = parameterList->get<std::string> ("method", "chol");
224 if (method_name == "chol")
225 data_.method = 1;
226 else if (method_name == "ldl")
227 data_.method = 2;
228 else if (method_name == "lu")
229 data_.method = 3;
230 else {
231 std::cout << "Error: not supported solution method\n";
232 }
233 // solver type
234 data_.variant = parameterList->get<int> ("variant", 2);
235 // small problem threshold
236 data_.small_problem_threshold_size = parameterList->get<int> ("small problem threshold size", 1024);
237 // verbosity
238 data_.verbose = parameterList->get<bool> ("verbose", false);
239 // # of streams
240 data_.streams = parameterList->get<int> ("num-streams", 1);
241 // DoFs / node
242 data_.dofs_per_node = parameterList->get<int> ("dofs-per-node", 1);
243 // Perturb tiny pivots
244 data_.pivot_pert = parameterList->get<bool> ("perturb-pivot", false);
245 // TODO: Confirm param options
246 // data_.num_kokkos_threads = parameterList->get<int>("kokkos-threads", 1);
247 // data_.max_num_superblocks = parameterList->get<int>("max-num-superblocks", 4);
248}
249
250
251template <class Matrix, class Vector>
252Teuchos::RCP<const Teuchos::ParameterList>
254{
255 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
256
257 if( is_null(valid_params) ){
258 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
259
260 pl->set("method", "chol", "Type of factorization, chol, ldl, or lu");
261 pl->set("variant", 2, "Type of solver variant, 0, 1, or 2");
262 pl->set("small problem threshold size", 1024, "Problem size threshold below with Tacho uses LAPACK.");
263 pl->set("verbose", false, "Verbosity");
264 pl->set("num-streams", 1, "Number of GPU streams");
265 pl->set("dofs-per-node", 1, "DoFs per node");
266 pl->set("perturb-pivot", false, "Perturb tiny pivots");
267
268 // TODO: Confirm param options
269 // pl->set("kokkos-threads", 1, "Number of threads");
270 // pl->set("max-num-superblocks", 4, "Max number of superblocks");
271
272 valid_params = pl;
273 }
274
275 return valid_params;
276}
277
278template <class Matrix, class Vector>
279bool
281 return (this->root_ && (this->matrixA_->getComm()->getSize() == 1));
282}
283
284template <class Matrix, class Vector>
285bool
287{
288
289 if(current_phase == SOLVE) {
290 return(false);
291 }
292
293 if(!do_optimization()) {
294#ifdef HAVE_AMESOS2_TIMERS
295 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
296#endif
297
298 // Note views are allocated but eventually we should remove this.
299 // The internal copy manager will decide if we can assign or deep_copy
300 // and then allocate if necessary. However the GPU solvers are serial right
301 // now so I didn't complete refactoring the matrix code for the parallel
302 // case. If we added that later, we should have it hooked up to the copy
303 // manager and then these allocations can go away.
304 {
305 if( this->root_ ) {
306 if (device_nzvals_view_.extent(0) != this->globalNumNonZeros_)
307 Kokkos::resize(device_nzvals_view_, this->globalNumNonZeros_);
308 if (host_cols_view_.extent(0) != this->globalNumNonZeros_)
309 Kokkos::resize(host_cols_view_, this->globalNumNonZeros_);
310 if (host_row_ptr_view_.extent(0) != this->globalNumRows_ + 1)
311 Kokkos::resize(host_row_ptr_view_, this->globalNumRows_ + 1);
312 } else {
313 Kokkos::resize(device_nzvals_view_, 0);
314 Kokkos::resize(host_cols_view_, 0);
315 Kokkos::resize(host_row_ptr_view_, 1);
316 }
317 }
318
319 typename host_size_type_array::value_type nnz_ret = 0;
320 {
321 #ifdef HAVE_AMESOS2_TIMERS
322 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
323 #endif
324
325 TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
326 std::runtime_error,
327 "Row and column maps have different indexbase ");
328
330 device_value_type_array, host_ordinal_type_array, host_size_type_array>::do_get(
331 this->matrixA_.ptr(),
332 device_nzvals_view_,
333 host_cols_view_,
334 host_row_ptr_view_,
335 nnz_ret,
337 this->columnIndexBase_);
338 }
339 }
340 else {
341 if( this->root_ ) {
342 // instead of holding onto the device poinster (which could cause issue)
343 // make an explicit copy
344 device_nzvals_view_ = device_value_type_array(
345 Kokkos::ViewAllocateWithoutInitializing("nzvals"), this->globalNumNonZeros_);
346 }
347 }
348
349 return true;
350}
351
352
/// Name string of this solver interface.
template<class Matrix, class Vector>
const char* TachoSolver<Matrix,Vector>::name = "Tacho";
355
356
357} // end namespace Amesos2
358
359#endif // AMESOS2_TACHO_DEF_HPP
@ ROOTED
Definition Amesos2_TypeDecl.hpp:93
@ ARBITRARY
Definition Amesos2_TypeDecl.hpp:109
Utility functions for Amesos2.
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers.
Definition Amesos2_SolverCore_decl.hpp:72
Amesos2 interface to the Tacho package.
Definition Amesos2_Tacho_decl.hpp:34
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition Amesos2_Tacho_def.hpp:207
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition Amesos2_Tacho_def.hpp:253
int numericFactorization_impl()
Tacho specific numeric factorization.
Definition Amesos2_Tacho_def.hpp:100
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Tacho specific solve.
Definition Amesos2_Tacho_def.hpp:128
~TachoSolver()
Destructor.
Definition Amesos2_Tacho_def.hpp:40
bool do_optimization() const
can we optimize size_type and ordinal_type for straight pass through
Definition Amesos2_Tacho_def.hpp:280
TachoSolver(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition Amesos2_Tacho_def.hpp:24
std::string description() const override
Returns a short description of this Solver.
Definition Amesos2_Tacho_def.hpp:49
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition Amesos2_Tacho_def.hpp:286
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Tacho.
Definition Amesos2_Tacho_def.hpp:65
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition Amesos2_Tacho_def.hpp:58
EPhase
Used to indicate a phase in the direct solution.
Definition Amesos2_TypeDecl.hpp:31
A templated MultiVector class adapter for Amesos2.
Definition Amesos2_MultiVecAdapter_decl.hpp:142
Similar to get_ccs_helper , but used to get a CRS representation of the given matrix.
Definition Amesos2_Util.hpp:644