Ifpack2 Templated Preconditioning Package Version 1.0
Ifpack2_Details_ScaledDampedResidual_def.hpp
// @HEADER
// *****************************************************************************
// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
//
// Copyright 2009 NTESS and the Ifpack2 contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#ifndef IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP
#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP

#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_Operator.hpp"
#include "Tpetra_Vector.hpp"
#include "Tpetra_Export_decl.hpp"
#include "Tpetra_Import_decl.hpp"
#include "KokkosKernels_ArithTraits.hpp"
#include "Teuchos_Assert.hpp"
#include <type_traits>
#include "KokkosSparse_spmv_impl.hpp"

namespace Ifpack2 {
namespace Details {
namespace Impl {

// Functor for computing W := alpha * D * (B - A*X) + beta * W.
template <class WVector,
          class DVector,
          class BVector,
          class AMatrix,
          class XVector,
          class Scalar,
          bool use_beta>
struct ScaledDampedResidualVectorFunctor {
  static_assert(static_cast<int>(WVector::rank) == 1,
                "WVector must be a rank 1 View.");
  static_assert(static_cast<int>(DVector::rank) == 1,
                "DVector must be a rank 1 View.");
  static_assert(static_cast<int>(BVector::rank) == 1,
                "BVector must be a rank 1 View.");
  static_assert(static_cast<int>(XVector::rank) == 1,
                "XVector must be a rank 1 View.");

  using execution_space = typename AMatrix::execution_space;
  using LO = typename AMatrix::non_const_ordinal_type;
  using value_type = typename AMatrix::non_const_value_type;
  using team_policy = typename Kokkos::TeamPolicy<execution_space>;
  using team_member = typename team_policy::member_type;
  using ATV = KokkosKernels::ArithTraits<value_type>;

  const Scalar alpha;
  WVector m_w;
  DVector m_d;
  BVector m_b;
  AMatrix m_A;
  XVector m_x;
  const Scalar beta;

  const LO rows_per_team;

  ScaledDampedResidualVectorFunctor(const Scalar& alpha_,
                                    const WVector& m_w_,
                                    const DVector& m_d_,
                                    const BVector& m_b_,
                                    const AMatrix& m_A_,
                                    const XVector& m_x_,
                                    const Scalar& beta_,
                                    const int rows_per_team_)
    : alpha(alpha_)
    , m_w(m_w_)
    , m_d(m_d_)
    , m_b(m_b_)
    , m_A(m_A_)
    , m_x(m_x_)
    , beta(beta_)
    , rows_per_team(rows_per_team_) {
    const size_t numRows = m_A.numRows();
    const size_t numCols = m_A.numCols();

    TEUCHOS_ASSERT(m_w.extent(0) == m_d.extent(0));
    TEUCHOS_ASSERT(m_w.extent(0) == m_b.extent(0));
    TEUCHOS_ASSERT(numRows == size_t(m_w.extent(0)));
    TEUCHOS_ASSERT(numCols <= size_t(m_x.extent(0)));
  }

  KOKKOS_INLINE_FUNCTION
  void operator()(const team_member& dev) const {
    using residual_value_type = typename BVector::non_const_value_type;
    using KAT = KokkosKernels::ArithTraits<residual_value_type>;

    Kokkos::parallel_for(Kokkos::TeamThreadRange(dev, 0, rows_per_team),
                         [&](const LO& loop) {
                           const LO lclRow =
                               static_cast<LO>(dev.league_rank()) * rows_per_team + loop;
                           if (lclRow >= m_A.numRows()) {
                             return;
                           }
                           const auto A_row = m_A.rowConst(lclRow);
                           const LO row_length = static_cast<LO>(A_row.length);
                           residual_value_type A_x = KAT::zero();

                           Kokkos::parallel_reduce(
                               Kokkos::ThreadVectorRange(dev, row_length),
                               [&](const LO iEntry, residual_value_type& lsum) {
                                 const auto A_val = A_row.value(iEntry);
                                 lsum += A_val * m_x(A_row.colidx(iEntry));
                               },
                               A_x);

                           Kokkos::single(Kokkos::PerThread(dev),
                                          [&]() {
                                            const auto alpha_D_res =
                                                alpha * m_d(lclRow) * (m_b(lclRow) - A_x);
                                            if (use_beta) {
                                              m_w(lclRow) = beta * m_w(lclRow) + alpha_D_res;
                                            } else {
                                              m_w(lclRow) = alpha_D_res;
                                            }
                                          });
                         });
  }
};
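
// Note on the parallel structure above: each team (league rank) covers a block
// of rows_per_team consecutive rows, the TeamThreadRange loop distributes those
// rows over the team's threads, and the ThreadVectorRange reduction accumulates
// each row's dot product with x across vector lanes. Because use_beta is a
// compile-time template parameter, the "beta * W" update is selected at compile
// time rather than branching on beta inside the kernel.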

// W := alpha * D * (B - A*X) + beta * W.
template <class WVector,
          class DVector,
          class BVector,
          class AMatrix,
          class XVector,
          class Scalar>
static void
scaled_damped_residual_vector(const Scalar& alpha,
                              const WVector& w,
                              const DVector& d,
                              const BVector& b,
                              const AMatrix& A,
                              const XVector& x,
                              const Scalar& beta) {
  using execution_space = typename AMatrix::execution_space;

  if (A.numRows() == 0) {
    return;
  }

  int team_size = -1;
  int vector_length = -1;
  int64_t rows_per_thread = -1;

  const int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(
      A.numRows(), A.nnz(), rows_per_thread, team_size, vector_length);
  int64_t worksets = (b.extent(0) + rows_per_team - 1) / rows_per_team;

  using Kokkos::Dynamic;
  using Kokkos::Schedule;
  using Kokkos::Static;
  using Kokkos::TeamPolicy;
  using policy_type_dynamic = TeamPolicy<execution_space, Schedule<Dynamic> >;
  using policy_type_static = TeamPolicy<execution_space, Schedule<Static> >;
  const char kernel_label[] = "scaled_damped_residual_vector";
  policy_type_dynamic policyDynamic(1, 1);
  policy_type_static policyStatic(1, 1);
  if (team_size < 0) {
    policyDynamic = policy_type_dynamic(worksets, Kokkos::AUTO, vector_length);
    policyStatic = policy_type_static(worksets, Kokkos::AUTO, vector_length);
  } else {
    policyDynamic = policy_type_dynamic(worksets, team_size, vector_length);
    policyStatic = policy_type_static(worksets, team_size, vector_length);
  }

  // Canonicalize template arguments to avoid redundant instantiations.
  using w_vec_type = typename WVector::non_const_type;
  using d_vec_type = typename DVector::const_type;
  using b_vec_type = typename BVector::const_type;
  using matrix_type = AMatrix;
  using x_vec_type = typename XVector::const_type;
  using scalar_type = typename KokkosKernels::ArithTraits<Scalar>::val_type;

  if (beta == KokkosKernels::ArithTraits<Scalar>::zero()) {
    constexpr bool use_beta = false;
    using functor_type =
        ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                          b_vec_type, matrix_type,
                                          x_vec_type, scalar_type,
                                          use_beta>;
    functor_type func(alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz() > 10000000)
      Kokkos::parallel_for(kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for(kernel_label, policyStatic, func);
  } else {
    constexpr bool use_beta = true;
    using functor_type =
        ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                          b_vec_type, matrix_type,
                                          x_vec_type, scalar_type,
                                          use_beta>;
    functor_type func(alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz() > 10000000)
      Kokkos::parallel_for(kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for(kernel_label, policyStatic, func);
  }
}
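
// Launch-parameter notes: spmv_launch_parameters reuses the KokkosSparse SpMV
// heuristics to pick the team size, vector length, and rows per team. The
// 10,000,000-nonzero cutoff between the statically and dynamically scheduled
// policies is itself a heuristic: for larger matrices the per-team work is more
// likely to be irregular, so dynamic scheduling presumably balances load better.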

} // namespace Impl

template <class TpetraOperatorType>
ScaledDampedResidual<TpetraOperatorType>::
    ScaledDampedResidual(const Teuchos::RCP<const operator_type>& A) {
  setMatrix(A);
}

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    setMatrix(const Teuchos::RCP<const operator_type>& A) {
  if (A_op_.get() != A.get()) {
    A_op_ = A;

    // We'll (re)allocate these on demand.
    X_colMap_ = std::unique_ptr<vector_type>(nullptr);
    V1_ = std::unique_ptr<multivector_type>(nullptr);

    using Teuchos::rcp_dynamic_cast;
    Teuchos::RCP<const crs_matrix_type> A_crs =
        rcp_dynamic_cast<const crs_matrix_type>(A);
    if (A_crs.is_null()) {
      A_crs_ = Teuchos::null;
      imp_ = Teuchos::null;
      exp_ = Teuchos::null;
    } else {
      TEUCHOS_ASSERT(A_crs->isFillComplete());
      A_crs_ = A_crs;
      auto G = A_crs->getCrsGraph();
      imp_ = G->getImporter();
      exp_ = G->getExporter();
    }
  }
}
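
// setMatrix notes: when the operator changes, the cached column-map vector
// (X_colMap_) and scratch multivector (V1_) are released and re-created lazily
// on the next compute() call. The Import/Export objects are taken from the
// matrix graph only when the operator is actually a fill-complete
// Tpetra::CrsMatrix; otherwise the fused path below is disabled.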

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    compute(multivector_type& W,
            const SC& alpha,
            vector_type& D_inv,
            multivector_type& B,
            multivector_type& X,
            const SC& beta) {
  using Teuchos::RCP;
  using Teuchos::rcp;

  if (canFuse(B)) {
    // "nonconst" here has no effect other than on the return type.
    W_vec_ = W.getVectorNonConst(0);
    B_vec_ = B.getVectorNonConst(0);
    X_vec_ = X.getVectorNonConst(0);
    TEUCHOS_ASSERT(!A_crs_.is_null());
    fusedCase(*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
  } else {
    TEUCHOS_ASSERT(!A_op_.is_null());
    unfusedCase(W, alpha, D_inv, B, *A_op_, X, beta);
  }
}
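
// Usage sketch (a hypothetical caller, not part of this file): a smoother such
// as damped Jacobi or Chebyshev would call compute() once per sweep, e.g.
//
//   Ifpack2::Details::ScaledDampedResidual<op_type> sdr(A);  // op_type = Tpetra::Operator<...>
//   const SC zero = Teuchos::ScalarTraits<SC>::zero();
//   const SC one = Teuchos::ScalarTraits<SC>::one();
//   sdr.compute(W, omega, D_inv, B, X, zero);  // W := omega * D_inv * (B - A*X)
//   X.update(one, W, one);                     // X := X + W
//
// Here omega, D_inv, B, X, and W are assumed to be set up by the caller.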

template <class TpetraOperatorType>
typename ScaledDampedResidual<TpetraOperatorType>::vector_type&
ScaledDampedResidual<TpetraOperatorType>::
    importVector(vector_type& X_domMap) {
  if (imp_.is_null()) {
    return X_domMap;
  } else {
    if (X_colMap_.get() == nullptr) {
      using V = vector_type;
      X_colMap_ = std::unique_ptr<V>(new V(imp_->getTargetMap()));
    }
    X_colMap_->doImport(X_domMap, *imp_, Tpetra::REPLACE);
    return *X_colMap_;
  }
}

template <class TpetraOperatorType>
bool ScaledDampedResidual<TpetraOperatorType>::
    canFuse(const multivector_type& B) const {
  return B.getNumVectors() == size_t(1) &&
         !A_crs_.is_null() &&
         exp_.is_null();
}
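
// canFuse notes: the fused kernel handles exactly one right-hand side, needs
// direct access to the local sparse matrix (hence a concrete Tpetra::CrsMatrix),
// and writes W only into locally owned rows. A nontrivial Exporter would mean
// local results still have to be combined across processes, which the fused
// kernel does not do, so such matrices fall back to the unfused path.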

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    unfusedCase(multivector_type& W,
                const SC& alpha,
                vector_type& D_inv,
                multivector_type& B,
                const operator_type& A,
                multivector_type& X,
                const SC& beta) {
  if (V1_.get() == nullptr) {
    using MV = multivector_type;
    const size_t numVecs = B.getNumVectors();
    V1_ = std::unique_ptr<MV>(new MV(B.getMap(), numVecs));
  }
  const SC one = Teuchos::ScalarTraits<SC>::one();

  // V1 = B - A*X
  Tpetra::deep_copy(*V1_, B);
  A.apply(X, *V1_, Teuchos::NO_TRANS, -one, one);

  // W := alpha * D_inv * V1 + beta * W
  W.elementWiseMultiply(alpha, D_inv, *V1_, beta);
}
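
// unfusedCase notes: this path makes several passes over the data (deep_copy,
// apply, elementWiseMultiply) and needs the scratch multivector V1_, whereas
// the fused path computes the same result in a single kernel over the locally
// owned rows. It is the general fallback that works for any Tpetra::Operator
// and any number of right-hand sides.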

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    fusedCase(vector_type& W,
              const SC& alpha,
              vector_type& D_inv,
              vector_type& B,
              const crs_matrix_type& A,
              vector_type& X,
              const SC& beta) {
  vector_type& X_colMap = importVector(X);

  using Impl::scaled_damped_residual_vector;
  using STS = Teuchos::ScalarTraits<SC>;

  auto A_lcl = A.getLocalMatrixDevice();
  auto Dinv_lcl = Kokkos::subview(D_inv.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  auto B_lcl = Kokkos::subview(B.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  auto X_lcl = Kokkos::subview(X_colMap.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  if (beta == STS::zero()) {
    auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::OverwriteAll), Kokkos::ALL(), 0);
    scaled_damped_residual_vector(alpha, W_lcl, Dinv_lcl,
                                  B_lcl, A_lcl, X_lcl, beta);
  } else { // need to read _and_ write W if beta != 0
    auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
    scaled_damped_residual_vector(alpha, W_lcl, Dinv_lcl,
                                  B_lcl, A_lcl, X_lcl, beta);
  }
}

} // namespace Details
} // namespace Ifpack2

#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_INSTANT(SC, LO, GO, NT) \
  template class Ifpack2::Details::ScaledDampedResidual<Tpetra::Operator<SC, LO, GO, NT> >;
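
// The macro above is intended for explicit template instantiation (ETI): an
// instantiation source file invokes it once per enabled (Scalar, LocalOrdinal,
// GlobalOrdinal, Node) combination, for example (node_type standing in for
// whichever Tpetra Node type the build enables):
//
//   IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_INSTANT(double, int, long long, node_type)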

#endif // IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP