Ifpack2 Templated Preconditioning Package Version 1.0
Ifpack2_Details_ScaledDampedResidual_def.hpp
// @HEADER
// *****************************************************************************
// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
//
// Copyright 2009 NTESS and the Ifpack2 contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#ifndef IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP
#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP

#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_Operator.hpp"
#include "Tpetra_Vector.hpp"
#include "Tpetra_Export_decl.hpp"
#include "Tpetra_Import_decl.hpp"
#if KOKKOS_VERSION >= 40799
#include "KokkosKernels_ArithTraits.hpp"
#else
#include "Kokkos_ArithTraits.hpp"
#endif
#include "Teuchos_Assert.hpp"
#include <type_traits>
#include "KokkosSparse_spmv_impl.hpp"

namespace Ifpack2 {
namespace Details {
namespace Impl {

/// \brief Functor for computing W := alpha * D * (B - A*X) + beta * W.
template <class WVector,
          class DVector,
          class BVector,
          class AMatrix,
          class XVector,
          class Scalar,
          bool use_beta>
struct ScaledDampedResidualVectorFunctor {
  static_assert(static_cast<int>(WVector::rank) == 1,
                "WVector must be a rank 1 View.");
  static_assert(static_cast<int>(DVector::rank) == 1,
                "DVector must be a rank 1 View.");
  static_assert(static_cast<int>(BVector::rank) == 1,
                "BVector must be a rank 1 View.");
  static_assert(static_cast<int>(XVector::rank) == 1,
                "XVector must be a rank 1 View.");

  using execution_space = typename AMatrix::execution_space;
  using LO = typename AMatrix::non_const_ordinal_type;
  using value_type = typename AMatrix::non_const_value_type;
  using team_policy = typename Kokkos::TeamPolicy<execution_space>;
  using team_member = typename team_policy::member_type;
#if KOKKOS_VERSION >= 40799
  using ATV = KokkosKernels::ArithTraits<value_type>;
#else
  using ATV = Kokkos::ArithTraits<value_type>;
#endif

  const Scalar alpha;
  WVector m_w;
  DVector m_d;
  BVector m_b;
  AMatrix m_A;
  XVector m_x;
  const Scalar beta;

  const LO rows_per_team;

  ScaledDampedResidualVectorFunctor(const Scalar& alpha_,
                                    const WVector& m_w_,
                                    const DVector& m_d_,
                                    const BVector& m_b_,
                                    const AMatrix& m_A_,
                                    const XVector& m_x_,
                                    const Scalar& beta_,
                                    const int rows_per_team_)
    : alpha(alpha_)
    , m_w(m_w_)
    , m_d(m_d_)
    , m_b(m_b_)
    , m_A(m_A_)
    , m_x(m_x_)
    , beta(beta_)
    , rows_per_team(rows_per_team_) {
    const size_t numRows = m_A.numRows();
    const size_t numCols = m_A.numCols();

    TEUCHOS_ASSERT(m_w.extent(0) == m_d.extent(0));
    TEUCHOS_ASSERT(m_w.extent(0) == m_b.extent(0));
    TEUCHOS_ASSERT(numRows == size_t(m_w.extent(0)));
    TEUCHOS_ASSERT(numCols <= size_t(m_x.extent(0)));
  }

  KOKKOS_INLINE_FUNCTION
  void operator()(const team_member& dev) const {
    using residual_value_type = typename BVector::non_const_value_type;
#if KOKKOS_VERSION >= 40799
    using KAT = KokkosKernels::ArithTraits<residual_value_type>;
#else
    using KAT = Kokkos::ArithTraits<residual_value_type>;
#endif

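    // Hierarchical parallelism: each team owns a block of rows_per_team
    // consecutive rows, the threads of a team each take one row, and the
    // vector lanes of a thread reduce over that row's entries.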
    Kokkos::parallel_for(Kokkos::TeamThreadRange(dev, 0, rows_per_team),
                         [&](const LO& loop) {
                           const LO lclRow =
                               static_cast<LO>(dev.league_rank()) * rows_per_team + loop;
                           if (lclRow >= m_A.numRows()) {
                             return;
                           }
                           const auto A_row = m_A.rowConst(lclRow);
                           const LO row_length = static_cast<LO>(A_row.length);
                           residual_value_type A_x = KAT::zero();

                           Kokkos::parallel_reduce(
                               Kokkos::ThreadVectorRange(dev, row_length),
                               [&](const LO iEntry, residual_value_type& lsum) {
                                 const auto A_val = A_row.value(iEntry);
                                 lsum += A_val * m_x(A_row.colidx(iEntry));
                               },
                               A_x);

                           Kokkos::single(Kokkos::PerThread(dev),
                                          [&]() {
                                            const auto alpha_D_res =
                                                alpha * m_d(lclRow) * (m_b(lclRow) - A_x);
                                            if (use_beta) {
                                              m_w(lclRow) = beta * m_w(lclRow) + alpha_D_res;
                                            } else {
                                              m_w(lclRow) = alpha_D_res;
                                            }
                                          });
                         });
  }
};

// W := alpha * D * (B - A*X) + beta * W.
template <class WVector,
          class DVector,
          class BVector,
          class AMatrix,
          class XVector,
          class Scalar>
static void
scaled_damped_residual_vector(const Scalar& alpha,
                              const WVector& w,
                              const DVector& d,
                              const BVector& b,
                              const AMatrix& A,
                              const XVector& x,
                              const Scalar& beta) {
  using execution_space = typename AMatrix::execution_space;

  if (A.numRows() == 0) {
    return;
  }

  int team_size = -1;
  int vector_length = -1;
  int64_t rows_per_thread = -1;

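  // spmv_launch_parameters is the same heuristic KokkosSparse uses to tune
  // its SpMV launches; passing -1 above asks it to choose the team size,
  // vector length, and row blocking itself.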
  const int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(A.numRows(), A.nnz(), rows_per_thread, team_size, vector_length);
  int64_t worksets = (b.extent(0) + rows_per_team - 1) / rows_per_team;

  using Kokkos::Dynamic;
  using Kokkos::Schedule;
  using Kokkos::Static;
  using Kokkos::TeamPolicy;
  using policy_type_dynamic = TeamPolicy<execution_space, Schedule<Dynamic> >;
  using policy_type_static = TeamPolicy<execution_space, Schedule<Static> >;
  const char kernel_label[] = "scaled_damped_residual_vector";
  policy_type_dynamic policyDynamic(1, 1);
  policy_type_static policyStatic(1, 1);
  if (team_size < 0) {
    policyDynamic = policy_type_dynamic(worksets, Kokkos::AUTO, vector_length);
    policyStatic = policy_type_static(worksets, Kokkos::AUTO, vector_length);
  } else {
    policyDynamic = policy_type_dynamic(worksets, team_size, vector_length);
    policyStatic = policy_type_static(worksets, team_size, vector_length);
  }

  // Canonicalize template arguments to avoid redundant instantiations.
  using w_vec_type = typename WVector::non_const_type;
  using d_vec_type = typename DVector::const_type;
  using b_vec_type = typename BVector::const_type;
  using matrix_type = AMatrix;
  using x_vec_type = typename XVector::const_type;
#if KOKKOS_VERSION >= 40799
  using scalar_type = typename KokkosKernels::ArithTraits<Scalar>::val_type;
#else
  using scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;
#endif

#if KOKKOS_VERSION >= 40799
  if (beta == KokkosKernels::ArithTraits<Scalar>::zero()) {
#else
  if (beta == Kokkos::ArithTraits<Scalar>::zero()) {
#endif
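    // Compile beta == 0 as its own functor instantiation (use_beta = false)
    // so the kernel never reads W's existing entries, which may be
    // uninitialized in that case.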
    constexpr bool use_beta = false;
    using functor_type =
        ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                          b_vec_type, matrix_type,
                                          x_vec_type, scalar_type,
                                          use_beta>;
    functor_type func(alpha, w, d, b, A, x, beta, rows_per_team);
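    // Heuristic: above ~10M entries, uneven row lengths across teams make
    // dynamic scheduling worth its overhead; smaller problems use the
    // static schedule to avoid it.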
    if (A.nnz() > 10000000)
      Kokkos::parallel_for(kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for(kernel_label, policyStatic, func);
  } else {
    constexpr bool use_beta = true;
    using functor_type =
        ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                          b_vec_type, matrix_type,
                                          x_vec_type, scalar_type,
                                          use_beta>;
    functor_type func(alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz() > 10000000)
      Kokkos::parallel_for(kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for(kernel_label, policyStatic, func);
  }
}

}  // namespace Impl

template <class TpetraOperatorType>
ScaledDampedResidual<TpetraOperatorType>::
    ScaledDampedResidual(const Teuchos::RCP<const operator_type>& A) {
  setMatrix(A);
}

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    setMatrix(const Teuchos::RCP<const operator_type>& A) {
  if (A_op_.get() != A.get()) {
    A_op_ = A;

    // We'll (re)allocate these on demand.
    X_colMap_ = std::unique_ptr<vector_type>(nullptr);
    V1_ = std::unique_ptr<multivector_type>(nullptr);

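    // If A is concretely a Tpetra::CrsMatrix, cache it along with its
    // graph's Import and Export so that compute() can take the fused
    // single-kernel path; otherwise only the generic Operator path works.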
    using Teuchos::rcp_dynamic_cast;
    Teuchos::RCP<const crs_matrix_type> A_crs =
        rcp_dynamic_cast<const crs_matrix_type>(A);
    if (A_crs.is_null()) {
      A_crs_ = Teuchos::null;
      imp_ = Teuchos::null;
      exp_ = Teuchos::null;
    } else {
      TEUCHOS_ASSERT(A_crs->isFillComplete());
      A_crs_ = A_crs;
      auto G = A_crs->getCrsGraph();
      imp_ = G->getImporter();
      exp_ = G->getExporter();
    }
  }
}

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    compute(multivector_type& W,
            const SC& alpha,
            vector_type& D_inv,
            multivector_type& B,
            multivector_type& X,
            const SC& beta) {
  using Teuchos::RCP;
  using Teuchos::rcp;

  if (canFuse(B)) {
    // "nonconst" here has no effect other than on the return type.
    W_vec_ = W.getVectorNonConst(0);
    B_vec_ = B.getVectorNonConst(0);
    X_vec_ = X.getVectorNonConst(0);
    TEUCHOS_ASSERT(!A_crs_.is_null());
    fusedCase(*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
  } else {
    TEUCHOS_ASSERT(!A_op_.is_null());
    unfusedCase(W, alpha, D_inv, B, *A_op_, X, beta);
  }
}
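
// Example (a sketch, not from this file): one damped-Jacobi-style sweep
// built on compute(), assuming hypothetical, suitably typed Tpetra objects
// A, D_inv, B, X, a scratch multivector W, op_type aliasing the
// Tpetra::Operator specialization, and one = Teuchos::ScalarTraits<SC>::one():
//
//   Ifpack2::Details::ScaledDampedResidual<op_type> sdr(A);
//   sdr.compute(W, alpha, D_inv, B, X, beta);  // W = alpha*D_inv*(B - A*X) + beta*W
//   X.update(one, W, one);                     // X += W
//
// The class only forms W; applying the correction to X is the caller's job.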

template <class TpetraOperatorType>
typename ScaledDampedResidual<TpetraOperatorType>::vector_type&
ScaledDampedResidual<TpetraOperatorType>::
    importVector(vector_type& X_domMap) {
  if (imp_.is_null()) {
    return X_domMap;
  } else {
    if (X_colMap_.get() == nullptr) {
      using V = vector_type;
      X_colMap_ = std::unique_ptr<V>(new V(imp_->getTargetMap()));
    }
    X_colMap_->doImport(X_domMap, *imp_, Tpetra::REPLACE);
    return *X_colMap_;
  }
}

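// The fused path computes W := alpha * D_inv .* (B - A*X) + beta * W in a
// single kernel launch. It requires one right-hand side, a concrete
// CrsMatrix, and no Export (i.e. the matrix's row Map equals its range
// Map, so the residual needs no post-multiply redistribution).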
template <class TpetraOperatorType>
bool ScaledDampedResidual<TpetraOperatorType>::
    canFuse(const multivector_type& B) const {
  return B.getNumVectors() == size_t(1) &&
         !A_crs_.is_null() &&
         exp_.is_null();
}

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    unfusedCase(multivector_type& W,
                const SC& alpha,
                vector_type& D_inv,
                multivector_type& B,
                const operator_type& A,
                multivector_type& X,
                const SC& beta) {
  if (V1_.get() == nullptr) {
    using MV = multivector_type;
    const size_t numVecs = B.getNumVectors();
    V1_ = std::unique_ptr<MV>(new MV(B.getMap(), numVecs));
  }
  const SC one = Teuchos::ScalarTraits<SC>::one();

  // V1 = B - A*X
  Tpetra::deep_copy(*V1_, B);
  A.apply(X, *V1_, Teuchos::NO_TRANS, -one, one);

  // W := alpha * D_inv * V1 + beta * W
  W.elementWiseMultiply(alpha, D_inv, *V1_, beta);
}

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    fusedCase(vector_type& W,
              const SC& alpha,
              vector_type& D_inv,
              vector_type& B,
              const crs_matrix_type& A,
              vector_type& X,
              const SC& beta) {
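  // If the matrix has a nontrivial Import (overlapping column Map), pull
  // the off-process entries of X into a column-Map vector first, so the
  // local kernel can read every column index it needs.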
  vector_type& X_colMap = importVector(X);

  using Impl::scaled_damped_residual_vector;
  using STS = Teuchos::ScalarTraits<SC>;

  auto A_lcl = A.getLocalMatrixDevice();
  auto Dinv_lcl = Kokkos::subview(D_inv.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  auto B_lcl = Kokkos::subview(B.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  auto X_lcl = Kokkos::subview(X_colMap.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  if (beta == STS::zero()) {
    auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::OverwriteAll), Kokkos::ALL(), 0);
    scaled_damped_residual_vector(alpha, W_lcl, Dinv_lcl,
                                  B_lcl, A_lcl, X_lcl, beta);
  } else {  // need to read _and_ write W if beta != 0
    auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
    scaled_damped_residual_vector(alpha, W_lcl, Dinv_lcl,
                                  B_lcl, A_lcl, X_lcl, beta);
  }
}

}  // namespace Details
}  // namespace Ifpack2

#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_INSTANT(SC, LO, GO, NT) \
  template class Ifpack2::Details::ScaledDampedResidual<Tpetra::Operator<SC, LO, GO, NT> >;

#endif  // IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP