Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Details_ChebyshevKernel_def.hpp
// @HEADER
// *****************************************************************************
// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
//
// Copyright 2009 NTESS and the Ifpack2 contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

10#ifndef IFPACK2_DETAILS_CHEBYSHEVKERNEL_DEF_HPP
11#define IFPACK2_DETAILS_CHEBYSHEVKERNEL_DEF_HPP
12
#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_Operator.hpp"
#include "Tpetra_Vector.hpp"
#include "Tpetra_Export_decl.hpp"
#include "Tpetra_Import_decl.hpp"
#include "KokkosKernels_ArithTraits.hpp"
#include "KokkosSparse_spmv_impl.hpp"
#include "Teuchos_Assert.hpp"
#include <memory>
#include <type_traits>
23
24namespace Ifpack2 {
25namespace Details {
26namespace Impl {
27
32template <class WVector,
33 class DVector,
34 class BVector,
35 class AMatrix,
36 class XVector_colMap,
37 class XVector_domMap,
38 class Scalar,
39 bool use_beta,
40 bool do_X_update>
42 static_assert(static_cast<int>(WVector::rank) == 1,
43 "WVector must be a rank 1 View.");
44 static_assert(static_cast<int>(DVector::rank) == 1,
45 "DVector must be a rank 1 View.");
46 static_assert(static_cast<int>(BVector::rank) == 1,
47 "BVector must be a rank 1 View.");
48 static_assert(static_cast<int>(XVector_colMap::rank) == 1,
49 "XVector_colMap must be a rank 1 View.");
50 static_assert(static_cast<int>(XVector_domMap::rank) == 1,
51 "XVector_domMap must be a rank 1 View.");
52
53 using execution_space = typename AMatrix::execution_space;
54 using LO = typename AMatrix::non_const_ordinal_type;
55 using value_type = typename AMatrix::non_const_value_type;
56 using team_policy = typename Kokkos::TeamPolicy<execution_space>;
57 using team_member = typename team_policy::member_type;
58 using ATV = KokkosKernels::ArithTraits<value_type>;
59
60 const Scalar alpha;
61 WVector m_w;
62 DVector m_d;
63 BVector m_b;
64 AMatrix m_A;
65 XVector_colMap m_x_colMap;
66 XVector_domMap m_x_domMap;
67 const Scalar beta;
68
69 const LO rows_per_team;
70
72 const WVector& m_w_,
73 const DVector& m_d_,
74 const BVector& m_b_,
75 const AMatrix& m_A_,
78 const Scalar& beta_,
79 const int rows_per_team_)
80 : alpha(alpha_)
81 , m_w(m_w_)
82 , m_d(m_d_)
83 , m_b(m_b_)
84 , m_A(m_A_)
85 , m_x_colMap(m_x_colMap_)
86 , m_x_domMap(m_x_domMap_)
87 , beta(beta_)
88 , rows_per_team(rows_per_team_) {
89 const size_t numRows = m_A.numRows();
90 const size_t numCols = m_A.numCols();
91
92 TEUCHOS_ASSERT(m_w.extent(0) == m_d.extent(0));
93 TEUCHOS_ASSERT(m_w.extent(0) == m_b.extent(0));
94 TEUCHOS_ASSERT(numRows == size_t(m_w.extent(0)));
95 TEUCHOS_ASSERT(numCols <= size_t(m_x_colMap.extent(0)));
96 TEUCHOS_ASSERT(numRows <= size_t(m_x_domMap.extent(0)));
97 }
98
100 void operator()(const team_member& dev) const {
101 using residual_value_type = typename BVector::non_const_value_type;
102 using KAT = KokkosKernels::ArithTraits<residual_value_type>;
103
104 Kokkos::parallel_for(Kokkos::TeamThreadRange(dev, 0, rows_per_team),
105 [&](const LO& loop) {
106 const LO lclRow =
107 static_cast<LO>(dev.league_rank()) * rows_per_team + loop;
108 if (lclRow >= m_A.numRows()) {
109 return;
110 }
111 const KokkosSparse::SparseRowViewConst<AMatrix> A_row = m_A.rowConst(lclRow);
112 const LO row_length = static_cast<LO>(A_row.length);
113 residual_value_type A_x = KAT::zero();
114
115 Kokkos::parallel_reduce(
116 Kokkos::ThreadVectorRange(dev, row_length),
117 [&](const LO iEntry, residual_value_type& lsum) {
118 const auto A_val = A_row.value(iEntry);
119 lsum += A_val * m_x_colMap(A_row.colidx(iEntry));
120 },
121 A_x);
122
123 Kokkos::single(Kokkos::PerThread(dev),
124 [&]() {
125 const auto alpha_D_res =
126 alpha * m_d(lclRow) * (m_b(lclRow) - A_x);
127 if (use_beta) {
128 m_w(lclRow) = beta * m_w(lclRow) + alpha_D_res;
129 } else {
130 m_w(lclRow) = alpha_D_res;
131 }
132 if (do_X_update)
133 m_x_domMap(lclRow) += m_w(lclRow);
134 });
135 });
136 }
137};
138
139// W := alpha * D * (B - A*X) + beta * W.
140template <class WVector,
141 class DVector,
142 class BVector,
143 class AMatrix,
144 class XVector_colMap,
145 class XVector_domMap,
146 class Scalar>
147static void
148chebyshev_kernel_vector(const Scalar& alpha,
149 const WVector& w,
150 const DVector& d,
151 const BVector& b,
152 const AMatrix& A,
155 const Scalar& beta,
156 const bool do_X_update) {
157 using execution_space = typename AMatrix::execution_space;
158
159 if (A.numRows() == 0) {
160 return;
161 }
162
163 int team_size = -1;
164 int vector_length = -1;
166
167 const int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(A.numRows(), A.nnz(), rows_per_thread, team_size, vector_length);
168 int64_t worksets = (b.extent(0) + rows_per_team - 1) / rows_per_team;
169
170 using Kokkos::Dynamic;
171 using Kokkos::Schedule;
172 using Kokkos::Static;
173 using Kokkos::TeamPolicy;
176 const char kernel_label[] = "chebyshev_kernel_vector";
179 if (team_size < 0) {
180 policyDynamic = policy_type_dynamic(worksets, Kokkos::AUTO, vector_length);
181 policyStatic = policy_type_static(worksets, Kokkos::AUTO, vector_length);
182 } else {
183 policyDynamic = policy_type_dynamic(worksets, team_size, vector_length);
184 policyStatic = policy_type_static(worksets, team_size, vector_length);
185 }
186
187 // Canonicalize template arguments to avoid redundant instantiations.
188 using w_vec_type = typename WVector::non_const_type;
189 using d_vec_type = typename DVector::const_type;
190 using b_vec_type = typename BVector::const_type;
191 using matrix_type = AMatrix;
192 using x_colMap_vec_type = typename XVector_colMap::const_type;
193 using x_domMap_vec_type = typename XVector_domMap::non_const_type;
194 using scalar_type = typename KokkosKernels::ArithTraits<Scalar>::val_type;
195
196 if (beta == KokkosKernels::ArithTraits<Scalar>::zero()) {
197 constexpr bool use_beta = false;
198 if (do_X_update) {
199 using functor_type =
200 ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
201 b_vec_type, matrix_type,
202 x_colMap_vec_type, x_domMap_vec_type,
203 scalar_type,
204 use_beta,
205 true>;
206 functor_type func(alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
207 if (A.nnz() > 10000000)
208 Kokkos::parallel_for(kernel_label, policyDynamic, func);
209 else
210 Kokkos::parallel_for(kernel_label, policyStatic, func);
211 } else {
212 using functor_type =
213 ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
214 b_vec_type, matrix_type,
215 x_colMap_vec_type, x_domMap_vec_type,
216 scalar_type,
217 use_beta,
218 false>;
219 functor_type func(alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
220 if (A.nnz() > 10000000)
221 Kokkos::parallel_for(kernel_label, policyDynamic, func);
222 else
223 Kokkos::parallel_for(kernel_label, policyStatic, func);
224 }
225 } else {
226 constexpr bool use_beta = true;
227 if (do_X_update) {
228 using functor_type =
229 ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
230 b_vec_type, matrix_type,
231 x_colMap_vec_type, x_domMap_vec_type,
232 scalar_type,
233 use_beta,
234 true>;
235 functor_type func(alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
236 if (A.nnz() > 10000000)
237 Kokkos::parallel_for(kernel_label, policyDynamic, func);
238 else
239 Kokkos::parallel_for(kernel_label, policyStatic, func);
240 } else {
241 using functor_type =
242 ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
243 b_vec_type, matrix_type,
244 x_colMap_vec_type, x_domMap_vec_type,
245 scalar_type,
246 use_beta,
247 false>;
248 functor_type func(alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
249 if (A.nnz() > 10000000)
250 Kokkos::parallel_for(kernel_label, policyDynamic, func);
251 else
252 Kokkos::parallel_for(kernel_label, policyStatic, func);
253 }
254 }
255}
256
257} // namespace Impl
258
259template <class TpetraOperatorType>
260ChebyshevKernel<TpetraOperatorType>::
261 ChebyshevKernel(const Teuchos::RCP<const operator_type>& A,
262 const bool useNativeSpMV)
263 : useNativeSpMV_(useNativeSpMV) {
264 setMatrix(A);
265}
266
267template <class TpetraOperatorType>
268void ChebyshevKernel<TpetraOperatorType>::
269 setMatrix(const Teuchos::RCP<const operator_type>& A) {
270 if (A_op_.get() != A.get()) {
271 A_op_ = A;
272
273 // We'll (re)allocate these on demand.
274 V1_ = std::unique_ptr<multivector_type>(nullptr);
275
276 using Teuchos::rcp_dynamic_cast;
277 Teuchos::RCP<const crs_matrix_type> A_crs =
278 rcp_dynamic_cast<const crs_matrix_type>(A);
279 if (A_crs.is_null()) {
280 A_crs_ = Teuchos::null;
281 imp_ = Teuchos::null;
282 exp_ = Teuchos::null;
283 X_colMap_ = nullptr;
284 } else {
285 TEUCHOS_ASSERT(A_crs->isFillComplete());
286 A_crs_ = A_crs;
287 auto G = A_crs->getCrsGraph();
288 imp_ = G->getImporter();
289 exp_ = G->getExporter();
290 if (!imp_.is_null()) {
291 if (X_colMap_.get() == nullptr ||
292 !X_colMap_->getMap()->isSameAs(*(imp_->getTargetMap()))) {
293 X_colMap_ = std::unique_ptr<multivector_type>(new multivector_type(imp_->getTargetMap(), 1));
294 }
295 } else
296 X_colMap_ = nullptr;
297 }
298 }
299}
300
301template <class TpetraOperatorType>
302void ChebyshevKernel<TpetraOperatorType>::
303 setAuxiliaryVectors(size_t numVectors) {
304 if ((V1_.get() == nullptr) || V1_->getNumVectors() != numVectors) {
305 using MV = multivector_type;
306 V1_ = std::unique_ptr<MV>(new MV(A_op_->getRangeMap(), numVectors));
307 }
308}
309
310template <class TpetraOperatorType>
311void ChebyshevKernel<TpetraOperatorType>::
312 compute(multivector_type& W,
313 const SC& alpha,
314 vector_type& D_inv,
315 multivector_type& B,
316 multivector_type& X,
317 const SC& beta) {
318 using Teuchos::RCP;
319 using Teuchos::rcp;
320
321 if (canFuse(B)) {
322 TEUCHOS_ASSERT(!A_crs_.is_null());
323 fusedCase(W, alpha, D_inv, B, *A_crs_, X, beta);
324 } else {
325 TEUCHOS_ASSERT(!A_op_.is_null());
326 unfusedCase(W, alpha, D_inv, B, *A_op_, X, beta);
327 }
328}
329
330template <class TpetraOperatorType>
331typename ChebyshevKernel<TpetraOperatorType>::multivector_type&
332ChebyshevKernel<TpetraOperatorType>::
333 importVector(multivector_type& X_domMap) {
334 if (imp_.is_null()) {
335 return X_domMap;
336 } else {
337 X_colMap_->doImport(X_domMap, *imp_, Tpetra::REPLACE);
338 return *X_colMap_;
339 }
340}
341
342template <class TpetraOperatorType>
343bool ChebyshevKernel<TpetraOperatorType>::
344 canFuse(const multivector_type& B) const {
345 // If override is enabled
346 if (useNativeSpMV_)
347 return false;
348
349 // Some criteria must be met for fused kernel
350 return B.getNumVectors() == size_t(1) &&
351 !A_crs_.is_null() &&
352 exp_.is_null();
353}
354
355template <class TpetraOperatorType>
356void ChebyshevKernel<TpetraOperatorType>::
357 unfusedCase(multivector_type& W,
358 const SC& alpha,
359 vector_type& D_inv,
360 multivector_type& B,
361 const operator_type& A,
362 multivector_type& X,
363 const SC& beta) {
364 using STS = Teuchos::ScalarTraits<SC>;
365 setAuxiliaryVectors(B.getNumVectors());
366
367 const SC one = Teuchos::ScalarTraits<SC>::one();
368
369 // V1 = B - A*X
370 Tpetra::deep_copy(*V1_, B);
371 A.apply(X, *V1_, Teuchos::NO_TRANS, -one, one);
372
373 // W := alpha * D_inv * V1 + beta * W
374 W.elementWiseMultiply(alpha, D_inv, *V1_, beta);
375
376 // X := X + W
377 X.update(STS::one(), W, STS::one());
378}
379
380template <class TpetraOperatorType>
381void ChebyshevKernel<TpetraOperatorType>::
382 fusedCase(multivector_type& W,
383 const SC& alpha,
384 multivector_type& D_inv,
385 multivector_type& B,
386 const crs_matrix_type& A,
387 multivector_type& X,
388 const SC& beta) {
389 multivector_type& X_colMap = importVector(X);
390
391 using Impl::chebyshev_kernel_vector;
392 using STS = Teuchos::ScalarTraits<SC>;
393
394 auto A_lcl = A.getLocalMatrixDevice();
395 // D_inv, B, X and W are all Vectors, so it's safe to take the first column only
396 auto Dinv_lcl = Kokkos::subview(D_inv.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
397 auto B_lcl = Kokkos::subview(B.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
398 auto X_domMap_lcl = Kokkos::subview(X.getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
399 auto X_colMap_lcl = Kokkos::subview(X_colMap.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
400
401 const bool do_X_update = !imp_.is_null();
402 if (beta == STS::zero()) {
403 auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::OverwriteAll), Kokkos::ALL(), 0);
404 chebyshev_kernel_vector(alpha, W_lcl, Dinv_lcl,
405 B_lcl, A_lcl,
406 X_colMap_lcl, X_domMap_lcl,
407 beta,
408 do_X_update);
409 } else { // need to read _and_ write W if beta != 0
410 auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
411 chebyshev_kernel_vector(alpha, W_lcl, Dinv_lcl,
412 B_lcl, A_lcl,
413 X_colMap_lcl, X_domMap_lcl,
414 beta,
415 do_X_update);
416 }
417 if (!do_X_update)
418 X.update(STS::one(), W, STS::one());
419}
420
421} // namespace Details
422} // namespace Ifpack2
423
424#define IFPACK2_DETAILS_CHEBYSHEVKERNEL_INSTANT(SC, LO, GO, NT) \
425 template class Ifpack2::Details::ChebyshevKernel<Tpetra::Operator<SC, LO, GO, NT> >;
426
427#endif // IFPACK2_DETAILS_CHEBYSHEVKERNEL_DEF_HPP
Ifpack2's implementation of Trilinos::Details::LinearSolver interface.
Definition Ifpack2_Details_LinearSolver_decl.hpp:75
Ifpack2 implementation details.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40
Functor for computing W := alpha * D * (B - A*X) + beta * W and X := X+W.
Definition Ifpack2_Details_ChebyshevKernel_def.hpp:41