#ifndef IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP
#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP

#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_Operator.hpp"
#include "Tpetra_Vector.hpp"
#include "Tpetra_Export_decl.hpp"
#include "Tpetra_Import_decl.hpp"
#include "KokkosKernels_ArithTraits.hpp"
#include "Teuchos_Assert.hpp"
#include "KokkosSparse_spmv_impl.hpp"

namespace Ifpack2 {
namespace Details {
namespace Impl {

/// \brief Functor for computing W := alpha * D * (B - A*X) + beta * W.
///
/// This is an implementation detail of scaled_damped_residual_vector,
/// which in turn is an implementation detail of ScaledDampedResidual.
template <class WVector,
          class DVector,
          class BVector,
          class AMatrix,
          class XVector,
          class Scalar,
          bool use_beta>
struct ScaledDampedResidualVectorFunctor {
  static_assert(static_cast<int>(WVector::rank) == 1,
                "WVector must be a rank 1 View.");
  static_assert(static_cast<int>(DVector::rank) == 1,
                "DVector must be a rank 1 View.");
  static_assert(static_cast<int>(BVector::rank) == 1,
                "BVector must be a rank 1 View.");
  static_assert(static_cast<int>(XVector::rank) == 1,
                "XVector must be a rank 1 View.");

  using execution_space = typename AMatrix::execution_space;
  using LO = typename AMatrix::non_const_ordinal_type;
  using value_type = typename AMatrix::non_const_value_type;
  using team_policy = typename Kokkos::TeamPolicy<execution_space>;
  using team_member = typename team_policy::member_type;
  using ATV = KokkosKernels::ArithTraits<value_type>;

  const Scalar alpha;
  WVector m_w;
  DVector m_d;
  BVector m_b;
  AMatrix m_A;
  XVector m_x;
  const Scalar beta;
  const LO rows_per_team;

  ScaledDampedResidualVectorFunctor(const Scalar& alpha_, const WVector& m_w_,
                                    const DVector& m_d_, const BVector& m_b_,
                                    const AMatrix& m_A_, const XVector& m_x_,
                                    const Scalar& beta_, const int rows_per_team_)
    : alpha(alpha_), m_w(m_w_), m_d(m_d_), m_b(m_b_), m_A(m_A_), m_x(m_x_),
      beta(beta_), rows_per_team(rows_per_team_) {
    // Sanity-check that the view extents are mutually consistent.
    const size_t numRows = m_A.numRows();
    const size_t numCols = m_A.numCols();
    TEUCHOS_ASSERT(m_w.extent(0) == m_d.extent(0));
    TEUCHOS_ASSERT(m_w.extent(0) == m_b.extent(0));
    TEUCHOS_ASSERT(numRows == size_t(m_w.extent(0)));
    TEUCHOS_ASSERT(numCols <= size_t(m_x.extent(0)));
  }

  KOKKOS_FUNCTION
  void operator()(const team_member& dev) const {
    using residual_value_type = typename BVector::non_const_value_type;
    using KAT = KokkosKernels::ArithTraits<residual_value_type>;

    Kokkos::parallel_for(Kokkos::TeamThreadRange(dev, 0, rows_per_team),
                         [&](const LO& loop) {
      const LO lclRow =
          static_cast<LO>(dev.league_rank()) * rows_per_team + loop;
      if (lclRow >= m_A.numRows()) {
        return;
      }
      const auto A_row = m_A.rowConst(lclRow);
      const LO row_length = static_cast<LO>(A_row.length);
      residual_value_type A_x = KAT::zero();

      // Reduce this row's dot product with x across the vector lanes.
      Kokkos::parallel_reduce(
          Kokkos::ThreadVectorRange(dev, row_length),
          [&](const LO iEntry, residual_value_type& lsum) {
            lsum += A_row.value(iEntry) * m_x(A_row.colidx(iEntry));
          },
          A_x);

      // One vector lane per thread writes the damped, scaled residual.
      Kokkos::single(Kokkos::PerThread(dev), [&]() {
        const auto alpha_D_res = alpha * m_d(lclRow) * (m_b(lclRow) - A_x);
        if (use_beta) {
          m_w(lclRow) = beta * m_w(lclRow) + alpha_D_res;
        } else {
          m_w(lclRow) = alpha_D_res;
        }
      });
    });
  }
};
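
// Note on the parallel structure above: each team (league member) covers
// rows_per_team consecutive rows; threads within the team split those rows,
// each row's dot product with x is reduced across the thread's vector lanes,
// and a single lane per thread writes the result to w.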

// W := alpha * D * (B - A*X) + beta * W.
template <class WVector,
          class DVector,
          class BVector,
          class AMatrix,
          class XVector,
          class Scalar>
static void
scaled_damped_residual_vector(const Scalar& alpha,
                              const WVector& w,
                              const DVector& d,
                              const BVector& b,
                              const AMatrix& A,
                              const XVector& x,
                              const Scalar& beta) {
  using execution_space = typename AMatrix::execution_space;

  if (A.numRows() == 0) {
    return;
  }

  int team_size = -1;
  int vector_length = -1;
  int64_t rows_per_thread = -1;

  // Let the SpMV launch heuristics pick rows per team, team size, and
  // vector length for this matrix size and execution space.
  const int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(A.numRows(), A.nnz(), rows_per_thread, team_size, vector_length);
  int64_t worksets = (b.extent(0) + rows_per_team - 1) / rows_per_team;

  using Kokkos::Dynamic;
  using Kokkos::Schedule;
  using Kokkos::Static;
  using Kokkos::TeamPolicy;
  using policy_type_dynamic = TeamPolicy<execution_space, Schedule<Dynamic> >;
  using policy_type_static = TeamPolicy<execution_space, Schedule<Static> >;
  const char kernel_label[] = "scaled_damped_residual_vector";
  policy_type_dynamic policyDynamic(1, 1);
  policy_type_static policyStatic(1, 1);
  if (team_size < 0) {
    policyDynamic = policy_type_dynamic(worksets, Kokkos::AUTO, vector_length);
    policyStatic = policy_type_static(worksets, Kokkos::AUTO, vector_length);
  } else {
    policyDynamic = policy_type_dynamic(worksets, team_size, vector_length);
    policyStatic = policy_type_static(worksets, team_size, vector_length);
  }

  // Canonicalize template arguments to avoid redundant instantiations.
  using w_vec_type = typename WVector::non_const_type;
  using d_vec_type = typename DVector::const_type;
  using b_vec_type = typename BVector::const_type;
  using matrix_type = AMatrix;
  using x_vec_type = typename XVector::const_type;
  using scalar_type = typename KokkosKernels::ArithTraits<Scalar>::val_type;
  if (beta == KokkosKernels::ArithTraits<Scalar>::zero()) {
    constexpr bool use_beta = false;
    using functor_type =
        ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                          b_vec_type, matrix_type,
                                          x_vec_type, scalar_type,
                                          use_beta>;
    functor_type func(alpha, w, d, b, A, x, beta, rows_per_team);
    // Use a dynamic schedule for large matrices to smooth load imbalance.
    if (A.nnz() > 10000000)
      Kokkos::parallel_for(kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for(kernel_label, policyStatic, func);
  } else {
    constexpr bool use_beta = true;
    using functor_type =
        ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                          b_vec_type, matrix_type,
                                          x_vec_type, scalar_type,
                                          use_beta>;
    functor_type func(alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz() > 10000000)
      Kokkos::parallel_for(kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for(kernel_label, policyStatic, func);
  }
}
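
// Example (sketch; names are illustrative): given rank-1 device views w, d,
// b, x sized consistently with the local matrix of a fill-complete
// Tpetra::CrsMatrix A,
//
//   auto A_lcl = A.getLocalMatrixDevice();
//   scaled_damped_residual_vector(alpha, w, d, b, A_lcl, x, beta);
//
// overwrites w with alpha * d .* (b - A*x) + beta * w on device.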

}  // namespace Impl

template <class TpetraOperatorType>
ScaledDampedResidual<TpetraOperatorType>::
    ScaledDampedResidual(const Teuchos::RCP<const operator_type>& A) {
  setMatrix(A);
}

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    setMatrix(const Teuchos::RCP<const operator_type>& A) {
  if (A_op_.get() != A.get()) {
    A_op_ = A;

    // These get (re)allocated on demand.
    X_colMap_ = std::unique_ptr<vector_type>(nullptr);
    V1_ = std::unique_ptr<multivector_type>(nullptr);

    using Teuchos::rcp_dynamic_cast;
    Teuchos::RCP<const crs_matrix_type> A_crs =
        rcp_dynamic_cast<const crs_matrix_type>(A);
    if (A_crs.is_null()) {
      A_crs_ = Teuchos::null;
      imp_ = Teuchos::null;
      exp_ = Teuchos::null;
    } else {
      TEUCHOS_ASSERT(A_crs->isFillComplete());
      A_crs_ = A_crs;
      auto G = A_crs->getCrsGraph();
      imp_ = G->getImporter();
      exp_ = G->getExporter();
    }
  }
}

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    compute(multivector_type& W,
            const SC& alpha,
            vector_type& D_inv,
            multivector_type& B,
            multivector_type& X,
            const SC& beta) {
  if (canFuse(B)) {
    // "Fused" case: single-vector path using the local device kernel.
    W_vec_ = W.getVectorNonConst(0);
    B_vec_ = B.getVectorNonConst(0);
    X_vec_ = X.getVectorNonConst(0);
    TEUCHOS_ASSERT(!A_crs_.is_null());
    fusedCase(*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
  } else {
    // "Unfused" case: fall back to separate Tpetra operations.
    TEUCHOS_ASSERT(!A_op_.is_null());
    unfusedCase(W, alpha, D_inv, B, *A_op_, X, beta);
  }
}
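
// Example (sketch; setup is illustrative): with a fill-complete
// Tpetra::CrsMatrix wrapped as Teuchos::RCP<const op_type> A,
//
//   using op_type = Tpetra::Operator<SC, LO, GO, NT>;
//   ScaledDampedResidual<op_type> sdr(A);
//   sdr.compute(W, alpha, D_inv, B, X, beta);
//
// computes W := alpha * D_inv .* (B - A*X) + beta * W, taking the fused
// path whenever canFuse(B) returns true.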

template <class TpetraOperatorType>
typename ScaledDampedResidual<TpetraOperatorType>::vector_type&
ScaledDampedResidual<TpetraOperatorType>::
    importVector(vector_type& X_domMap) {
  if (imp_.is_null()) {
    return X_domMap;
  } else {
    if (X_colMap_.get() == nullptr) {
      using V = vector_type;
      X_colMap_ = std::unique_ptr<V>(new V(imp_->getTargetMap()));
    }
    X_colMap_->doImport(X_domMap, *imp_, Tpetra::REPLACE);
    return *X_colMap_;
  }
}

template <class TpetraOperatorType>
bool ScaledDampedResidual<TpetraOperatorType>::
    canFuse(const multivector_type& B) const {
  return B.getNumVectors() == size_t(1) &&
         !A_crs_.is_null() &&
         exp_.is_null();
}
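
// Fusion needs a single-column B (the kernel handles one vector at a time),
// a concrete CrsMatrix (so getLocalMatrixDevice() is available), and no
// Exporter (so the result can be written into W directly, without a
// separate Export step).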

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    unfusedCase(multivector_type& W,
                const SC& alpha,
                vector_type& D_inv,
                multivector_type& B,
                const operator_type& A,
                multivector_type& X,
                const SC& beta) {
  if (V1_.get() == nullptr) {
    using MV = multivector_type;
    const size_t numVecs = B.getNumVectors();
    V1_ = std::unique_ptr<MV>(new MV(B.getMap(), numVecs));
  }
  const SC one = Teuchos::ScalarTraits<SC>::one();

  // V1 = B - A*X
  Tpetra::deep_copy(*V1_, B);
  A.apply(X, *V1_, Teuchos::NO_TRANS, -one, one);

  // W := alpha * D_inv * V1 + beta * W
  W.elementWiseMultiply(alpha, D_inv, *V1_, beta);
}
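
// Note: A.apply(X, *V1_, NO_TRANS, -one, one) computes V1 := (-one)*A*X +
// one*V1, so together with the preceding deep_copy this yields V1 = B - A*X.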

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    fusedCase(vector_type& W,
              const SC& alpha,
              vector_type& D_inv,
              vector_type& B,
              const crs_matrix_type& A,
              vector_type& X,
              const SC& beta) {
  vector_type& X_colMap = importVector(X);

  using Impl::scaled_damped_residual_vector;
  using STS = Teuchos::ScalarTraits<SC>;

  auto A_lcl = A.getLocalMatrixDevice();
  auto Dinv_lcl = Kokkos::subview(D_inv.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  auto B_lcl = Kokkos::subview(B.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  auto X_lcl = Kokkos::subview(X_colMap.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  if (beta == STS::zero()) {
    auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::OverwriteAll), Kokkos::ALL(), 0);
    scaled_damped_residual_vector(alpha, W_lcl, Dinv_lcl,
                                  B_lcl, A_lcl, X_lcl, beta);
  } else {  // need to read _and_ write W if beta != 0
    auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
    scaled_damped_residual_vector(alpha, W_lcl, Dinv_lcl,
                                  B_lcl, A_lcl, X_lcl, beta);
  }
}
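
// Opening W with Access::OverwriteAll in the beta == 0 branch tells Tpetra
// that W's old contents are not needed, so it can skip syncing stale data;
// the beta != 0 branch must use ReadWrite because it reads W.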

}  // namespace Details
}  // namespace Ifpack2

#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_INSTANT(SC, LO, GO, NT) \
  template class Ifpack2::Details::ScaledDampedResidual<Tpetra::Operator<SC, LO, GO, NT> >;

#endif  // IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP