Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_BlockComputeResidualVector_decl.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_BLOCKCOMPUTERES_VECTOR_DECL_HPP
11#define IFPACK2_BLOCKCOMPUTERES_VECTOR_DECL_HPP
12
13#include <KokkosBatched_Util.hpp>
14#include <KokkosBatched_Vector.hpp>
15#include <Tpetra_BlockMultiVector.hpp>
16#include <Tpetra_BlockCrsMatrix_decl.hpp>
17#include "Ifpack2_BlockHelper.hpp"
18#include "Ifpack2_BlockHelper_ETI.hpp"
19
20namespace Ifpack2::BlockHelperDetails {
21
// Primary template for ComputeResidualVector; it is only declared here, not defined.
// ImplTagType defaults to BlockTriDiContainerDetails::ImplTag<scalar_type>::type and is the
// parameter on which the partial specializations in this header are selected.
22template <typename MatrixType,
23 typename ImplTagType = typename BlockTriDiContainerDetails::ImplTag<typename MatrixType::scalar_type>::type>
24struct ComputeResidualVector;
25
// Partial specialization selected when the implementation tag is
// BlockTriDiContainerDetails::ImplSimdTag.
// It stores const view handles taken from an AmD, two local CRS graphs and a PartInterface,
// and declares the residual kernels (y = b - R x). Only declarations appear in this header;
// the member function bodies are presumably provided by the matching _def header -- confirm.
26template <typename MatrixType>
27struct ComputeResidualVector<MatrixType, BlockTriDiContainerDetails::ImplSimdTag> {
 // Type aliases forwarded from BlockHelperDetails::ImplType<MatrixType>.
28 using impl_type = BlockHelperDetails::ImplType<MatrixType>;
29 using node_device_type = typename impl_type::node_device_type;
30 using execution_space = typename impl_type::execution_space;
31 using memory_space = typename impl_type::memory_space;
32
33 using local_ordinal_type = typename impl_type::local_ordinal_type;
34 using size_type = typename impl_type::size_type;
35 using impl_scalar_type = typename impl_type::impl_scalar_type;
36 using magnitude_type = typename impl_type::magnitude_type;
37 using btdm_scalar_type = typename impl_type::btdm_scalar_type;
38 using btdm_magnitude_type = typename impl_type::btdm_magnitude_type;
 // View type aliases, also forwarded from impl_type.
40 using local_ordinal_type_1d_view = typename impl_type::local_ordinal_type_1d_view;
41 using size_type_1d_view = typename impl_type::size_type_1d_view;
42 using tpetra_block_access_view_type = typename impl_type::tpetra_block_access_view_type; // block crs (layout right)
43 using impl_scalar_type_1d_view = typename impl_type::impl_scalar_type_1d_view;
44 using impl_scalar_type_2d_view_tpetra = typename impl_type::impl_scalar_type_2d_view_tpetra; // block multivector (layout left)
45 using vector_type_3d_view = typename impl_type::vector_type_3d_view;
46 using btdm_scalar_type_4d_view = typename impl_type::btdm_scalar_type_4d_view;
47 using i64_3d_view = typename impl_type::i64_3d_view;
48 static constexpr int vector_length = impl_type::vector_length;
49
 // Team member handle type of Kokkos::TeamPolicy on execution_space.
51 using member_type = typename Kokkos::TeamPolicy<execution_space>::member_type;
52
53 // AmD information
 // (initialised in the constructor from amd.rowptr, amd.rowptr_remote, amd.A_colindsub,
 //  amd.A_colindsub_remote and amd.tpetra_values)
54 const ConstUnmanaged<size_type_1d_view> rowptr, rowptr_remote;
55 const ConstUnmanaged<local_ordinal_type_1d_view> colindsub, colindsub_remote;
56 const ConstUnmanaged<impl_scalar_type_1d_view> tpetra_values;
57
58 // block crs graph information
59 // for cuda (kokkos crs graph uses a different size_type from size_t)
 // (initialised from block_graph.row_map, point_graph.row_map and block_graph.entries)
60 const ConstUnmanaged<Kokkos::View<size_t *, node_device_type>> A_block_rowptr;
61 const ConstUnmanaged<Kokkos::View<size_t *, node_device_type>> A_point_rowptr;
62 const ConstUnmanaged<Kokkos::View<local_ordinal_type *, node_device_type>> A_colind;
63
64 // blocksize
65 const local_ordinal_type blocksize_requested;
66
67 // part interface
 // (the first five are copied from the PartInterface argument; dm2cm comes from the
 //  separate dm2cm_ constructor argument)
68 const ConstUnmanaged<local_ordinal_type_1d_view> part2packrowidx0;
69 const ConstUnmanaged<local_ordinal_type_1d_view> part2rowidx0;
70 const ConstUnmanaged<local_ordinal_type_1d_view> rowidx2part;
71 const ConstUnmanaged<local_ordinal_type_1d_view> partptr;
72 const ConstUnmanaged<local_ordinal_type_1d_view> lclrow;
73 const ConstUnmanaged<local_ordinal_type_1d_view> dm2cm;
74
75 // block offsets
 // (copied from amd.A_x_offsets / amd.A_x_offsets_remote; see precompute_A_x_offsets)
76 const ConstUnmanaged<i64_3d_view> A_x_offsets;
77 const ConstUnmanaged<i64_3d_view> A_x_offsets_remote;
78
 // is_dm2cm_active is set to (dm2cm_.span() > 0) in the constructor;
 // hasBlockCrsMatrix is copied from the constructor argument of the same name.
79 const bool is_dm2cm_active;
80 const bool hasBlockCrsMatrix;
81
 // Constructor: copies the view handles and flags listed above into the const members.
 // LocalCrsGraphType must expose `row_map` and `entries` members (both are read below).
 // NOTE(review): the members are ConstUnmanaged views, so presumably they do not keep the
 // underlying allocations alive and the caller must outlive this object -- confirm against
 // the ConstUnmanaged definition in Ifpack2_BlockHelper.hpp.
82 template <typename LocalCrsGraphType>
83 ComputeResidualVector(const AmD<MatrixType> &amd,
84 const LocalCrsGraphType &block_graph,
85 const LocalCrsGraphType &point_graph,
86 const local_ordinal_type &blocksize_requested_,
87 const PartInterface<MatrixType> &interf,
88 const local_ordinal_type_1d_view &dm2cm_,
89 bool hasBlockCrsMatrix_)
90 : rowptr(amd.rowptr)
91 , rowptr_remote(amd.rowptr_remote)
92 , colindsub(amd.A_colindsub)
93 , colindsub_remote(amd.A_colindsub_remote)
94 , tpetra_values(amd.tpetra_values)
95 , A_block_rowptr(block_graph.row_map)
96 , A_point_rowptr(point_graph.row_map)
97 , A_colind(block_graph.entries)
98 , blocksize_requested(blocksize_requested_)
99 , part2packrowidx0(interf.part2packrowidx0)
100 , part2rowidx0(interf.part2rowidx0)
101 , rowidx2part(interf.rowidx2part)
102 , partptr(interf.partptr)
103 , lclrow(interf.lclrow)
104 , dm2cm(dm2cm_)
105 , A_x_offsets(amd.A_x_offsets)
106 , A_x_offsets_remote(amd.A_x_offsets_remote)
107 , is_dm2cm_active(dm2cm_.span() > 0)
108 , hasBlockCrsMatrix(hasBlockCrsMatrix_) {}
109
110 // Precompute offsets of each A and x entry to speed up residual.
111 // (Applies for hasBlockCrsMatrix == true and OverlapTag/AsyncTag)
112 // Reading A, x take up to 4, 6 levels of indirection respectively,
113 // but precomputing the offsets reduces it to 2 for both.
114 //
115 // This function allocates and populates these members of AmD:
116 // A_x_offsets, A_x_offsets_remote
 //
 // Static: it takes the AmD by non-const reference and needs no ComputeResidualVector
 // instance. Only declared here.
117 static void precompute_A_x_offsets(
118 AmD<MatrixType> &amd,
119 const PartInterface<MatrixType> &interf,
120 const Teuchos::RCP<const typename ImplType<MatrixType>::tpetra_crs_graph_type> &g,
121 const typename ImplType<MatrixType>::local_ordinal_type_1d_view &dm2cm,
122 int blocksize,
123 bool ownedRemoteSeparate);
124
125 // y = b - Rx; seq method
 // (y_, b_ and x_ are all 2D Tpetra-layout views; declared only)
126 void run(const impl_scalar_type_2d_view_tpetra &y_,
127 const Const<impl_scalar_type_2d_view_tpetra> &b_,
128 const impl_scalar_type_2d_view_tpetra &x_);
129
130 // y = b - R (x , x_remote)
 // (the result goes to the packed 3D view y_packed_; declared only)
131 void run(const vector_type_3d_view &y_packed_,
132 const Const<impl_scalar_type_2d_view_tpetra> &b_,
133 const impl_scalar_type_2d_view_tpetra &x_,
134 const impl_scalar_type_2d_view_tpetra &x_remote_);
135
136 // y = b - R (y , y_remote)
 // NOTE(review): the comment above names (y , y_remote) but the parameters are x_ and
 // x_remote_, as in the previous overload -- presumably it should read (x , x_remote).
 // compute_owned presumably selects the owned versus the remote contribution; confirm
 // against the definition.
137 void run(const vector_type_3d_view &y_packed_,
138 const Const<impl_scalar_type_2d_view_tpetra> &b_,
139 const impl_scalar_type_2d_view_tpetra &x_,
140 const impl_scalar_type_2d_view_tpetra &x_remote_,
141 const bool compute_owned);
142};
143
// Partial specialization selected when the implementation tag is
// BlockTriDiContainerDetails::ImplNotAvailTag.
// It mirrors the data members and member-function signatures of the ImplSimdTag
// specialization, but every function body is empty and the constructor always reports an
// error via TEUCHOS_TEST_FOR_EXCEPT_MSG(true, ...).
144template <typename MatrixType>
145struct ComputeResidualVector<MatrixType, BlockTriDiContainerDetails::ImplNotAvailTag> {
 // Type aliases forwarded from BlockHelperDetails::ImplType<MatrixType>.
146 using impl_type = BlockHelperDetails::ImplType<MatrixType>;
147 using node_device_type = typename impl_type::node_device_type;
148 using execution_space = typename impl_type::execution_space;
149 using memory_space = typename impl_type::memory_space;
150
151 using local_ordinal_type = typename impl_type::local_ordinal_type;
152 using size_type = typename impl_type::size_type;
153 using impl_scalar_type = typename impl_type::impl_scalar_type;
154 using magnitude_type = typename impl_type::magnitude_type;
155 using btdm_scalar_type = typename impl_type::btdm_scalar_type;
156 using btdm_magnitude_type = typename impl_type::btdm_magnitude_type;
 // View type aliases, also forwarded from impl_type.
158 using local_ordinal_type_1d_view = typename impl_type::local_ordinal_type_1d_view;
159 using size_type_1d_view = typename impl_type::size_type_1d_view;
160 using tpetra_block_access_view_type = typename impl_type::tpetra_block_access_view_type; // block crs (layout right)
161 using impl_scalar_type_1d_view = typename impl_type::impl_scalar_type_1d_view;
162 using impl_scalar_type_2d_view_tpetra = typename impl_type::impl_scalar_type_2d_view_tpetra; // block multivector (layout left)
163 using vector_type_3d_view = typename impl_type::vector_type_3d_view;
164 using btdm_scalar_type_4d_view = typename impl_type::btdm_scalar_type_4d_view;
165 using i64_3d_view = typename impl_type::i64_3d_view;
166 static constexpr int vector_length = impl_type::vector_length;
167
 // Team member handle type of Kokkos::TeamPolicy on execution_space.
169 using member_type = typename Kokkos::TeamPolicy<execution_space>::member_type;
170
171 // AmD information
 // (initialised in the constructor from amd.rowptr, amd.rowptr_remote, amd.A_colindsub,
 //  amd.A_colindsub_remote and amd.tpetra_values)
172 const ConstUnmanaged<size_type_1d_view> rowptr, rowptr_remote;
173 const ConstUnmanaged<local_ordinal_type_1d_view> colindsub, colindsub_remote;
174 const ConstUnmanaged<impl_scalar_type_1d_view> tpetra_values;
175
176 // block crs graph information
177 // for cuda (kokkos crs graph uses a different size_type from size_t)
 // (initialised from block_graph.row_map, point_graph.row_map and block_graph.entries)
178 const ConstUnmanaged<Kokkos::View<size_t *, node_device_type>> A_block_rowptr;
179 const ConstUnmanaged<Kokkos::View<size_t *, node_device_type>> A_point_rowptr;
180 const ConstUnmanaged<Kokkos::View<local_ordinal_type *, node_device_type>> A_colind;
181
182 // blocksize
183 const local_ordinal_type blocksize_requested;
184
185 // part interface
 // (the first five are copied from the PartInterface argument; dm2cm comes from the
 //  separate dm2cm_ constructor argument)
186 const ConstUnmanaged<local_ordinal_type_1d_view> part2packrowidx0;
187 const ConstUnmanaged<local_ordinal_type_1d_view> part2rowidx0;
188 const ConstUnmanaged<local_ordinal_type_1d_view> rowidx2part;
189 const ConstUnmanaged<local_ordinal_type_1d_view> partptr;
190 const ConstUnmanaged<local_ordinal_type_1d_view> lclrow;
191 const ConstUnmanaged<local_ordinal_type_1d_view> dm2cm;
192
193 // block offsets
 // (copied from amd.A_x_offsets / amd.A_x_offsets_remote)
194 const ConstUnmanaged<i64_3d_view> A_x_offsets;
195 const ConstUnmanaged<i64_3d_view> A_x_offsets_remote;
196
 // is_dm2cm_active is set to (dm2cm_.span() > 0) in the constructor;
 // hasBlockCrsMatrix is copied from the constructor argument of the same name.
197 const bool is_dm2cm_active;
198 const bool hasBlockCrsMatrix;
199
 // Constructor: initialises every member exactly as the ImplSimdTag specialization does,
 // then unconditionally triggers TEUCHOS_TEST_FOR_EXCEPT_MSG (its condition is the literal
 // `true`), so construction is always reported as an error for this tag.
 // LocalCrsGraphType must expose `row_map` and `entries` members (both are read below).
200 template <typename LocalCrsGraphType>
201 ComputeResidualVector(const AmD<MatrixType> &amd,
202 const LocalCrsGraphType &block_graph,
203 const LocalCrsGraphType &point_graph,
204 const local_ordinal_type &blocksize_requested_,
205 const PartInterface<MatrixType> &interf,
206 const local_ordinal_type_1d_view &dm2cm_,
207 bool hasBlockCrsMatrix_)
208 : rowptr(amd.rowptr)
209 , rowptr_remote(amd.rowptr_remote)
210 , colindsub(amd.A_colindsub)
211 , colindsub_remote(amd.A_colindsub_remote)
212 , tpetra_values(amd.tpetra_values)
213 , A_block_rowptr(block_graph.row_map)
214 , A_point_rowptr(point_graph.row_map)
215 , A_colind(block_graph.entries)
216 , blocksize_requested(blocksize_requested_)
217 , part2packrowidx0(interf.part2packrowidx0)
218 , part2rowidx0(interf.part2rowidx0)
219 , rowidx2part(interf.rowidx2part)
220 , partptr(interf.partptr)
221 , lclrow(interf.lclrow)
222 , dm2cm(dm2cm_)
223 , A_x_offsets(amd.A_x_offsets)
224 , A_x_offsets_remote(amd.A_x_offsets_remote)
225 , is_dm2cm_active(dm2cm_.span() > 0)
226 , hasBlockCrsMatrix(hasBlockCrsMatrix_) {
227 TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "Error: BlockTriDiContainer and related classes are not available for this scalar_type");
228 }
229
230 // Precompute offsets of each A and x entry to speed up residual.
231 // (Applies for hasBlockCrsMatrix == true and OverlapTag/AsyncTag)
232 // Reading A, x take up to 4, 6 levels of indirection respectively,
233 // but precomputing the offsets reduces it to 2 for both.
234 //
235 // This function allocates and populates these members of AmD:
236 // A_x_offsets, A_x_offsets_remote
 //
 // NOTE: in this ImplNotAvailTag specialization the body is empty, so the description
 // above does not apply here: nothing is allocated or populated. Because the function is
 // static it can be called without constructing the struct, and such a call does nothing.
237 static void precompute_A_x_offsets(
238 AmD<MatrixType> &amd,
239 const PartInterface<MatrixType> &interf,
240 const Teuchos::RCP<const typename ImplType<MatrixType>::tpetra_crs_graph_type> &g,
241 const typename ImplType<MatrixType>::local_ordinal_type_1d_view &dm2cm,
242 int blocksize,
243 bool ownedRemoteSeparate) {}
244
245 // y = b - Rx; seq method
 // (empty body in this specialization: y_ is not written)
246 void run(const impl_scalar_type_2d_view_tpetra &y_,
247 const Const<impl_scalar_type_2d_view_tpetra> &b_,
248 const impl_scalar_type_2d_view_tpetra &x_) {}
249
250 // y = b - R (x , x_remote)
 // (empty body in this specialization: y_packed_ is not written)
251 void run(const vector_type_3d_view &y_packed_,
252 const Const<impl_scalar_type_2d_view_tpetra> &b_,
253 const impl_scalar_type_2d_view_tpetra &x_,
254 const impl_scalar_type_2d_view_tpetra &x_remote_) {}
255
256 // y = b - R (y , y_remote)
 // NOTE(review): the comment above names (y , y_remote) but the parameters are x_ and
 // x_remote_ -- presumably it should read (x , x_remote). Empty body in this
 // specialization: y_packed_ is not written and compute_owned is ignored.
257 void run(const vector_type_3d_view &y_packed_,
258 const Const<impl_scalar_type_2d_view_tpetra> &b_,
259 const impl_scalar_type_2d_view_tpetra &x_,
260 const impl_scalar_type_2d_view_tpetra &x_remote_,
261 const bool compute_owned) {}
262};
263
264} // namespace Ifpack2::BlockHelperDetails
265
266#endif