Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Details_GaussSeidel.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_GAUSS_SEIDEL_HPP
11#define IFPACK2_GAUSS_SEIDEL_HPP
12
13namespace Ifpack2 {
14namespace Details {
15template <typename Scalar, typename LO, typename GO, typename NT>
16struct GaussSeidel {
17 using crs_matrix_type = Tpetra::CrsMatrix<Scalar, LO, GO, NT>;
18 using bcrs_matrix_type = Tpetra::BlockCrsMatrix<Scalar, LO, GO, NT>;
19 using row_matrix_type = Tpetra::RowMatrix<Scalar, LO, GO, NT>;
20 using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
21 using vector_type = Tpetra::Vector<Scalar, LO, GO, NT>;
22 using multivector_type = Tpetra::MultiVector<Scalar, LO, GO, NT>;
23 using block_multivector_type = Tpetra::BlockMultiVector<Scalar, LO, GO, NT>;
24 using mem_space_t = typename local_matrix_device_type::memory_space;
25 using rowmap_t = typename local_matrix_device_type::row_map_type::host_mirror_type;
26 using entries_t = typename local_matrix_device_type::index_type::host_mirror_type;
27 using values_t = typename local_matrix_device_type::values_type::host_mirror_type;
28 using const_rowmap_t = typename rowmap_t::const_type;
29 using const_entries_t = typename entries_t::const_type;
30 using const_values_t = typename values_t::const_type;
31 using Offset = typename rowmap_t::non_const_value_type;
32 using IST = typename crs_matrix_type::impl_scalar_type;
33#if KOKKOS_VERSION >= 40799
34 using KAT = KokkosKernels::ArithTraits<IST>;
35#else
36 using KAT = Kokkos::ArithTraits<IST>;
37#endif
38 // Type of view representing inverse diagonal blocks, and its host_mirror_type.
39 using InverseBlocks = Kokkos::View<IST***, typename bcrs_matrix_type::device_type>;
40 using InverseBlocksHost = typename InverseBlocks::host_mirror_type;
41
42 typedef typename crs_matrix_type::nonconst_global_inds_host_view_type nonconst_global_inds_host_view_type;
43 typedef typename crs_matrix_type::nonconst_local_inds_host_view_type nonconst_local_inds_host_view_type;
44 typedef typename crs_matrix_type::nonconst_values_host_view_type nonconst_values_host_view_type;
45
46 // Setup for CrsMatrix
47 GaussSeidel(Teuchos::RCP<const crs_matrix_type> A_, Teuchos::RCP<vector_type>& inverseDiagVec_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_) {
48 A = A_;
49 numRows = A_->getLocalNumRows();
50 haveRowMatrix = false;
51 inverseDiagVec = inverseDiagVec_;
52 applyRows = applyRows_;
53 blockSize = 1;
54 omega = omega_;
55 }
56
57 GaussSeidel(Teuchos::RCP<const row_matrix_type> A_, Teuchos::RCP<vector_type>& inverseDiagVec_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_) {
58 A = A_;
59 numRows = A_->getLocalNumRows();
60 haveRowMatrix = true;
61 inverseDiagVec = inverseDiagVec_;
62 applyRows = applyRows_;
63 blockSize = 1;
64 omega = omega_;
65 // Here, need to make a deep CRS copy to avoid slow access via getLocalRowCopy
66 rowMatrixRowmap = rowmap_t(Kokkos::ViewAllocateWithoutInitializing("Arowmap"), numRows + 1);
67 rowMatrixEntries = entries_t(Kokkos::ViewAllocateWithoutInitializing("Aentries"), A_->getLocalNumEntries());
68 rowMatrixValues = values_t(Kokkos::ViewAllocateWithoutInitializing("Avalues"), A_->getLocalNumEntries());
69 size_t maxDegree = A_->getLocalMaxNumRowEntries();
70 nonconst_values_host_view_type rowValues("rowValues", maxDegree);
71 nonconst_local_inds_host_view_type rowEntries("rowEntries", maxDegree);
72 size_t accum = 0;
73 for (LO i = 0; i <= numRows; i++) {
74 rowMatrixRowmap(i) = accum;
75 if (i == numRows)
76 break;
77 size_t degree;
78 A_->getLocalRowCopy(i, rowEntries, rowValues, degree);
79 accum += degree;
80 size_t rowBegin = rowMatrixRowmap(i);
81 for (size_t j = 0; j < degree; j++) {
82 rowMatrixEntries(rowBegin + j) = rowEntries[j];
83 rowMatrixValues(rowBegin + j) = rowValues[j];
84 }
85 }
86 }
87
88 GaussSeidel(Teuchos::RCP<const bcrs_matrix_type> A_, const InverseBlocks& inverseBlockDiag_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_) {
89 A = A_;
90 numRows = A_->getLocalNumRows();
91 haveRowMatrix = false;
92 // note: next 2 lines are no-ops if inverseBlockDiag_ is already host-accessible
93 inverseBlockDiag = Kokkos::create_mirror_view(inverseBlockDiag_);
94 Kokkos::deep_copy(inverseBlockDiag, inverseBlockDiag_);
95 applyRows = applyRows_;
96 omega = omega_;
97 blockSize = A_->getBlockSize();
98 }
99
100 template <bool useApplyRows, bool multipleRHS, bool omegaNotOne>
101 void applyImpl(const const_values_t& Avalues, const const_rowmap_t& Arowmap, const const_entries_t& Aentries, multivector_type& x, const multivector_type& b, Tpetra::ESweepDirection direction) {
102 // note: direction is either Forward or Backward (Symmetric is handled in apply())
103 LO numApplyRows = useApplyRows ? (LO)applyRows.size() : numRows;
104 // note: inverseDiagMV always has only one column
105 auto inverseDiag = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
106 bool forward = direction == Tpetra::Forward;
107 if (multipleRHS) {
108 LO numVecs = x.getNumVectors();
109 Teuchos::Array<IST> accum(numVecs);
110 auto xlcl = x.get2dViewNonConst();
111 auto blcl = b.get2dView();
112 for (LO i = 0; i < numApplyRows; i++) {
113 LO row;
114 if (forward)
115 row = useApplyRows ? applyRows[i] : i;
116 else
117 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
118 for (LO k = 0; k < numVecs; k++) {
119 accum[k] = KAT::zero();
120 }
121 Offset rowBegin = Arowmap(row);
122 Offset rowEnd = Arowmap(row + 1);
123 for (Offset j = rowBegin; j < rowEnd; j++) {
124 LO col = Aentries(j);
125 IST val = Avalues(j);
126 for (LO k = 0; k < numVecs; k++) {
127 accum[k] += val * IST(xlcl[k][col]);
128 }
129 }
130 // Update x
131 IST dinv = inverseDiag(row);
132 for (LO k = 0; k < numVecs; k++) {
133 if (omegaNotOne)
134 xlcl[k][row] += Scalar(omega * dinv * (IST(blcl[k][row]) - accum[k]));
135 else
136 xlcl[k][row] += Scalar(dinv * (IST(blcl[k][row]) - accum[k]));
137 }
138 }
139 } else {
140 auto xlcl = Kokkos::subview(x.getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
141 auto blcl = Kokkos::subview(b.getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
142 auto dlcl = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
143 for (LO i = 0; i < numApplyRows; i++) {
144 LO row;
145 if (forward)
146 row = useApplyRows ? applyRows[i] : i;
147 else
148 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
149 IST accum = KAT::zero();
150 Offset rowBegin = Arowmap(row);
151 Offset rowEnd = Arowmap(row + 1);
152 for (Offset j = rowBegin; j < rowEnd; j++) {
153 accum += Avalues(j) * xlcl(Aentries(j));
154 }
155 // Update x
156 IST dinv = dlcl(row);
157 if (omegaNotOne)
158 xlcl(row) += omega * dinv * (blcl(row) - accum);
159 else
160 xlcl(row) += dinv * (blcl(row) - accum);
161 }
162 }
163 }
164
165 void applyBlock(block_multivector_type& x, const block_multivector_type& b, Tpetra::ESweepDirection direction) {
166 if (direction == Tpetra::Symmetric) {
167 applyBlock(x, b, Tpetra::Forward);
168 applyBlock(x, b, Tpetra::Backward);
169 return;
170 }
171 auto Abcrs = Teuchos::rcp_dynamic_cast<const bcrs_matrix_type>(A);
172 if (Abcrs.is_null())
173 throw std::runtime_error("Ifpack2::Details::GaussSeidel::applyBlock: A must be a BlockCrsMatrix");
174 auto Avalues = Abcrs->getValuesHost();
175 auto AlclGraph = Abcrs->getCrsGraph().getLocalGraphHost();
176 auto Arowmap = AlclGraph.row_map;
177 auto Aentries = AlclGraph.entries;
178 // Number of scalars in Avalues per block entry.
179 Offset bs2 = blockSize * blockSize;
180 LO numVecs = x.getNumVectors();
181 Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> accum(
182 Kokkos::ViewAllocateWithoutInitializing("BlockGaussSeidel Accumulator"), blockSize, numVecs);
183 Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> dinv_accum(
184 Kokkos::ViewAllocateWithoutInitializing("BlockGaussSeidel A_ii^-1*Accumulator"), blockSize, numVecs);
185 bool useApplyRows = !applyRows.is_null();
186 bool forward = direction == Tpetra::Forward;
187 LO numApplyRows = useApplyRows ? applyRows.size() : numRows;
188 for (LO i = 0; i < numApplyRows; i++) {
189 LO row;
190 if (forward)
191 row = useApplyRows ? applyRows[i] : i;
192 else
193 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
194 for (LO v = 0; v < numVecs; v++) {
195 auto bRow = b.getLocalBlockHost(row, v, Tpetra::Access::ReadOnly);
196 for (LO k = 0; k < blockSize; k++) {
197 accum(k, v) = KAT::zero();
198 }
199 }
200 Offset rowBegin = Arowmap(row);
201 Offset rowEnd = Arowmap(row + 1);
202 for (Offset j = rowBegin; j < rowEnd; j++) {
203 LO col = Aentries(j);
204 const IST* blk = &Avalues(j * bs2);
205 for (LO v = 0; v < numVecs; v++) {
206 auto xCol = x.getLocalBlockHost(col, v, Tpetra::Access::ReadOnly);
207 for (LO br = 0; br < blockSize; br++) {
208 for (LO bc = 0; bc < blockSize; bc++) {
209 IST Aval = blk[br * blockSize + bc];
210 accum(br, v) += Aval * xCol(bc);
211 }
212 }
213 }
214 }
215 // Update x: term is omega * Aii^-1 * accum, where omega is scalar, Aii^-1 is bs*bs, and accum is bs*nv
216 auto invBlock = Kokkos::subview(inverseBlockDiag, row, Kokkos::ALL(), Kokkos::ALL());
217 Kokkos::deep_copy(dinv_accum, KAT::zero());
218 for (LO v = 0; v < numVecs; v++) {
219 auto bRow = b.getLocalBlockHost(row, v, Tpetra::Access::ReadOnly);
220 for (LO br = 0; br < blockSize; br++) {
221 accum(br, v) = bRow(br) - accum(br, v);
222 }
223 }
224 for (LO v = 0; v < numVecs; v++) {
225 for (LO br = 0; br < blockSize; br++) {
226 for (LO bc = 0; bc < blockSize; bc++) {
227 dinv_accum(br, v) += invBlock(br, bc) * accum(bc, v);
228 }
229 }
230 }
231 // Update x
232 for (LO v = 0; v < numVecs; v++) {
233 auto xRow = x.getLocalBlockHost(row, v, Tpetra::Access::ReadWrite);
234 for (LO k = 0; k < blockSize; k++) {
235 xRow(k) += omega * dinv_accum(k, v);
236 }
237 }
238 }
239 }
240
241 // Version of apply for CrsMatrix/RowMatrix (for BlockCrsMatrix, call applyBlock)
242 void apply(multivector_type& x, const multivector_type& b, Tpetra::ESweepDirection direction) {
243 if (direction == Tpetra::Symmetric) {
244 apply(x, b, Tpetra::Forward);
245 apply(x, b, Tpetra::Backward);
246 return;
247 }
248 const_values_t Avalues;
249 const_rowmap_t Arowmap;
250 const_entries_t Aentries;
251 if (haveRowMatrix) {
252 Avalues = rowMatrixValues;
253 Arowmap = rowMatrixRowmap;
254 Aentries = rowMatrixEntries;
255 } else {
256 auto Acrs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
257 if (Acrs.is_null())
258 throw std::runtime_error("Ifpack2::Details::GaussSeidel::apply: either haveRowMatrix, or A is CrsMatrix");
259 auto Alcl = Acrs->getLocalMatrixHost();
260 Avalues = Alcl.values;
261 Arowmap = Alcl.graph.row_map;
262 Aentries = Alcl.graph.entries;
263 }
264 if (applyRows.is_null()) {
265 if (x.getNumVectors() > 1)
266 this->template applyImpl<false, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
267 else {
268 // Optimize for the all-rows, single vector, omega = 1 case
269 if (omega == KAT::one())
270 this->template applyImpl<false, false, false>(Avalues, Arowmap, Aentries, x, b, direction);
271 else
272 this->template applyImpl<false, false, true>(Avalues, Arowmap, Aentries, x, b, direction);
273 }
274 } else {
275 this->template applyImpl<true, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
276 }
277 }
278
279 Teuchos::RCP<const row_matrix_type> A;
280 // For efficiency, if input is a RowMatrix, keep a persistent copy of the CRS formatted local matrix.
281 bool haveRowMatrix;
282 values_t rowMatrixValues;
283 rowmap_t rowMatrixRowmap;
284 entries_t rowMatrixEntries;
285 LO numRows;
286 IST omega;
287 // If set up with BlockCrsMatrix, the block size. Otherwise 1.
288 LO blockSize;
289 // If using a non-block matrix, the inverse diagonal.
290 Teuchos::RCP<vector_type> inverseDiagVec;
291 // If using a block matrix, the inverses of all diagonal blocks.
292 InverseBlocksHost inverseBlockDiag;
293 // If null, apply over all rows in natural order. Otherwise, apply for each row listed in order.
294 Teuchos::ArrayRCP<LO> applyRows;
295};
296} // namespace Details
297} // namespace Ifpack2
298
299#endif
Ifpack2 implementation details.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40