Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Details_GaussSeidel.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_GAUSS_SEIDEL_HPP
11#define IFPACK2_GAUSS_SEIDEL_HPP
12
13namespace Ifpack2 {
14namespace Details {
15template <typename Scalar, typename LO, typename GO, typename NT>
16struct GaussSeidel {
17 using crs_matrix_type = Tpetra::CrsMatrix<Scalar, LO, GO, NT>;
18 using bcrs_matrix_type = Tpetra::BlockCrsMatrix<Scalar, LO, GO, NT>;
19 using row_matrix_type = Tpetra::RowMatrix<Scalar, LO, GO, NT>;
20 using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
21 using vector_type = Tpetra::Vector<Scalar, LO, GO, NT>;
22 using multivector_type = Tpetra::MultiVector<Scalar, LO, GO, NT>;
23 using block_multivector_type = Tpetra::BlockMultiVector<Scalar, LO, GO, NT>;
24 using mem_space_t = typename local_matrix_device_type::memory_space;
25 using rowmap_t = typename local_matrix_device_type::row_map_type::host_mirror_type;
26 using entries_t = typename local_matrix_device_type::index_type::host_mirror_type;
27 using values_t = typename local_matrix_device_type::values_type::host_mirror_type;
28 using const_rowmap_t = typename rowmap_t::const_type;
29 using const_entries_t = typename entries_t::const_type;
30 using const_values_t = typename values_t::const_type;
31 using Offset = typename rowmap_t::non_const_value_type;
32 using IST = typename crs_matrix_type::impl_scalar_type;
33 using KAT = KokkosKernels::ArithTraits<IST>;
34 // Type of view representing inverse diagonal blocks, and its host_mirror_type.
35 using InverseBlocks = Kokkos::View<IST***, typename bcrs_matrix_type::device_type>;
36 using InverseBlocksHost = typename InverseBlocks::host_mirror_type;
37
38 typedef typename crs_matrix_type::nonconst_global_inds_host_view_type nonconst_global_inds_host_view_type;
39 typedef typename crs_matrix_type::nonconst_local_inds_host_view_type nonconst_local_inds_host_view_type;
40 typedef typename crs_matrix_type::nonconst_values_host_view_type nonconst_values_host_view_type;
41
42 // Setup for CrsMatrix
43 GaussSeidel(Teuchos::RCP<const crs_matrix_type> A_, Teuchos::RCP<vector_type>& inverseDiagVec_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_) {
44 A = A_;
45 numRows = A_->getLocalNumRows();
46 haveRowMatrix = false;
47 inverseDiagVec = inverseDiagVec_;
48 applyRows = applyRows_;
49 blockSize = 1;
50 omega = omega_;
51 }
52
53 GaussSeidel(Teuchos::RCP<const row_matrix_type> A_, Teuchos::RCP<vector_type>& inverseDiagVec_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_) {
54 A = A_;
55 numRows = A_->getLocalNumRows();
56 haveRowMatrix = true;
57 inverseDiagVec = inverseDiagVec_;
58 applyRows = applyRows_;
59 blockSize = 1;
60 omega = omega_;
61 // Here, need to make a deep CRS copy to avoid slow access via getLocalRowCopy
62 rowMatrixRowmap = rowmap_t(Kokkos::ViewAllocateWithoutInitializing("Arowmap"), numRows + 1);
63 rowMatrixEntries = entries_t(Kokkos::ViewAllocateWithoutInitializing("Aentries"), A_->getLocalNumEntries());
64 rowMatrixValues = values_t(Kokkos::ViewAllocateWithoutInitializing("Avalues"), A_->getLocalNumEntries());
65 size_t maxDegree = A_->getLocalMaxNumRowEntries();
66 nonconst_values_host_view_type rowValues("rowValues", maxDegree);
67 nonconst_local_inds_host_view_type rowEntries("rowEntries", maxDegree);
68 size_t accum = 0;
69 for (LO i = 0; i <= numRows; i++) {
70 rowMatrixRowmap(i) = accum;
71 if (i == numRows)
72 break;
73 size_t degree;
74 A_->getLocalRowCopy(i, rowEntries, rowValues, degree);
75 accum += degree;
76 size_t rowBegin = rowMatrixRowmap(i);
77 for (size_t j = 0; j < degree; j++) {
78 rowMatrixEntries(rowBegin + j) = rowEntries[j];
79 rowMatrixValues(rowBegin + j) = rowValues[j];
80 }
81 }
82 }
83
84 GaussSeidel(Teuchos::RCP<const bcrs_matrix_type> A_, const InverseBlocks& inverseBlockDiag_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_) {
85 A = A_;
86 numRows = A_->getLocalNumRows();
87 haveRowMatrix = false;
88 // note: next 2 lines are no-ops if inverseBlockDiag_ is already host-accessible
89 inverseBlockDiag = Kokkos::create_mirror_view(inverseBlockDiag_);
90 Kokkos::deep_copy(inverseBlockDiag, inverseBlockDiag_);
91 applyRows = applyRows_;
92 omega = omega_;
93 blockSize = A_->getBlockSize();
94 }
95
96 template <bool useApplyRows, bool multipleRHS, bool omegaNotOne>
97 void applyImpl(const const_values_t& Avalues, const const_rowmap_t& Arowmap, const const_entries_t& Aentries, multivector_type& x, const multivector_type& b, Tpetra::ESweepDirection direction) {
98 // note: direction is either Forward or Backward (Symmetric is handled in apply())
99 LO numApplyRows = useApplyRows ? (LO)applyRows.size() : numRows;
100 // note: inverseDiagMV always has only one column
101 auto inverseDiag = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
102 bool forward = direction == Tpetra::Forward;
103 if (multipleRHS) {
104 LO numVecs = x.getNumVectors();
105 Teuchos::Array<IST> accum(numVecs);
106 auto xlcl = x.get2dViewNonConst();
107 auto blcl = b.get2dView();
108 for (LO i = 0; i < numApplyRows; i++) {
109 LO row;
110 if (forward)
111 row = useApplyRows ? applyRows[i] : i;
112 else
113 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
114 for (LO k = 0; k < numVecs; k++) {
115 accum[k] = KAT::zero();
116 }
117 Offset rowBegin = Arowmap(row);
118 Offset rowEnd = Arowmap(row + 1);
119 for (Offset j = rowBegin; j < rowEnd; j++) {
120 LO col = Aentries(j);
121 IST val = Avalues(j);
122 for (LO k = 0; k < numVecs; k++) {
123 accum[k] += val * IST(xlcl[k][col]);
124 }
125 }
126 // Update x
127 IST dinv = inverseDiag(row);
128 for (LO k = 0; k < numVecs; k++) {
129 if (omegaNotOne)
130 xlcl[k][row] += Scalar(omega * dinv * (IST(blcl[k][row]) - accum[k]));
131 else
132 xlcl[k][row] += Scalar(dinv * (IST(blcl[k][row]) - accum[k]));
133 }
134 }
135 } else {
136 auto xlcl = Kokkos::subview(x.getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
137 auto blcl = Kokkos::subview(b.getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
138 auto dlcl = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
139 for (LO i = 0; i < numApplyRows; i++) {
140 LO row;
141 if (forward)
142 row = useApplyRows ? applyRows[i] : i;
143 else
144 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
145 IST accum = KAT::zero();
146 Offset rowBegin = Arowmap(row);
147 Offset rowEnd = Arowmap(row + 1);
148 for (Offset j = rowBegin; j < rowEnd; j++) {
149 accum += Avalues(j) * xlcl(Aentries(j));
150 }
151 // Update x
152 IST dinv = dlcl(row);
153 if (omegaNotOne)
154 xlcl(row) += omega * dinv * (blcl(row) - accum);
155 else
156 xlcl(row) += dinv * (blcl(row) - accum);
157 }
158 }
159 }
160
161 void applyBlock(block_multivector_type& x, const block_multivector_type& b, Tpetra::ESweepDirection direction) {
162 if (direction == Tpetra::Symmetric) {
163 applyBlock(x, b, Tpetra::Forward);
164 applyBlock(x, b, Tpetra::Backward);
165 return;
166 }
167 auto Abcrs = Teuchos::rcp_dynamic_cast<const bcrs_matrix_type>(A);
168 if (Abcrs.is_null())
169 throw std::runtime_error("Ifpack2::Details::GaussSeidel::applyBlock: A must be a BlockCrsMatrix");
170 auto Avalues = Abcrs->getValuesHost();
171 auto AlclGraph = Abcrs->getCrsGraph().getLocalGraphHost();
172 auto Arowmap = AlclGraph.row_map;
173 auto Aentries = AlclGraph.entries;
174 // Number of scalars in Avalues per block entry.
175 Offset bs2 = blockSize * blockSize;
176 LO numVecs = x.getNumVectors();
177 Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> accum(
178 Kokkos::ViewAllocateWithoutInitializing("BlockGaussSeidel Accumulator"), blockSize, numVecs);
179 Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> dinv_accum(
180 Kokkos::ViewAllocateWithoutInitializing("BlockGaussSeidel A_ii^-1*Accumulator"), blockSize, numVecs);
181 bool useApplyRows = !applyRows.is_null();
182 bool forward = direction == Tpetra::Forward;
183 LO numApplyRows = useApplyRows ? applyRows.size() : numRows;
184 for (LO i = 0; i < numApplyRows; i++) {
185 LO row;
186 if (forward)
187 row = useApplyRows ? applyRows[i] : i;
188 else
189 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
190 for (LO v = 0; v < numVecs; v++) {
191 auto bRow = b.getLocalBlockHost(row, v, Tpetra::Access::ReadOnly);
192 for (LO k = 0; k < blockSize; k++) {
193 accum(k, v) = KAT::zero();
194 }
195 }
196 Offset rowBegin = Arowmap(row);
197 Offset rowEnd = Arowmap(row + 1);
198 for (Offset j = rowBegin; j < rowEnd; j++) {
199 LO col = Aentries(j);
200 const IST* blk = &Avalues(j * bs2);
201 for (LO v = 0; v < numVecs; v++) {
202 auto xCol = x.getLocalBlockHost(col, v, Tpetra::Access::ReadOnly);
203 for (LO br = 0; br < blockSize; br++) {
204 for (LO bc = 0; bc < blockSize; bc++) {
205 IST Aval = blk[br * blockSize + bc];
206 accum(br, v) += Aval * xCol(bc);
207 }
208 }
209 }
210 }
211 // Update x: term is omega * Aii^-1 * accum, where omega is scalar, Aii^-1 is bs*bs, and accum is bs*nv
212 auto invBlock = Kokkos::subview(inverseBlockDiag, row, Kokkos::ALL(), Kokkos::ALL());
213 Kokkos::deep_copy(dinv_accum, KAT::zero());
214 for (LO v = 0; v < numVecs; v++) {
215 auto bRow = b.getLocalBlockHost(row, v, Tpetra::Access::ReadOnly);
216 for (LO br = 0; br < blockSize; br++) {
217 accum(br, v) = bRow(br) - accum(br, v);
218 }
219 }
220 for (LO v = 0; v < numVecs; v++) {
221 for (LO br = 0; br < blockSize; br++) {
222 for (LO bc = 0; bc < blockSize; bc++) {
223 dinv_accum(br, v) += invBlock(br, bc) * accum(bc, v);
224 }
225 }
226 }
227 // Update x
228 for (LO v = 0; v < numVecs; v++) {
229 auto xRow = x.getLocalBlockHost(row, v, Tpetra::Access::ReadWrite);
230 for (LO k = 0; k < blockSize; k++) {
231 xRow(k) += omega * dinv_accum(k, v);
232 }
233 }
234 }
235 }
236
237 // Version of apply for CrsMatrix/RowMatrix (for BlockCrsMatrix, call applyBlock)
238 void apply(multivector_type& x, const multivector_type& b, Tpetra::ESweepDirection direction) {
239 if (direction == Tpetra::Symmetric) {
240 apply(x, b, Tpetra::Forward);
241 apply(x, b, Tpetra::Backward);
242 return;
243 }
244 const_values_t Avalues;
245 const_rowmap_t Arowmap;
246 const_entries_t Aentries;
247 if (haveRowMatrix) {
248 Avalues = rowMatrixValues;
249 Arowmap = rowMatrixRowmap;
250 Aentries = rowMatrixEntries;
251 } else {
252 auto Acrs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
253 if (Acrs.is_null())
254 throw std::runtime_error("Ifpack2::Details::GaussSeidel::apply: either haveRowMatrix, or A is CrsMatrix");
255 auto Alcl = Acrs->getLocalMatrixHost();
256 Avalues = Alcl.values;
257 Arowmap = Alcl.graph.row_map;
258 Aentries = Alcl.graph.entries;
259 }
260 if (applyRows.is_null()) {
261 if (x.getNumVectors() > 1)
262 this->template applyImpl<false, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
263 else {
264 // Optimize for the all-rows, single vector, omega = 1 case
265 if (omega == KAT::one())
266 this->template applyImpl<false, false, false>(Avalues, Arowmap, Aentries, x, b, direction);
267 else
268 this->template applyImpl<false, false, true>(Avalues, Arowmap, Aentries, x, b, direction);
269 }
270 } else {
271 this->template applyImpl<true, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
272 }
273 }
274
275 Teuchos::RCP<const row_matrix_type> A;
276 // For efficiency, if input is a RowMatrix, keep a persistent copy of the CRS formatted local matrix.
277 bool haveRowMatrix;
278 values_t rowMatrixValues;
279 rowmap_t rowMatrixRowmap;
280 entries_t rowMatrixEntries;
281 LO numRows;
282 IST omega;
283 // If set up with BlockCrsMatrix, the block size. Otherwise 1.
284 LO blockSize;
285 // If using a non-block matrix, the inverse diagonal.
286 Teuchos::RCP<vector_type> inverseDiagVec;
287 // If using a block matrix, the inverses of all diagonal blocks.
288 InverseBlocksHost inverseBlockDiag;
289 // If null, apply over all rows in natural order. Otherwise, apply for each row listed in order.
290 Teuchos::ArrayRCP<LO> applyRows;
291};
292} // namespace Details
293} // namespace Ifpack2
294
295#endif
Ifpack2 implementation details.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40