MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_InverseApproximationFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_INVERSEAPPROXIMATIONFACTORY_DEF_HPP_
11#define MUELU_INVERSEAPPROXIMATIONFACTORY_DEF_HPP_
12
13#include <Xpetra_BlockedCrsMatrix.hpp>
14#include <Xpetra_CrsGraph.hpp>
15#include <Xpetra_CrsMatrixWrap.hpp>
16#include <Xpetra_CrsMatrix.hpp>
17#include <Xpetra_VectorFactory.hpp>
18#include <Xpetra_MatrixFactory.hpp>
19#include <Xpetra_Matrix.hpp>
20
21#include "Kokkos_Sort.hpp"
22#include "KokkosBlas1_set.hpp"
23#include "KokkosBatched_QR_Decl.hpp"
24#include "KokkosBatched_ApplyQ_Decl.hpp"
25#include "KokkosBatched_Trsv_Decl.hpp"
26#include "KokkosBatched_Util.hpp"
27
28#include "MueLu_Level.hpp"
29#include "MueLu_Monitor.hpp"
30#include "MueLu_Utilities.hpp"
32
33#if KOKKOSKERNELS_VERSION < 50102
34#include "Teuchos_SerialDenseVector.hpp"
35#include "Teuchos_SerialDenseMatrix.hpp"
36#include "Teuchos_SerialQRDenseSolver.hpp"
37#endif
38
39namespace MueLu {
40
41template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
43 RCP<ParameterList> validParamList = rcp(new ParameterList());
44 using Magnitude = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
45
46 validParamList->set<RCP<const FactoryBase>>("A", NoFactory::getRCP(), "Matrix to build the approximate inverse on.\n");
47
48 validParamList->set<std::string>("inverse: approximation type", "diagonal", "Method used to approximate the inverse.");
49 validParamList->set<Magnitude>("inverse: drop tolerance", 0.0, "Values below this threshold are dropped from the matrix (or fixed if diagonal fixing is active).");
50 validParamList->set<bool>("inverse: fixing", false, "Keep diagonal and fix small entries with 1.0");
51
52 return validParamList;
53}
54
55template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
57 Input(currentLevel, "A");
58}
59
60template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
62 FactoryMonitor m(*this, "Build", currentLevel);
63
64 using STS = Teuchos::ScalarTraits<SC>;
65 const SC one = STS::one();
66 using Magnitude = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
67
68 const ParameterList& pL = GetParameterList();
69 const bool fixing = pL.get<bool>("inverse: fixing");
70
71 // check which approximation type to use
72 const std::string method = pL.get<std::string>("inverse: approximation type");
73 TEUCHOS_TEST_FOR_EXCEPTION(method != "diagonal" && method != "lumping" && method != "sparseapproxinverse", Exceptions::RuntimeError,
74 "MueLu::InverseApproximationFactory::Build: Approximation type can be 'diagonal' or 'lumping' or "
75 "'sparseapproxinverse'.");
76
77 RCP<Matrix> A = Get<RCP<Matrix>>(currentLevel, "A");
78 RCP<BlockedCrsMatrix> bA = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(A);
79 const bool isBlocked = (bA == Teuchos::null ? false : true);
80
81 // if blocked operator is used, defaults to A(0,0)
82 if (isBlocked) A = bA->getMatrix(0, 0);
83
84 const Magnitude tol = pL.get<Magnitude>("inverse: drop tolerance");
85 RCP<Matrix> Ainv = Teuchos::null;
86
87 if (method == "diagonal") {
88 const auto diag = VectorFactory::Build(A->getRangeMap(), true);
89 A->getLocalDiagCopy(*diag);
90 const RCP<const Vector> D = (!fixing ? Utilities::GetInverse(diag) : Utilities::GetInverse(diag, tol, one));
91 Ainv = MatrixFactory::Build(D);
92 } else if (method == "lumping") {
93 const auto diag = Utilities::GetLumpedMatrixDiagonal(*A);
94 const RCP<const Vector> D = (!fixing ? Utilities::GetInverse(diag) : Utilities::GetInverse(diag, tol, one));
95 Ainv = MatrixFactory::Build(D);
96 } else if (method == "sparseapproxinverse") {
97 RCP<CrsGraph> sparsityPattern = Utilities::GetThresholdedGraph(A, tol);
98 if (IsPrint(Statistics1)) {
99 sparsityPattern->computeGlobalConstants();
100 GetOStream(Statistics1) << "NNZ Graph(A): " << A->getCrsGraph()->getGlobalNumEntries() << " , NNZ Tresholded Graph(A): " << sparsityPattern->getGlobalNumEntries() << std::endl;
101 }
102 RCP<Matrix> pAinv = GetSparseInverse(A, sparsityPattern);
103 Ainv = Utilities::GetThresholdedMatrix(pAinv, tol, fixing);
104 if (IsPrint(Statistics1)) {
105 rcp_const_cast<CrsGraph>(Ainv->getCrsGraph())->computeGlobalConstants();
106 GetOStream(Statistics1) << "NNZ Ainv: " << pAinv->getGlobalNumEntries() << ", NNZ Tresholded Ainv (parameter: " << tol << "): " << Ainv->getGlobalNumEntries() << std::endl;
107 }
108 }
109
110 GetOStream(Statistics1) << "Approximate inverse calculated by: " << method << "." << std::endl;
111 GetOStream(Statistics1) << "Ainv has " << Ainv->getGlobalNumRows() << "x" << Ainv->getGlobalNumCols() << " rows and columns." << std::endl;
112
113 Set(currentLevel, "Ainv", Ainv);
114}
115
116#if KOKKOSKERNELS_VERSION >= 50102
117
118template <class local_matrix_type>
119class LocalSPAIFunctor {
120 private:
121 using scalar_type = typename local_matrix_type::value_type;
122 using local_ordinal_type = typename local_matrix_type::ordinal_type;
123 using execution_space = typename local_matrix_type::execution_space;
124 using impl_scalar_type = typename KokkosKernels::ArithTraits<scalar_type>::val_type;
125 using impl_ATS = KokkosKernels::ArithTraits<impl_scalar_type>;
126
127 public:
128 using shared_matrix = Kokkos::View<impl_scalar_type**, typename execution_space::scratch_memory_space, Kokkos::MemoryUnmanaged>;
129 using shared_vector = Kokkos::View<impl_scalar_type*, typename execution_space::scratch_memory_space, Kokkos::MemoryUnmanaged>;
130 using shared_lo_vector = Kokkos::View<local_ordinal_type*, typename execution_space::scratch_memory_space, Kokkos::MemoryUnmanaged>;
131
132 private:
133 const local_matrix_type lclA;
134 local_matrix_type lclAinv;
135 const local_ordinal_type maxUniqueColEntries;
136 const int scratchLevel;
137
138 public:
139 LocalSPAIFunctor(const local_matrix_type& lclA_, local_matrix_type& lclAinv_, local_ordinal_type maxUniqueColEntries_, int scratchLevel_)
140 : lclA(lclA_)
141 , lclAinv(lclAinv_)
142 , maxUniqueColEntries(maxUniqueColEntries_)
143 , scratchLevel(scratchLevel_) {}
144
145 KOKKOS_INLINE_FUNCTION
146 void operator()(const typename Kokkos::TeamPolicy<execution_space>::member_type& thread) const {
147 auto rlid = thread.league_rank();
148 auto rowAinv = lclAinv.row(rlid);
149
150 // Loop over entries in row rlid of Ainv and collect all of A's column indices.
151 shared_lo_vector column_indices(thread.team_scratch(scratchLevel), maxUniqueColEntries);
152 local_ordinal_type numColEntries = 0;
153 for (local_ordinal_type ii = 0; ii < rowAinv.length; ++ii) {
154 auto i = rowAinv.colidx(ii);
155 auto rowA = lclA.rowConst(i);
156 for (local_ordinal_type jj = 0; jj < rowA.length; ++jj) {
157 auto j = rowA.colidx(jj);
158 column_indices(numColEntries) = j;
159 ++numColEntries;
160 }
161 }
162
163 // Get merged list of column indices.
164 local_ordinal_type numUniqeColEntries = 0;
165 local_ordinal_type diagOffset = 0;
166 {
167 // Sort
168 Kokkos::Experimental::sort_team(thread, Kokkos::subview(column_indices, Kokkos::make_pair(0, numColEntries)));
169 // Merge
170 if (numColEntries > 0)
171 ++numUniqeColEntries;
172 local_ordinal_type pos = 0;
173 for (local_ordinal_type m = 1; m < numColEntries; ++m) {
174 if (column_indices(pos) != column_indices(m)) {
175 column_indices(pos + 1) = column_indices(m);
176 ++pos;
177 ++numUniqeColEntries;
178 if (column_indices(pos) == rlid)
179 diagOffset = pos;
180 }
181 }
182 }
183
184 // Extract local part of A into a dense view.
185 shared_matrix localA(thread.team_scratch(scratchLevel), numUniqeColEntries, rowAinv.length);
186 KokkosBlas::SerialSet::invoke(impl_ATS::zero(), localA);
187
188 // Now fill localA.
189 for (local_ordinal_type ii = 0; ii < rowAinv.length; ++ii) {
190 auto i = rowAinv.colidx(ii);
191 auto rowA = lclA.rowConst(i);
192 for (local_ordinal_type jj = 0; jj < rowA.length; ++jj) {
193 auto j = rowA.colidx(jj);
194 auto v = rowA.value(jj);
195 // Determine local index.
196 // Sequential search might not be a great idea.
197 for (local_ordinal_type m = 0; m < numUniqeColEntries; ++m) {
198 if (column_indices(m) == j) {
199 localA(m, ii) = v;
200 break;
201 }
202 }
203 }
204 }
205
206 shared_matrix ek(thread.team_scratch(scratchLevel), numUniqeColEntries, 1);
207 // set to zero, set diagonal entry to one
208 for (local_ordinal_type i = 0; i < numUniqeColEntries; ++i) {
209 ek(i, 0) = (i == diagOffset) ? impl_ATS::one() : impl_ATS::zero();
210 }
211
212 // QR solve
213 shared_vector tau(thread.team_scratch(scratchLevel), rowAinv.length);
214 shared_vector work(thread.team_scratch(scratchLevel), numUniqeColEntries);
215 // factorize localA = Q*R in-place
216 KokkosBatched::SerialQR<KokkosBatched::Algo::QR::Unblocked>::invoke(localA, tau, work);
217 // ek := Q^T ek
218 KokkosBatched::SerialApplyQ<KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose, KokkosBatched::Algo::ApplyQ::Unblocked>::invoke(localA, tau, ek, work);
219 // ek[:rowLength] := R^{-1} ek[:rowLength]
220 auto sub_A = Kokkos::subview(localA, Kokkos::make_pair(0, rowAinv.length), Kokkos::ALL());
221 auto sub_ek = Kokkos::subview(ek, Kokkos::make_pair(0, rowAinv.length), 0);
222 KokkosBatched::SerialTrsv<KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose, KokkosBatched::Diag::NonUnit, KokkosBatched::Algo::Trsv::Unblocked>::invoke(impl_ATS::one(), sub_A, sub_ek);
223
224 // Set entries of Ainv.
225 for (local_ordinal_type i = 0; i < rowAinv.length; ++i) {
226 rowAinv.value(i) = sub_ek(i);
227 }
228 }
229};
230
231template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
232RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
233InverseApproximationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetSparseInverse(const RCP<Matrix>& Aorg, const RCP<const CrsGraph>& sparsityPattern) const {
234 using execution_space = typename Node::execution_space;
235
236 // construct the inverse matrix with the given sparsity pattern
237 RCP<Matrix> Ainv = MatrixFactory::Build(sparsityPattern);
238 Ainv->resumeFill();
239
240 // gather missing rows from other procs to generate an overlapping map
241 RCP<Import> rowImport = ImportFactory::Build(sparsityPattern->getRowMap(), sparsityPattern->getColMap());
242 RCP<Matrix> A = MatrixFactory::Build(Aorg, *rowImport);
243
244 auto maxRowEntriesAinv = Ainv->getLocalMaxNumRowEntries();
245 auto maxRowEntriesA = A->getLocalMaxNumRowEntries();
246 auto maxUniqueColEntries = maxRowEntriesAinv * maxRowEntriesA;
247 {
248 auto lclA = A->getLocalMatrixDevice();
249 auto lclAinv = Ainv->getLocalMatrixDevice();
250
251 Kokkos::TeamPolicy<execution_space> policy(lclAinv.numRows(), 1);
252
253 using spai_functor_type = LocalSPAIFunctor<decltype(lclAinv)>;
254 using shared_matrix = typename spai_functor_type::shared_matrix;
255 using shared_vector = typename spai_functor_type::shared_vector;
256 using shared_lo_vector = typename spai_functor_type::shared_lo_vector;
257
258 int size = shared_matrix::shmem_size(maxUniqueColEntries, maxRowEntriesAinv) + shared_matrix::shmem_size(maxUniqueColEntries, 1) + shared_vector::shmem_size(3 * maxUniqueColEntries) + shared_vector::shmem_size(maxRowEntriesAinv) + shared_lo_vector::shmem_size(maxUniqueColEntries);
259
260 int scratchLevel = -1;
261 if (size < policy.scratch_size_max(/*level=*/(int)0)) {
262 policy.set_scratch_size(/*level=*/(int)0, Kokkos::PerTeam(size));
263 scratchLevel = 0;
264 } else if (size < policy.scratch_size_max(/*level=*/(int)1)) {
265 policy.set_scratch_size(/*level=*/(int)1, Kokkos::PerTeam(size));
266 scratchLevel = 1;
267 } else
268 throw Exceptions::RuntimeError("Neither L0 scratch memory (max size " + std::to_string(policy.scratch_size_max((int)0)) +
269 "), nor L1 scratch memory (max size " + std::to_string(policy.scratch_size_max((int)1)) +
270 ") is large enough for requested allocation of size " + std::to_string(size));
271
272 LocalSPAIFunctor spaiFunctor(lclA, lclAinv, maxUniqueColEntries, scratchLevel);
273
274 Kokkos::parallel_for("MueLu::InverseFactory::LocalSpai", policy, spaiFunctor);
275 }
276
277 Ainv->fillComplete();
278
279 return Ainv;
280}
281
282#else
283
284template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
285RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
286InverseApproximationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetSparseInverse(const RCP<Matrix>& Aorg, const RCP<const CrsGraph>& sparsityPattern) const {
287 // construct the inverse matrix with the given sparsity pattern
288 RCP<Matrix> Ainv = MatrixFactory::Build(sparsityPattern);
289 Ainv->resumeFill();
290
291 // gather missing rows from other procs to generate an overlapping map
292 RCP<Import> rowImport = ImportFactory::Build(sparsityPattern->getRowMap(), sparsityPattern->getColMap());
293 RCP<Matrix> A = MatrixFactory::Build(Aorg, *rowImport);
294
295 // loop over all rows of the inverse sparsity pattern (this can be done in parallel)
296 for (size_t k = 0; k < sparsityPattern->getLocalNumRows(); k++) {
297 // 1. get column indices Ik of local row k
298 ArrayView<const LO> Ik;
299 sparsityPattern->getLocalRowView(k, Ik);
300
301 // 2. get all local A(Ik,:) rows
302 Array<ArrayView<const LO>> J(Ik.size());
303 Array<ArrayView<const SC>> Ak(Ik.size());
304 Array<LO> Jk;
305 for (LO i = 0; i < Ik.size(); i++) {
306 A->getLocalRowView(Ik[i], J[i], Ak[i]);
307 for (LO j = 0; j < J[i].size(); j++)
308 Jk.append(J[i][j]);
309 }
310 // set of unique column indices Jk
311 std::sort(Jk.begin(), Jk.end());
312 Jk.erase(std::unique(Jk.begin(), Jk.end()), Jk.end());
313 // create map
314 std::map<LO, LO> G;
315 for (LO i = 0; i < Jk.size(); i++) G.insert(std::pair<LO, LO>(Jk[i], i));
316
317 // 3. merge rows together
318 Teuchos::SerialDenseMatrix<LO, SC> localA(Jk.size(), Ik.size(), true);
319 for (LO i = 0; i < Ik.size(); i++) {
320 for (LO j = 0; j < J[i].size(); j++) {
321 localA(G.at(J[i][j]), i) = Ak[i][j];
322 }
323 }
324
325 // 4. get direction-vector
326 // diagonal needs an entry!
327 Teuchos::SerialDenseVector<LO, SC> ek(Jk.size(), true);
328 ek[std::find(Jk.begin(), Jk.end(), k) - Jk.begin()] = Teuchos::ScalarTraits<Scalar>::one();
329 ;
330
331 // 5. solve linear system for x
332 Teuchos::SerialDenseVector<LO, SC> localX(Ik.size());
333 Teuchos::SerialQRDenseSolver<LO, SC> qrSolver;
334 qrSolver.setMatrix(Teuchos::rcp(&localA, false));
335 qrSolver.setVectors(Teuchos::rcp(&localX, false), Teuchos::rcp(&ek, false));
336 const int err = qrSolver.solve();
337 TEUCHOS_TEST_FOR_EXCEPTION(err != 0, Exceptions::RuntimeError,
338 "MueLu::InverseApproximationFactory::GetSparseInverse: Error in serial QR solve.");
339
340 // 6. set calculated row into Ainv
341 ArrayView<const SC> Mk(localX.values(), localX.length());
342 Ainv->replaceLocalValues(k, Ik, Mk);
343 }
344 Ainv->fillComplete();
345
346 return Ainv;
347}
348
349#endif
350
351} // namespace MueLu
352
353#endif /* MUELU_INVERSEAPPROXIMATIONFACTORY_DEF_HPP_ */
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Build(Level &currentLevel) const
Build an object with this factory.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
RCP< Matrix > GetSparseInverse(const RCP< Matrix > &A, const RCP< const CrsGraph > &sparsityPattern) const
Sparse inverse calculation method.
Class that holds all level-specific information.
static const RCP< const NoFactory > getRCP()
Static Get() functions.
static Teuchos::RCP< Vector > GetInverse(Teuchos::RCP< const Vector > v, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps() *100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero())
Return vector containing inverse of input vector.
static RCP< Xpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > GetThresholdedGraph(const RCP< Matrix > &A, const Magnitude threshold)
Threshold a graph.
static RCP< Matrix > GetThresholdedMatrix(const RCP< Matrix > &Ain, const Magnitude threshold, const bool keepDiagonal=true)
Threshold a matrix.
static Teuchos::RCP< Vector > GetLumpedMatrixDiagonal(Matrix const &A, const bool doReciprocal=false, Magnitude tol=Teuchos::ScalarTraits< Scalar >::magnitude(Teuchos::ScalarTraits< Scalar >::zero()), Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool replaceSingleEntryRowWithZero=false, const bool useAverageAbsDiagVal=false)
Extract Matrix Diagonal of lumped matrix.
Namespace for MueLu classes and methods.
@ Statistics1
Print more statistics.