MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_InverseApproximationFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_INVERSEAPPROXIMATIONFACTORY_DEF_HPP_
11#define MUELU_INVERSEAPPROXIMATIONFACTORY_DEF_HPP_
12
13#include <Xpetra_BlockedCrsMatrix.hpp>
14#include <Xpetra_CrsGraph.hpp>
15#include <Xpetra_CrsMatrixWrap.hpp>
16#include <Xpetra_CrsMatrix.hpp>
17#include <Xpetra_VectorFactory.hpp>
18#include <Xpetra_MatrixFactory.hpp>
19#include <Xpetra_Matrix.hpp>
20
21#include "Kokkos_Sort.hpp"
22#include "KokkosBlas1_set.hpp"
23#include "KokkosBatched_QR_Decl.hpp"
24#include "KokkosBatched_ApplyQ_Decl.hpp"
25#include "KokkosBatched_Trsv_Decl.hpp"
26#include "KokkosBatched_Util.hpp"
27
28#include "MueLu_Level.hpp"
29#include "MueLu_Monitor.hpp"
30#include "MueLu_Utilities.hpp"
32
33namespace MueLu {
34
35template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
37 RCP<ParameterList> validParamList = rcp(new ParameterList());
38 using Magnitude = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
39
40 validParamList->set<RCP<const FactoryBase>>("A", NoFactory::getRCP(), "Matrix to build the approximate inverse on.\n");
41
42 validParamList->set<std::string>("inverse: approximation type", "diagonal", "Method used to approximate the inverse.");
43 validParamList->set<Magnitude>("inverse: drop tolerance", 0.0, "Values below this threshold are dropped from the matrix (or fixed if diagonal fixing is active).");
44 validParamList->set<bool>("inverse: fixing", false, "Keep diagonal and fix small entries with 1.0");
45
46 return validParamList;
47}
48
49template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
51 Input(currentLevel, "A");
52}
53
54template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
56 FactoryMonitor m(*this, "Build", currentLevel);
57
58 using STS = Teuchos::ScalarTraits<SC>;
59 const SC one = STS::one();
60 using Magnitude = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
61
62 const ParameterList& pL = GetParameterList();
63 const bool fixing = pL.get<bool>("inverse: fixing");
64
65 // check which approximation type to use
66 const std::string method = pL.get<std::string>("inverse: approximation type");
67 TEUCHOS_TEST_FOR_EXCEPTION(method != "diagonal" && method != "lumping" && method != "sparseapproxinverse", Exceptions::RuntimeError,
68 "MueLu::InverseApproximationFactory::Build: Approximation type can be 'diagonal' or 'lumping' or "
69 "'sparseapproxinverse'.");
70
71 RCP<Matrix> A = Get<RCP<Matrix>>(currentLevel, "A");
72 RCP<BlockedCrsMatrix> bA = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(A);
73 const bool isBlocked = (bA == Teuchos::null ? false : true);
74
75 // if blocked operator is used, defaults to A(0,0)
76 if (isBlocked) A = bA->getMatrix(0, 0);
77
78 const Magnitude tol = pL.get<Magnitude>("inverse: drop tolerance");
79 RCP<Matrix> Ainv = Teuchos::null;
80
81 if (method == "diagonal") {
82 const auto diag = VectorFactory::Build(A->getRangeMap(), true);
83 A->getLocalDiagCopy(*diag);
84 const RCP<const Vector> D = (!fixing ? Utilities::GetInverse(diag) : Utilities::GetInverse(diag, tol, one));
85 Ainv = MatrixFactory::Build(D);
86 } else if (method == "lumping") {
87 const auto diag = Utilities::GetLumpedMatrixDiagonal(*A);
88 const RCP<const Vector> D = (!fixing ? Utilities::GetInverse(diag) : Utilities::GetInverse(diag, tol, one));
89 Ainv = MatrixFactory::Build(D);
90 } else if (method == "sparseapproxinverse") {
91 RCP<CrsGraph> sparsityPattern = Utilities::GetThresholdedGraph(A, tol);
92 if (IsPrint(Statistics1)) {
93 sparsityPattern->computeGlobalConstants();
94 GetOStream(Statistics1) << "NNZ Graph(A): " << A->getCrsGraph()->getGlobalNumEntries() << " , NNZ Tresholded Graph(A): " << sparsityPattern->getGlobalNumEntries() << std::endl;
95 }
96 RCP<Matrix> pAinv = GetSparseInverse(A, sparsityPattern);
97 Ainv = Utilities::GetThresholdedMatrix(pAinv, tol, fixing);
98 if (IsPrint(Statistics1)) {
99 rcp_const_cast<CrsGraph>(Ainv->getCrsGraph())->computeGlobalConstants();
100 GetOStream(Statistics1) << "NNZ Ainv: " << pAinv->getGlobalNumEntries() << ", NNZ Tresholded Ainv (parameter: " << tol << "): " << Ainv->getGlobalNumEntries() << std::endl;
101 }
102 }
103
104 GetOStream(Statistics1) << "Approximate inverse calculated by: " << method << "." << std::endl;
105 GetOStream(Statistics1) << "Ainv has " << Ainv->getGlobalNumRows() << "x" << Ainv->getGlobalNumCols() << " rows and columns." << std::endl;
106
107 Set(currentLevel, "Ainv", Ainv);
108}
109
110template <class local_matrix_type>
112 private:
113 using scalar_type = typename local_matrix_type::value_type;
114 using local_ordinal_type = typename local_matrix_type::ordinal_type;
115 using execution_space = typename local_matrix_type::execution_space;
116 using impl_scalar_type = typename KokkosKernels::ArithTraits<scalar_type>::val_type;
117 using impl_ATS = KokkosKernels::ArithTraits<impl_scalar_type>;
118
119 public:
120 using shared_matrix = Kokkos::View<impl_scalar_type**, typename execution_space::scratch_memory_space, Kokkos::MemoryUnmanaged>;
121 using shared_vector = Kokkos::View<impl_scalar_type*, typename execution_space::scratch_memory_space, Kokkos::MemoryUnmanaged>;
122 using shared_lo_vector = Kokkos::View<local_ordinal_type*, typename execution_space::scratch_memory_space, Kokkos::MemoryUnmanaged>;
123
124 private:
125 const local_matrix_type lclA;
126 local_matrix_type lclAinv;
128 const int scratchLevel;
129
130 public:
131 LocalSPAIFunctor(const local_matrix_type& lclA_, local_matrix_type& lclAinv_, local_ordinal_type maxUniqueColEntries_, int scratchLevel_)
132 : lclA(lclA_)
133 , lclAinv(lclAinv_)
134 , maxUniqueColEntries(maxUniqueColEntries_)
135 , scratchLevel(scratchLevel_) {}
136
137 KOKKOS_INLINE_FUNCTION
138 void operator()(const typename Kokkos::TeamPolicy<execution_space>::member_type& thread) const {
139 auto rlid = thread.league_rank();
140 auto rowAinv = lclAinv.row(rlid);
141
142 // Loop over entries in row rlid of Ainv and collect all of A's column indices.
143 shared_lo_vector column_indices(thread.team_scratch(scratchLevel), maxUniqueColEntries);
144 local_ordinal_type numColEntries = 0;
145 for (local_ordinal_type ii = 0; ii < rowAinv.length; ++ii) {
146 auto i = rowAinv.colidx(ii);
147 auto rowA = lclA.rowConst(i);
148 for (local_ordinal_type jj = 0; jj < rowA.length; ++jj) {
149 auto j = rowA.colidx(jj);
150 column_indices(numColEntries) = j;
151 ++numColEntries;
152 }
153 }
154
155 // Get merged list of column indices.
156 local_ordinal_type numUniqeColEntries = 0;
157 local_ordinal_type diagOffset = 0;
158 {
159 // Sort
160 Kokkos::Experimental::sort_team(thread, Kokkos::subview(column_indices, Kokkos::make_pair(0, numColEntries)));
161 // Merge
162 if (numColEntries > 0)
163 ++numUniqeColEntries;
164 local_ordinal_type pos = 0;
165 for (local_ordinal_type m = 1; m < numColEntries; ++m) {
166 if (column_indices(pos) != column_indices(m)) {
167 column_indices(pos + 1) = column_indices(m);
168 ++pos;
169 ++numUniqeColEntries;
170 if (column_indices(pos) == rlid)
171 diagOffset = pos;
172 }
173 }
174 }
175
176 // Extract local part of A into a dense view.
177 shared_matrix localA(thread.team_scratch(scratchLevel), numUniqeColEntries, rowAinv.length);
178 KokkosBlas::SerialSet::invoke(impl_ATS::zero(), localA);
179
180 // Now fill localA.
181 for (local_ordinal_type ii = 0; ii < rowAinv.length; ++ii) {
182 auto i = rowAinv.colidx(ii);
183 auto rowA = lclA.rowConst(i);
184 for (local_ordinal_type jj = 0; jj < rowA.length; ++jj) {
185 auto j = rowA.colidx(jj);
186 auto v = rowA.value(jj);
187 // Determine local index.
188 // Sequential search might not be a great idea.
189 for (local_ordinal_type m = 0; m < numUniqeColEntries; ++m) {
190 if (column_indices(m) == j) {
191 localA(m, ii) = v;
192 break;
193 }
194 }
195 }
196 }
197
198 shared_matrix ek(thread.team_scratch(scratchLevel), numUniqeColEntries, 1);
199 // set to zero, set diagonal entry to one
200 for (local_ordinal_type i = 0; i < numUniqeColEntries; ++i) {
201 ek(i, 0) = (i == diagOffset) ? impl_ATS::one() : impl_ATS::zero();
202 }
203
204 // QR solve
205 shared_vector tau(thread.team_scratch(scratchLevel), rowAinv.length);
206 shared_vector work(thread.team_scratch(scratchLevel), numUniqeColEntries);
207 // factorize localA = Q*R in-place
208 KokkosBatched::SerialQR<KokkosBatched::Algo::QR::Unblocked>::invoke(localA, tau, work);
209 // ek := Q^T ek
210 KokkosBatched::SerialApplyQ<KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose, KokkosBatched::Algo::ApplyQ::Unblocked>::invoke(localA, tau, ek, work);
211 // ek[:rowLength] := R^{-1} ek[:rowLength]
212 auto sub_A = Kokkos::subview(localA, Kokkos::make_pair(0, rowAinv.length), Kokkos::ALL());
213 auto sub_ek = Kokkos::subview(ek, Kokkos::make_pair(0, rowAinv.length), 0);
214 KokkosBatched::SerialTrsv<KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose, KokkosBatched::Diag::NonUnit, KokkosBatched::Algo::Trsv::Unblocked>::invoke(impl_ATS::one(), sub_A, sub_ek);
215
216 // Set entries of Ainv.
217 for (local_ordinal_type i = 0; i < rowAinv.length; ++i) {
218 rowAinv.value(i) = sub_ek(i);
219 }
220 }
221};
222
223template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
224RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
225InverseApproximationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetSparseInverse(const RCP<Matrix>& Aorg, const RCP<const CrsGraph>& sparsityPattern) const {
226 using execution_space = typename Node::execution_space;
227
228 // construct the inverse matrix with the given sparsity pattern
229 RCP<Matrix> Ainv = MatrixFactory::Build(sparsityPattern);
230 Ainv->resumeFill();
231
232 // gather missing rows from other procs to generate an overlapping map
233 RCP<Import> rowImport = ImportFactory::Build(sparsityPattern->getRowMap(), sparsityPattern->getColMap());
234 RCP<Matrix> A = MatrixFactory::Build(Aorg, *rowImport);
235
236 auto maxRowEntriesAinv = Ainv->getLocalMaxNumRowEntries();
237 auto maxRowEntriesA = A->getLocalMaxNumRowEntries();
238 auto maxUniqueColEntries = maxRowEntriesAinv * maxRowEntriesA;
239 {
240 auto lclA = A->getLocalMatrixDevice();
241 auto lclAinv = Ainv->getLocalMatrixDevice();
242
243 Kokkos::TeamPolicy<execution_space> policy(lclAinv.numRows(), 1);
244
245 using spai_functor_type = LocalSPAIFunctor<decltype(lclAinv)>;
246 using shared_matrix = typename spai_functor_type::shared_matrix;
247 using shared_vector = typename spai_functor_type::shared_vector;
248 using shared_lo_vector = typename spai_functor_type::shared_lo_vector;
249
250 int size = shared_matrix::shmem_size(maxUniqueColEntries, maxRowEntriesAinv) + shared_matrix::shmem_size(maxUniqueColEntries, 1) + shared_vector::shmem_size(3 * maxUniqueColEntries) + shared_vector::shmem_size(maxRowEntriesAinv) + shared_lo_vector::shmem_size(maxUniqueColEntries);
251
252 int scratchLevel = -1;
253 if (size < policy.scratch_size_max(/*level=*/(int)0)) {
254 policy.set_scratch_size(/*level=*/(int)0, Kokkos::PerTeam(size));
255 scratchLevel = 0;
256 } else if (size < policy.scratch_size_max(/*level=*/(int)1)) {
257 policy.set_scratch_size(/*level=*/(int)1, Kokkos::PerTeam(size));
258 scratchLevel = 1;
259 } else
260 throw Exceptions::RuntimeError("Neither L0 scratch memory (max size " + std::to_string(policy.scratch_size_max((int)0)) +
261 "), nor L1 scratch memory (max size " + std::to_string(policy.scratch_size_max((int)1)) +
262 ") is large enough for requested allocation of size " + std::to_string(size));
263
264 LocalSPAIFunctor spaiFunctor(lclA, lclAinv, maxUniqueColEntries, scratchLevel);
265
266 Kokkos::parallel_for("MueLu::InverseFactory::LocalSpai", policy, spaiFunctor);
267 }
268
269 Ainv->fillComplete();
270
271 return Ainv;
272}
273
274} // namespace MueLu
275
276#endif /* MUELU_INVERSEAPPROXIMATIONFACTORY_DEF_HPP_ */
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Build(Level &currentLevel) const
Build an object with this factory.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
RCP< Matrix > GetSparseInverse(const RCP< Matrix > &A, const RCP< const CrsGraph > &sparsityPattern) const
Sparse inverse calculation method.
Class that holds all level-specific information.
KokkosKernels::ArithTraits< impl_scalar_type > impl_ATS
Kokkos::View< impl_scalar_type *, typename execution_space::scratch_memory_space, Kokkos::MemoryUnmanaged > shared_vector
Kokkos::View< impl_scalar_type **, typename execution_space::scratch_memory_space, Kokkos::MemoryUnmanaged > shared_matrix
typename local_matrix_type::ordinal_type local_ordinal_type
typename KokkosKernels::ArithTraits< scalar_type >::val_type impl_scalar_type
Kokkos::View< local_ordinal_type *, typename execution_space::scratch_memory_space, Kokkos::MemoryUnmanaged > shared_lo_vector
typename local_matrix_type::execution_space execution_space
LocalSPAIFunctor(const local_matrix_type &lclA_, local_matrix_type &lclAinv_, local_ordinal_type maxUniqueColEntries_, int scratchLevel_)
typename local_matrix_type::value_type scalar_type
KOKKOS_INLINE_FUNCTION void operator()(const typename Kokkos::TeamPolicy< execution_space >::member_type &thread) const
static const RCP< const NoFactory > getRCP()
Static Get() functions.
static Teuchos::RCP< Vector > GetInverse(Teuchos::RCP< const Vector > v, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps() *100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero())
Return vector containing inverse of input vector.
static RCP< Xpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > GetThresholdedGraph(const RCP< Matrix > &A, const Magnitude threshold)
Threshold a graph.
static RCP< Matrix > GetThresholdedMatrix(const RCP< Matrix > &Ain, const Magnitude threshold, const bool keepDiagonal=true)
Threshold a matrix.
static Teuchos::RCP< Vector > GetLumpedMatrixDiagonal(Matrix const &A, const bool doReciprocal=false, Magnitude tol=Teuchos::ScalarTraits< Scalar >::magnitude(Teuchos::ScalarTraits< Scalar >::zero()), Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool replaceSingleEntryRowWithZero=false, const bool useAverageAbsDiagVal=false)
Extract Matrix Diagonal of lumped matrix.
Namespace for MueLu classes and methods.
@ Statistics1
Print more statistics.