10#ifndef MUELU_INVERSEAPPROXIMATIONFACTORY_DEF_HPP_
11#define MUELU_INVERSEAPPROXIMATIONFACTORY_DEF_HPP_
13#include <Xpetra_BlockedCrsMatrix.hpp>
14#include <Xpetra_CrsGraph.hpp>
15#include <Xpetra_CrsMatrixWrap.hpp>
16#include <Xpetra_CrsMatrix.hpp>
17#include <Xpetra_VectorFactory.hpp>
18#include <Xpetra_MatrixFactory.hpp>
19#include <Xpetra_Matrix.hpp>
21#include "Kokkos_Sort.hpp"
22#include "KokkosBlas1_set.hpp"
23#include "KokkosBatched_QR_Decl.hpp"
24#include "KokkosBatched_ApplyQ_Decl.hpp"
25#include "KokkosBatched_Trsv_Decl.hpp"
26#include "KokkosBatched_Util.hpp"
30#include "MueLu_Utilities.hpp"
33#if KOKKOSKERNELS_VERSION < 50102
34#include "Teuchos_SerialDenseVector.hpp"
35#include "Teuchos_SerialDenseMatrix.hpp"
36#include "Teuchos_SerialQRDenseSolver.hpp"
41template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
43 RCP<ParameterList> validParamList = rcp(
new ParameterList());
44 using Magnitude =
typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
46 validParamList->set<RCP<const FactoryBase>>(
"A",
NoFactory::getRCP(),
"Matrix to build the approximate inverse on.\n");
48 validParamList->set<std::string>(
"inverse: approximation type",
"diagonal",
"Method used to approximate the inverse.");
49 validParamList->set<Magnitude>(
"inverse: drop tolerance", 0.0,
"Values below this threshold are dropped from the matrix (or fixed if diagonal fixing is active).");
50 validParamList->set<
bool>(
"inverse: fixing",
false,
"Keep diagonal and fix small entries with 1.0");
52 return validParamList;
55template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
57 Input(currentLevel,
"A");
60template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
64 using STS = Teuchos::ScalarTraits<SC>;
65 const SC one = STS::one();
66 using Magnitude =
typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
68 const ParameterList& pL = GetParameterList();
69 const bool fixing = pL.get<
bool>(
"inverse: fixing");
72 const std::string method = pL.get<std::string>(
"inverse: approximation type");
73 TEUCHOS_TEST_FOR_EXCEPTION(method !=
"diagonal" && method !=
"lumping" && method !=
"sparseapproxinverse",
Exceptions::RuntimeError,
74 "MueLu::InverseApproximationFactory::Build: Approximation type can be 'diagonal' or 'lumping' or "
75 "'sparseapproxinverse'.");
77 RCP<Matrix> A = Get<RCP<Matrix>>(currentLevel,
"A");
78 RCP<BlockedCrsMatrix> bA = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(A);
79 const bool isBlocked = (bA == Teuchos::null ? false :
true);
82 if (isBlocked) A = bA->getMatrix(0, 0);
84 const Magnitude tol = pL.get<Magnitude>(
"inverse: drop tolerance");
85 RCP<Matrix> Ainv = Teuchos::null;
87 if (method ==
"diagonal") {
88 const auto diag = VectorFactory::Build(A->getRangeMap(),
true);
89 A->getLocalDiagCopy(*diag);
91 Ainv = MatrixFactory::Build(D);
92 }
else if (method ==
"lumping") {
95 Ainv = MatrixFactory::Build(D);
96 }
else if (method ==
"sparseapproxinverse") {
99 sparsityPattern->computeGlobalConstants();
100 GetOStream(
Statistics1) <<
"NNZ Graph(A): " << A->getCrsGraph()->getGlobalNumEntries() <<
" , NNZ Tresholded Graph(A): " << sparsityPattern->getGlobalNumEntries() << std::endl;
102 RCP<Matrix> pAinv = GetSparseInverse(A, sparsityPattern);
105 rcp_const_cast<CrsGraph>(Ainv->getCrsGraph())->computeGlobalConstants();
106 GetOStream(
Statistics1) <<
"NNZ Ainv: " << pAinv->getGlobalNumEntries() <<
", NNZ Tresholded Ainv (parameter: " << tol <<
"): " << Ainv->getGlobalNumEntries() << std::endl;
110 GetOStream(
Statistics1) <<
"Approximate inverse calculated by: " << method <<
"." << std::endl;
111 GetOStream(
Statistics1) <<
"Ainv has " << Ainv->getGlobalNumRows() <<
"x" << Ainv->getGlobalNumCols() <<
" rows and columns." << std::endl;
113 Set(currentLevel,
"Ainv", Ainv);
116#if KOKKOSKERNELS_VERSION >= 50102
118template <
class local_matrix_type>
119class LocalSPAIFunctor {
121 using scalar_type =
typename local_matrix_type::value_type;
122 using local_ordinal_type =
typename local_matrix_type::ordinal_type;
123 using execution_space =
typename local_matrix_type::execution_space;
124 using impl_scalar_type =
typename KokkosKernels::ArithTraits<scalar_type>::val_type;
125 using impl_ATS = KokkosKernels::ArithTraits<impl_scalar_type>;
128 using shared_matrix = Kokkos::View<impl_scalar_type**, typename execution_space::scratch_memory_space, Kokkos::MemoryUnmanaged>;
129 using shared_vector = Kokkos::View<impl_scalar_type*, typename execution_space::scratch_memory_space, Kokkos::MemoryUnmanaged>;
130 using shared_lo_vector = Kokkos::View<local_ordinal_type*, typename execution_space::scratch_memory_space, Kokkos::MemoryUnmanaged>;
133 const local_matrix_type lclA;
134 local_matrix_type lclAinv;
135 const local_ordinal_type maxUniqueColEntries;
136 const int scratchLevel;
139 LocalSPAIFunctor(
const local_matrix_type& lclA_, local_matrix_type& lclAinv_, local_ordinal_type maxUniqueColEntries_,
int scratchLevel_)
142 , maxUniqueColEntries(maxUniqueColEntries_)
143 , scratchLevel(scratchLevel_) {}
145 KOKKOS_INLINE_FUNCTION
146 void operator()(
const typename Kokkos::TeamPolicy<execution_space>::member_type& thread)
const {
147 auto rlid = thread.league_rank();
148 auto rowAinv = lclAinv.row(rlid);
151 shared_lo_vector column_indices(thread.team_scratch(scratchLevel), maxUniqueColEntries);
152 local_ordinal_type numColEntries = 0;
153 for (local_ordinal_type ii = 0; ii < rowAinv.length; ++ii) {
154 auto i = rowAinv.colidx(ii);
155 auto rowA = lclA.rowConst(i);
156 for (local_ordinal_type jj = 0; jj < rowA.length; ++jj) {
157 auto j = rowA.colidx(jj);
158 column_indices(numColEntries) = j;
164 local_ordinal_type numUniqeColEntries = 0;
165 local_ordinal_type diagOffset = 0;
168 Kokkos::Experimental::sort_team(thread, Kokkos::subview(column_indices, Kokkos::make_pair(0, numColEntries)));
170 if (numColEntries > 0)
171 ++numUniqeColEntries;
172 local_ordinal_type pos = 0;
173 for (local_ordinal_type m = 1; m < numColEntries; ++m) {
174 if (column_indices(pos) != column_indices(m)) {
175 column_indices(pos + 1) = column_indices(m);
177 ++numUniqeColEntries;
178 if (column_indices(pos) == rlid)
185 shared_matrix localA(thread.team_scratch(scratchLevel), numUniqeColEntries, rowAinv.length);
186 KokkosBlas::SerialSet::invoke(impl_ATS::zero(), localA);
189 for (local_ordinal_type ii = 0; ii < rowAinv.length; ++ii) {
190 auto i = rowAinv.colidx(ii);
191 auto rowA = lclA.rowConst(i);
192 for (local_ordinal_type jj = 0; jj < rowA.length; ++jj) {
193 auto j = rowA.colidx(jj);
194 auto v = rowA.value(jj);
197 for (local_ordinal_type m = 0; m < numUniqeColEntries; ++m) {
198 if (column_indices(m) == j) {
206 shared_matrix ek(thread.team_scratch(scratchLevel), numUniqeColEntries, 1);
208 for (local_ordinal_type i = 0; i < numUniqeColEntries; ++i) {
209 ek(i, 0) = (i == diagOffset) ? impl_ATS::one() : impl_ATS::zero();
213 shared_vector tau(thread.team_scratch(scratchLevel), rowAinv.length);
214 shared_vector work(thread.team_scratch(scratchLevel), numUniqeColEntries);
216 KokkosBatched::SerialQR<KokkosBatched::Algo::QR::Unblocked>::invoke(localA, tau, work);
218 KokkosBatched::SerialApplyQ<KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose, KokkosBatched::Algo::ApplyQ::Unblocked>::invoke(localA, tau, ek, work);
220 auto sub_A = Kokkos::subview(localA, Kokkos::make_pair(0, rowAinv.length), Kokkos::ALL());
221 auto sub_ek = Kokkos::subview(ek, Kokkos::make_pair(0, rowAinv.length), 0);
222 KokkosBatched::SerialTrsv<KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose, KokkosBatched::Diag::NonUnit, KokkosBatched::Algo::Trsv::Unblocked>::invoke(impl_ATS::one(), sub_A, sub_ek);
225 for (local_ordinal_type i = 0; i < rowAinv.length; ++i) {
226 rowAinv.value(i) = sub_ek(i);
231template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
232RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
234 using execution_space =
typename Node::execution_space;
237 RCP<Matrix> Ainv = MatrixFactory::Build(sparsityPattern);
241 RCP<Import> rowImport = ImportFactory::Build(sparsityPattern->getRowMap(), sparsityPattern->getColMap());
242 RCP<Matrix> A = MatrixFactory::Build(Aorg, *rowImport);
244 auto maxRowEntriesAinv = Ainv->getLocalMaxNumRowEntries();
245 auto maxRowEntriesA = A->getLocalMaxNumRowEntries();
246 auto maxUniqueColEntries = maxRowEntriesAinv * maxRowEntriesA;
248 auto lclA = A->getLocalMatrixDevice();
249 auto lclAinv = Ainv->getLocalMatrixDevice();
251 Kokkos::TeamPolicy<execution_space> policy(lclAinv.numRows(), 1);
253 using spai_functor_type = LocalSPAIFunctor<
decltype(lclAinv)>;
254 using shared_matrix =
typename spai_functor_type::shared_matrix;
255 using shared_vector =
typename spai_functor_type::shared_vector;
256 using shared_lo_vector =
typename spai_functor_type::shared_lo_vector;
258 int size = shared_matrix::shmem_size(maxUniqueColEntries, maxRowEntriesAinv) + shared_matrix::shmem_size(maxUniqueColEntries, 1) + shared_vector::shmem_size(3 * maxUniqueColEntries) + shared_vector::shmem_size(maxRowEntriesAinv) + shared_lo_vector::shmem_size(maxUniqueColEntries);
260 int scratchLevel = -1;
261 if (size < policy.scratch_size_max((
int)0)) {
262 policy.set_scratch_size((
int)0, Kokkos::PerTeam(size));
264 }
else if (size < policy.scratch_size_max((
int)1)) {
265 policy.set_scratch_size((
int)1, Kokkos::PerTeam(size));
268 throw Exceptions::RuntimeError(
"Neither L0 scratch memory (max size " + std::to_string(policy.scratch_size_max((
int)0)) +
269 "), nor L1 scratch memory (max size " + std::to_string(policy.scratch_size_max((
int)1)) +
270 ") is large enough for requested allocation of size " + std::to_string(size));
272 LocalSPAIFunctor spaiFunctor(lclA, lclAinv, maxUniqueColEntries, scratchLevel);
274 Kokkos::parallel_for(
"MueLu::InverseFactory::LocalSpai", policy, spaiFunctor);
277 Ainv->fillComplete();
284template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
285RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
288 RCP<Matrix> Ainv = MatrixFactory::Build(sparsityPattern);
292 RCP<Import> rowImport = ImportFactory::Build(sparsityPattern->getRowMap(), sparsityPattern->getColMap());
293 RCP<Matrix> A = MatrixFactory::Build(Aorg, *rowImport);
296 for (
size_t k = 0; k < sparsityPattern->getLocalNumRows(); k++) {
298 ArrayView<const LO> Ik;
299 sparsityPattern->getLocalRowView(k, Ik);
302 Array<ArrayView<const LO>> J(Ik.size());
303 Array<ArrayView<const SC>> Ak(Ik.size());
305 for (LO i = 0; i < Ik.size(); i++) {
306 A->getLocalRowView(Ik[i], J[i], Ak[i]);
307 for (LO j = 0; j < J[i].size(); j++)
311 std::sort(Jk.begin(), Jk.end());
312 Jk.erase(std::unique(Jk.begin(), Jk.end()), Jk.end());
315 for (LO i = 0; i < Jk.size(); i++) G.insert(std::pair<LO, LO>(Jk[i], i));
318 Teuchos::SerialDenseMatrix<LO, SC> localA(Jk.size(), Ik.size(),
true);
319 for (LO i = 0; i < Ik.size(); i++) {
320 for (LO j = 0; j < J[i].size(); j++) {
321 localA(G.at(J[i][j]), i) = Ak[i][j];
327 Teuchos::SerialDenseVector<LO, SC> ek(Jk.size(),
true);
328 ek[std::find(Jk.begin(), Jk.end(), k) - Jk.begin()] = Teuchos::ScalarTraits<Scalar>::one();
332 Teuchos::SerialDenseVector<LO, SC> localX(Ik.size());
333 Teuchos::SerialQRDenseSolver<LO, SC> qrSolver;
334 qrSolver.setMatrix(Teuchos::rcp(&localA,
false));
335 qrSolver.setVectors(Teuchos::rcp(&localX,
false), Teuchos::rcp(&ek,
false));
336 const int err = qrSolver.solve();
338 "MueLu::InverseApproximationFactory::GetSparseInverse: Error in serial QR solve.");
341 ArrayView<const SC> Mk(localX.values(), localX.length());
342 Ainv->replaceLocalValues(k, Ik, Mk);
344 Ainv->fillComplete();
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Build(Level ¤tLevel) const
Build an object with this factory.
void DeclareInput(Level ¤tLevel) const
Input.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
RCP< Matrix > GetSparseInverse(const RCP< Matrix > &A, const RCP< const CrsGraph > &sparsityPattern) const
Sparse inverse calculation method.
Class that holds all level-specific information.
static const RCP< const NoFactory > getRCP()
Static Get() functions.
static Teuchos::RCP< Vector > GetInverse(Teuchos::RCP< const Vector > v, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps() *100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero())
Return vector containing inverse of input vector.
static RCP< Xpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > GetThresholdedGraph(const RCP< Matrix > &A, const Magnitude threshold)
Threshold a graph.
static RCP< Matrix > GetThresholdedMatrix(const RCP< Matrix > &Ain, const Magnitude threshold, const bool keepDiagonal=true)
Threshold a matrix.
static Teuchos::RCP< Vector > GetLumpedMatrixDiagonal(Matrix const &A, const bool doReciprocal=false, Magnitude tol=Teuchos::ScalarTraits< Scalar >::magnitude(Teuchos::ScalarTraits< Scalar >::zero()), Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool replaceSingleEntryRowWithZero=false, const bool useAverageAbsDiagVal=false)
Extract Matrix Diagonal of lumped matrix.
Namespace for MueLu classes and methods.
@ Statistics1
Print more statistics.