MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_SaPFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_SAPFACTORY_DEF_HPP
11#define MUELU_SAPFACTORY_DEF_HPP
12
13#include "KokkosKernels_ArithTraits.hpp"
15
16#include <Xpetra_Matrix.hpp>
17
19#include "MueLu_Level.hpp"
20#include "MueLu_MasterList.hpp"
21#include "MueLu_Monitor.hpp"
22#include "MueLu_PerfUtils.hpp"
23#include "MueLu_TentativePFactory.hpp"
24#include "MueLu_IteratorOps.hpp"
25#include "MueLu_Utilities.hpp"
26
27#include <sstream>
28
29namespace MueLu {
30
31template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
33
34template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
36
37template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
39 RCP<ParameterList> validParamList = rcp(new ParameterList());
40
41#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
42 SET_VALID_ENTRY("sa: damping factor");
43 SET_VALID_ENTRY("sa: nodal damping factor");
44 SET_VALID_ENTRY("sa: calculate eigenvalue estimate");
45 SET_VALID_ENTRY("sa: eigenvalue estimate num iterations");
46 SET_VALID_ENTRY("sa: use rowsumabs diagonal scaling");
47 SET_VALID_ENTRY("sa: enforce constraints");
48 SET_VALID_ENTRY("tentative: calculate qr");
49 SET_VALID_ENTRY("sa: max eigenvalue");
50 SET_VALID_ENTRY("sa: diagonal replacement tolerance");
51 SET_VALID_ENTRY("sa: rowsumabs diagonal replacement tolerance");
52 SET_VALID_ENTRY("sa: rowsumabs diagonal replacement value");
53 SET_VALID_ENTRY("sa: rowsumabs replace single entry row with zero");
54 SET_VALID_ENTRY("sa: rowsumabs use automatic diagonal tolerance");
55 SET_VALID_ENTRY("use kokkos refactor");
56#undef SET_VALID_ENTRY
57 validParamList->set("sa: maxwell1 smoothing", false);
58
59 validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A used during the prolongator smoothing process");
60 validParamList->set<RCP<const FactoryBase>>("P", Teuchos::null, "Tentative prolongator factory");
61
62 validParamList->set<RCP<const FactoryBase>>("CurlCurl", Teuchos::null, "Generating factory of the matrix CurlCurl used during the prolongator smoothing process for Maxwell1");
63
64 // Make sure we don't recursively validate options for the matrixmatrix kernels
65 ParameterList norecurse;
66 norecurse.disableRecursiveValidation();
67 validParamList->set<ParameterList>("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
68
69 return validParamList;
70}
71
72template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
74 Input(fineLevel, "A");
75
76 // Get default tentative prolongator factory
77 // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
78 RCP<const FactoryBase> initialPFact = GetFactory("P");
79 if (initialPFact == Teuchos::null) {
80 initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent");
81 }
82 coarseLevel.DeclareInput("P", initialPFact.get(), this);
83
84 const ParameterList& pL = GetParameterList();
85 if (pL.get<bool>("sa: maxwell1 smoothing") && pL.get<double>("sa: damping factor") != 0.0)
86 fineLevel.DeclareInput("CurlCurl", GetFactory("CurlCurl").get(), this);
87}
88
89template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
91 return BuildP(fineLevel, coarseLevel);
92}
93
94template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
96 FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel);
97
98 std::string levelIDs = toString(coarseLevel.GetLevelID());
99
100 const std::string prefix = "MueLu::SaPFactory(" + levelIDs + "): ";
101
102 typedef typename Teuchos::ScalarTraits<SC>::coordinateType Coordinate;
103 typedef typename Teuchos::ScalarTraits<SC>::magnitudeType MT;
104
105 // Get default tentative prolongator factory
106 // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
107 // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
108 RCP<const FactoryBase> initialPFact = GetFactory("P");
109 if (initialPFact == Teuchos::null) {
110 initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent");
111 }
112 const ParameterList& pL = GetParameterList();
113
114 // Level Get
115 RCP<Matrix> A = Get<RCP<Matrix>>(fineLevel, "A");
116 RCP<Matrix> Ptent = coarseLevel.Get<RCP<Matrix>>("P", initialPFact.get());
117 RCP<Matrix> finalP;
118 // If Tentative facctory bailed out (e.g., number of global aggregates is 0), then SaPFactory bails
119 // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
120 if (Ptent == Teuchos::null) {
121 finalP = Teuchos::null;
122 Set(coarseLevel, "P", finalP);
123 return;
124 }
125
126 if (restrictionMode_) {
127 SubFactoryMonitor m2(*this, "Transpose A", coarseLevel);
128 A = Utilities::Transpose(*A, true); // build transpose of A explicitly
129 }
130
131 // Build final prolongator
132
133 // Reuse pattern if available
134 RCP<ParameterList> APparams;
135 if (pL.isSublist("matrixmatrix: kernel params"))
136 APparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
137 else
138 APparams = rcp(new ParameterList);
139 if (coarseLevel.IsAvailable("AP reuse data", this)) {
140 GetOStream(static_cast<MsgType>(Runtime0 | Test)) << "Reusing previous AP data" << std::endl;
141
142 APparams = coarseLevel.Get<RCP<ParameterList>>("AP reuse data", this);
143
144 if (APparams->isParameter("graph"))
145 finalP = APparams->get<RCP<Matrix>>("graph");
146 }
147 // By default, we don't need global constants for SaP
148 APparams->set("compute global constants: temporaries", APparams->get("compute global constants: temporaries", false));
149 APparams->set("compute global constants", APparams->get("compute global constants", false));
150
151 const SC dampingFactor = as<SC>(pL.get<double>("sa: damping factor"));
152 const LO maxEigenIterations = as<LO>(pL.get<int>("sa: eigenvalue estimate num iterations"));
153 const bool estimateMaxEigen = pL.get<bool>("sa: calculate eigenvalue estimate");
154 const bool useAbsValueRowSum = pL.get<bool>("sa: use rowsumabs diagonal scaling");
155 const bool doQRStep = pL.get<bool>("tentative: calculate qr");
156 const bool enforceConstraints = pL.get<bool>("sa: enforce constraints");
157 const MT userDefinedMaxEigen = as<MT>(pL.get<double>("sa: max eigenvalue"));
158 double dTol = pL.get<double>("sa: diagonal replacement tolerance");
159 double dTol_rs = pL.get<double>("sa: rowsumabs diagonal replacement tolerance");
160 const MT diagonalReplacementTolerance = (dTol == as<double>(-1) ? Teuchos::ScalarTraits<MT>::eps() * 100 : as<MT>(pL.get<double>("sa: diagonal replacement tolerance")));
161 const MT diagonalReplacementTolerance_rs = (dTol_rs == as<double>(-1) ? Teuchos::ScalarTraits<MT>::eps() * 100 : as<MT>(pL.get<double>("sa: rowsumabs diagonal replacement tolerance")));
162
163 const SC diagonalReplacementValue = as<SC>(pL.get<double>("sa: rowsumabs diagonal replacement value"));
164 const bool replaceSingleEntryRowWithZero = pL.get<bool>("sa: rowsumabs replace single entry row with zero");
165 const bool useAutomaticDiagTol = pL.get<bool>("sa: rowsumabs use automatic diagonal tolerance");
166
167 // Sanity checking
168 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && enforceConstraints, Exceptions::RuntimeError,
169 "MueLu::TentativePFactory::MakeTentative: cannot use 'enforce constraints' and 'calculate qr' at the same time");
170
171 // Are we using the special prolongator smoothing for smoothed Reitzinger-Schoeberl?
172 const bool specialMaxwell1Smoothing = pL.get<bool>("sa: maxwell1 smoothing");
173
174 if (!specialMaxwell1Smoothing) {
175 if (dampingFactor != Teuchos::ScalarTraits<SC>::zero()) {
176 Scalar lambdaMax;
177 Teuchos::RCP<Vector> invDiag;
178 if (userDefinedMaxEigen == -1.) {
179 SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
180 lambdaMax = A->GetMaxEigenvalueEstimate();
181 if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
182 GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = " << maxEigenIterations << ((useAbsValueRowSum) ? ", use rowSumAbs diagonal)" : ", use point diagonal)") << std::endl;
183 Coordinate stopTol = 1e-4;
184 if (useAbsValueRowSum) {
185 const bool returnReciprocal = true;
186 invDiag = Utilities::GetLumpedMatrixDiagonal(*A, returnReciprocal,
187 diagonalReplacementTolerance_rs,
188 diagonalReplacementValue,
189 replaceSingleEntryRowWithZero,
190 useAutomaticDiagTol);
191 TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError,
192 "SaPFactory: eigenvalue estimate: diagonal reciprocal is null.");
193 lambdaMax = Utilities::PowerMethod(*A, invDiag, maxEigenIterations, stopTol);
194 } else
195 lambdaMax = Utilities::PowerMethod(*A, true, maxEigenIterations, stopTol, diagonalReplacementTolerance);
196 A->SetMaxEigenvalueEstimate(lambdaMax);
197 } else {
198 GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl;
199 }
200 } else {
201 lambdaMax = userDefinedMaxEigen;
202 A->SetMaxEigenvalueEstimate(lambdaMax);
203 }
204 MT omega = Teuchos::ScalarTraits<SC>::magnitude(dampingFactor) / Teuchos::ScalarTraits<SC>::magnitude(lambdaMax);
205 GetOStream(Statistics1) << "Prolongator damping factor = " << omega << " (|" << dampingFactor << " / " << lambdaMax << "|)" << std::endl;
206
207 {
208 SubFactoryMonitor m2(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel);
209 if (!useAbsValueRowSum)
210 invDiag = Utilities::GetMatrixDiagonalInverse(*A); // default
211 else if (invDiag == Teuchos::null) {
212 GetOStream(Runtime0) << "Using rowsumabs diagonal" << std::endl;
213 const bool returnReciprocal = true;
214 invDiag = Utilities::GetLumpedMatrixDiagonal(*A, returnReciprocal,
215 diagonalReplacementTolerance_rs,
216 diagonalReplacementValue,
217 replaceSingleEntryRowWithZero,
218 useAutomaticDiagTol);
219 TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError, "SaPFactory: diagonal reciprocal is null.");
220 }
221
222 TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(omega), Exceptions::RuntimeError, "Prolongator damping factor needs to be finite.");
223
224 {
225 SubFactoryMonitor m3(*this, "MueLu::IteratorOps::Jacobi", coarseLevel);
226 // finalP = (I - \omega D^{-1}A) Ptent
227 finalP = MueLu::IteratorOps<SC, LO, GO, NO>::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(Statistics2), std::string("MueLu::SaP-") + toString(coarseLevel.GetLevelID()), APparams);
228 if (enforceConstraints) {
229 if (!pL.get<bool>("use kokkos refactor")) {
230 if (A->GetFixedBlockSize() == 1)
231 optimalSatisfyPConstraintsForScalarPDEsNonKokkos(finalP);
232 else
233 SatisfyPConstraintsNonKokkos(A, finalP);
234 } else {
235 // if (A->GetFixedBlockSize() == 1)
236 // optimalSatisfyPConstraintsForScalarPDEs(finalP);
237 // else
238 SatisfyPConstraints(A, finalP);
239 }
240 }
241 }
242 }
243
244 } else {
245 finalP = Ptent;
246 }
247 } else {
248 // We are on the (1,1) hierarchy for Maxwell1.
249 // Instead of smoothing with I - omega * Dinv(A) * A, we are smoothing with
250 // I - beta * D0 * Dinv(NodeMatrix) * D0^T * A
251 // I - alpha * Dinv(CurlCurl) * CurlCurl, and
252 // where beta matches the damping that was used for the nodal operator.
253
254 const SC nodalDampingFactor = as<SC>(pL.get<double>("sa: nodal damping factor"));
255
256 const bool nodalAndEdgeSmoothing = ((fineLevel.IsAvailable("D0") && fineLevel.IsAvailable("NodeMatrix")) && (nodalDampingFactor != Teuchos::ScalarTraits<SC>::zero()) && (fineLevel.Get<RCP<Matrix>>("NodeMatrix")->GetMaxEigenvalueEstimate() != -Teuchos::ScalarTraits<SC>::one()));
257 const bool edgeSmoothing = (fineLevel.IsAvailable("CurlCurl") && (dampingFactor != Teuchos::ScalarTraits<SC>::zero()));
258
259 RCP<Matrix> P_afterNodalAndEdgeSmoothing;
260 if (nodalAndEdgeSmoothing) {
261 auto D0 = fineLevel.Get<RCP<Matrix>>("D0");
262 auto NodeMatrix = fineLevel.Get<RCP<Matrix>>("NodeMatrix");
263
264 auto lambdaMaxNodal = NodeMatrix->GetMaxEigenvalueEstimate();
265 MT beta = Teuchos::ScalarTraits<SC>::magnitude(nodalDampingFactor) / Teuchos::ScalarTraits<SC>::magnitude(lambdaMaxNodal);
266 GetOStream(Statistics1) << "Maxwell1 prolongator nodal damping factor = " << beta << " (|" << nodalDampingFactor << " / " << lambdaMaxNodal << "|)" << std::endl;
267 TEUCHOS_TEST_FOR_EXCEPTION(lambdaMaxNodal == -Teuchos::ScalarTraits<SC>::one(), Exceptions::RuntimeError, "Prolongator damping factor obtained from NodeMatrix is -1.")
268
269 TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(beta), Exceptions::RuntimeError, "Prolongator damping factor needs to be finite.");
270
271 {
272 SubFactoryMonitor m3(*this, "MueLu::IteratorOps::JacobiMaxwell1", coarseLevel);
273 P_afterNodalAndEdgeSmoothing = JacobiMaxwell1(A, NodeMatrix, D0, Ptent, beta);
274 }
275 } else {
276 if (!fineLevel.IsAvailable("D0") || !fineLevel.IsAvailable("NodeMatrix"))
277 GetOStream(Warnings) << "Skipping nodal&edge smoothing step since D0 or NodeMatrix matrix is not available\n";
278 else if (nodalDampingFactor == Teuchos::ScalarTraits<SC>::zero())
279 GetOStream(Statistics1) << "Skipping nodal&edge smoothing step since nodal damping factor is zero\n";
280 else if (fineLevel.Get<RCP<Matrix>>("NodeMatrix")->GetMaxEigenvalueEstimate() == -Teuchos::ScalarTraits<SC>::one())
281 GetOStream(Warnings) << "Skipping nodal&edge smoothing step since NodeMatrix matrix has invalid eigenvalue estimate\n";
282 P_afterNodalAndEdgeSmoothing = Ptent;
283 }
284
285 if (edgeSmoothing) {
286 auto CurlCurl = fineLevel.Get<RCP<Matrix>>("CurlCurl", GetFactory("CurlCurl").get());
287
288 Scalar lambdaMax;
289 Teuchos::RCP<Vector> invDiag;
290 if (userDefinedMaxEigen == -1.) {
291 SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
292 lambdaMax = CurlCurl->GetMaxEigenvalueEstimate();
293 if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
294 GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = " << maxEigenIterations << ((useAbsValueRowSum) ? ", use rowSumAbs diagonal)" : ", use point diagonal)") << std::endl;
295 Coordinate stopTol = 1e-4;
296 if (useAbsValueRowSum) {
297 const bool returnReciprocal = true;
298 invDiag = Utilities::GetLumpedMatrixDiagonal(*CurlCurl, returnReciprocal,
299 diagonalReplacementTolerance_rs,
300 diagonalReplacementValue,
301 replaceSingleEntryRowWithZero,
302 useAutomaticDiagTol);
303 TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError,
304 "SaPFactory: eigenvalue estimate: diagonal reciprocal is null.");
305 lambdaMax = Utilities::PowerMethod(*CurlCurl, invDiag, maxEigenIterations, stopTol);
306 } else
307 lambdaMax = Utilities::PowerMethod(*CurlCurl, true, maxEigenIterations, stopTol, diagonalReplacementTolerance);
308 CurlCurl->SetMaxEigenvalueEstimate(lambdaMax);
309 } else {
310 GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl;
311 }
312 } else {
313 lambdaMax = userDefinedMaxEigen;
314 CurlCurl->SetMaxEigenvalueEstimate(lambdaMax);
315 }
316
317 if (!useAbsValueRowSum)
318 invDiag = Utilities::GetMatrixDiagonalInverse(*CurlCurl); // default
319 else if (invDiag == Teuchos::null) {
320 GetOStream(Runtime0) << "Using rowsumabs diagonal" << std::endl;
321 const bool returnReciprocal = true;
322 invDiag = Utilities::GetLumpedMatrixDiagonal(*CurlCurl, returnReciprocal,
323 diagonalReplacementTolerance_rs,
324 diagonalReplacementValue,
325 replaceSingleEntryRowWithZero,
326 useAutomaticDiagTol);
327 TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError, "SaPFactory: diagonal reciprocal is null.");
328 }
329
330 MT alpha = Teuchos::ScalarTraits<SC>::magnitude(dampingFactor) / Teuchos::ScalarTraits<SC>::magnitude(lambdaMax);
331 TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(alpha), Exceptions::RuntimeError, "Prolongator damping factor needs to be finite.");
332
333 GetOStream(Statistics1) << "Maxwell1 prolongator edge damping factor = " << alpha << " (|" << dampingFactor << " / " << lambdaMax << "|)" << std::endl;
334
335 {
336 SubFactoryMonitor m3(*this, "MueLu::IteratorOps::Jacobi", coarseLevel);
337 // finalP = (I - \omega D^{-1}CurlCurl) P_afterNodalAndEdgeSmoothing
338 finalP = MueLu::IteratorOps<SC, LO, GO, NO>::Jacobi(alpha, *invDiag, *CurlCurl, *P_afterNodalAndEdgeSmoothing, finalP, GetOStream(Statistics2), std::string("MueLu::SaP-") + toString(coarseLevel.GetLevelID()), APparams);
339 }
340 } else {
341 if (!fineLevel.IsAvailable("CurlCurl") && (dampingFactor != Teuchos::ScalarTraits<SC>::zero()))
342 GetOStream(Warnings) << "Skipping edge-only smoothing step since CurlCurl matrix is not available\n";
343 else
344 GetOStream(Statistics1) << "Maxwell1 prolongator edge smoothing skipped" << std::endl;
345 finalP = P_afterNodalAndEdgeSmoothing;
346 }
347 }
348
349 // Level Set
350 RCP<Matrix> R;
351 if (!restrictionMode_) {
352 // prolongation factory is in prolongation mode
353 if (!finalP.is_null()) {
354 std::ostringstream oss;
355 oss << "P_" << coarseLevel.GetLevelID();
356 finalP->setObjectLabel(oss.str());
357 }
358 Set(coarseLevel, "P", finalP);
359
360 APparams->set("graph", finalP);
361 Set(coarseLevel, "AP reuse data", APparams);
362
363 // NOTE: EXPERIMENTAL
364 if (Ptent->IsView("stridedMaps"))
365 finalP->CreateView("stridedMaps", Ptent);
366
367 } else {
368 // prolongation factory is in restriction mode
369 {
370 SubFactoryMonitor m2(*this, "Transpose P", coarseLevel);
371 R = Utilities::Transpose(*finalP, true);
372 if (!R.is_null()) {
373 std::ostringstream oss;
374 oss << "R_" << coarseLevel.GetLevelID();
375 R->setObjectLabel(oss.str());
376 }
377 }
378
379 Set(coarseLevel, "R", R);
380
381 // NOTE: EXPERIMENTAL
382 if (Ptent->IsView("stridedMaps"))
383 R->CreateView("stridedMaps", Ptent, true /*transposeA*/);
384 }
385
386 if (IsPrint(Statistics2)) {
387 RCP<ParameterList> params = rcp(new ParameterList());
388 params->set("printLoadBalancingInfo", true);
389 params->set("printCommInfo", true);
390 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo((!restrictionMode_ ? *finalP : *R), (!restrictionMode_ ? "P" : "R"), params);
391 }
392
393} // Build()
394
395// Analyze the grid transfer produced by smoothed aggregation and make
396// modifications if it does not look right. In particular, if there are
397// negative entries or entries larger than 1, modify P's rows.
398//
399// Note: this kind of evaluation probably only makes sense if not doing QR
400// when constructing tentative P.
401//
402// For entries that do not satisfy the two constraints (>= 0 or <=1) we set
403// these entries to the constraint value and modify the rest of the row
404// so that the row sum remains the same as before by adding an equal
405// amount to each remaining entry. However, if the original row sum value
406// violates the constraints, we set the row sum back to 1 (the row sum of
407// tentative P). After doing the modification to a row, we need to check
408// again the entire row to make sure that the modified row does not violate
409// the constraints.
410
411template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
413 const Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
414 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
415 LO nPDEs = A->GetFixedBlockSize();
416 Teuchos::ArrayRCP<Scalar> ConstraintViolationSum(nPDEs);
417 Teuchos::ArrayRCP<Scalar> Rsum(nPDEs);
418 Teuchos::ArrayRCP<size_t> nPositive(nPDEs);
419 for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum[k] = zero;
420 for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum[k] = zero;
421 for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive[k] = 0;
422
423 for (size_t i = 0; i < as<size_t>(P->getRowMap()->getLocalNumElements()); i++) {
424 Teuchos::ArrayView<const LocalOrdinal> indices;
425 Teuchos::ArrayView<const Scalar> vals1;
426 Teuchos::ArrayView<Scalar> vals;
427 P->getLocalRowView((LO)i, indices, vals1);
428 size_t nnz = indices.size();
429 if (nnz == 0) continue;
430
431 vals = ArrayView<Scalar>(const_cast<SC*>(vals1.getRawPtr()), nnz);
432
433 bool checkRow = true;
434
435 while (checkRow) {
436 // check constraints and compute the row sum
437
438 for (LO j = 0; j < indices.size(); j++) {
439 Rsum[j % nPDEs] += vals[j];
440 if (Teuchos::ScalarTraits<SC>::real(vals[j]) < Teuchos::ScalarTraits<SC>::real(zero)) {
441 ConstraintViolationSum[j % nPDEs] += vals[j];
442 vals[j] = zero;
443 } else {
444 if (Teuchos::ScalarTraits<SC>::real(vals[j]) != Teuchos::ScalarTraits<SC>::real(zero))
445 (nPositive[j % nPDEs])++;
446
447 if (Teuchos::ScalarTraits<SC>::real(vals[j]) > Teuchos::ScalarTraits<SC>::real(1.00001)) {
448 ConstraintViolationSum[j % nPDEs] += (vals[j] - one);
449 vals[j] = one;
450 }
451 }
452 }
453
454 checkRow = false;
455
456 // take into account any row sum that violates the contraints
457
458 for (size_t k = 0; k < (size_t)nPDEs; k++) {
459 if (Teuchos::ScalarTraits<SC>::real(Rsum[k]) < Teuchos::ScalarTraits<SC>::magnitude(zero)) {
460 ConstraintViolationSum[k] += (-Rsum[k]); // rstumin
461 } else if (Teuchos::ScalarTraits<SC>::real(Rsum[k]) > Teuchos::ScalarTraits<SC>::magnitude(1.00001)) {
462 ConstraintViolationSum[k] += (one - Rsum[k]); // rstumin
463 }
464 }
465
466 // check if row need modification
467 for (size_t k = 0; k < (size_t)nPDEs; k++) {
468 if (Teuchos::ScalarTraits<SC>::magnitude(ConstraintViolationSum[k]) != Teuchos::ScalarTraits<SC>::magnitude(zero))
469 checkRow = true;
470 }
471 // modify row
472 if (checkRow) {
473 for (LO j = 0; j < indices.size(); j++) {
474 if (Teuchos::ScalarTraits<SC>::real(vals[j]) > Teuchos::ScalarTraits<SC>::real(zero)) {
475 vals[j] += (ConstraintViolationSum[j % nPDEs] / (as<Scalar>(nPositive[j % nPDEs])));
476 }
477 }
478 for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum[k] = zero;
479 }
480 for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum[k] = zero;
481 for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive[k] = 0;
482 } // while (checkRow) ...
483 } // for (size_t i = 0; i < as<size_t>(P->getRowMap()->getNumNodeElements()); i++) ...
484} // SatsifyPConstraints()
485
486template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
488 const Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
489 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
490
491 LocalOrdinal maxEntriesPerRow = 100; // increased later if needed
492 Teuchos::ArrayRCP<Scalar> scalarData(3 * maxEntriesPerRow);
493 bool hasFeasible;
494
495 for (size_t i = 0; i < as<size_t>(P->getRowMap()->getLocalNumElements()); i++) {
496 Teuchos::ArrayView<const LocalOrdinal> indices;
497 Teuchos::ArrayView<const Scalar> vals1;
498 Teuchos::ArrayView<Scalar> vals;
499 P->getLocalRowView((LocalOrdinal)i, indices, vals1);
500 size_t nnz = indices.size();
501 if (nnz != 0) {
502 vals = ArrayView<Scalar>(const_cast<SC*>(vals1.getRawPtr()), nnz);
503 Scalar rsumTarget = zero;
504 for (size_t j = 0; j < nnz; j++) rsumTarget += vals[j];
505
506 if (nnz > as<size_t>(maxEntriesPerRow)) {
507 maxEntriesPerRow = nnz * 3;
508 scalarData.resize(3 * maxEntriesPerRow);
509 }
510 hasFeasible = constrainRow(vals.getRawPtr(), as<LocalOrdinal>(nnz), zero, one, rsumTarget, vals.getRawPtr(), scalarData.getRawPtr());
511
512 if (!hasFeasible) { // just set all entries to the same value giving a row sum of 1
513 for (size_t j = 0; j < nnz; j++) vals[j] = one / as<Scalar>(nnz);
514 }
515 }
516
517 } // for (size_t i = 0; i < as<size_t>(P->getRowMap()->getNumNodeElements()); i++) ...
518} // SatsifyPConstraints()
519
520template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
521bool SaPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::constrainRow(Scalar* orig, LocalOrdinal nEntries, Scalar leftBound, Scalar rghtBound, Scalar rsumTarget, Scalar* fixedUnsorted, Scalar* scalarData) const {
522 /*
523 Input
524 orig data that should be adjusted to satisfy bound constraints and
525 row sum constraint. orig is not modified by this function.
526
527 nEntries length or 'orig'
528
529 leftBound, define bound constraints for the results.
530 rghtBound
531
532 rsumTarget defines an equality constraint for the row sum
533
534 fixedUnsorted on output, if a feasible solutuion exists then
535 || orig - fixedUnsorted || = min when also
536 leftBound <= fixedUnsorted[i] <= rghtBound for all i
537 and sum(fixedUnsorted) = rsumTarget.
538
539 Note: it is possible to use the same pointer for
540 fixedUnsorted and orig. In this case, orig gets
541 overwritten with the new constraint satisfying values.
542
543 scalarData a work array that should be 3x nEntries.
544
545 On return constrain() indicates whether or not a feasible solution exists.
546 */
547
548 /*
549 Given a sequence of numbers o1 ... on, fix these so that they are as
550 close as possible to the original but satisfy bound constraints and also
551 have the same row sum as the oi's. If we know who is going to lie on a
552 bound, then the "best" answer (i.e., || o - f ||_2 = min) perturbs
553 each element that doesn't lie on a bound by the same amount.
554
555 We can represent the oi's by considering scattered points on a number line
556
557 | |
558 | |
559 o o o | o o o o o o |o o
560 | |
561
562 \____/ \____/
563 <---- <----
564 delta delta
565
566 Bounds are shown by vertical lines. The fi's must lie within the bounds, so
567 the 3 leftmost points must be shifted to the right and the 2 rightmost must
568 be shifted to the left. If these fi points are all shifted to the bounds
569 while the others remain in the same location, the row sum constraint is
570 likely not satisfied and so more shifting is necessary. In the figure, the f
571 rowsum is too large and so there must be more shifting to the left.
572
573 To minimize || o - f ||_2, we basically shift all "interiors" by the same
574 amount, denoted delta. The only trick is that some points near bounds are
575 still affected by the bounds and so these points might be shifted more or less
576 than delta. In the example,t he 3 rightmost points are shifted in the opposite
577 direction as delta to the bound. The 4th point is shifted by something less
578 than delta so it does not violate the lower bound. The rightmost point is
579 shifted to the bound by some amount larger than delta. However, the 2nd point
580 is shifted by delta (i.e., it lies inside the two bounds).
581
582 If we know delta, we can figure out everything. If we know which points
583 are special (not shifted by delta), we can also figure out everything.
584 The problem is these two things (delta and the special points) are
585 inter-connected. An algorithm for computing follows.
586
587 1) move exterior points to the bounds and compute how much the row sum is off
588 (rowSumDeviation). We assume now that the new row sum is high, so interior
589 points must be shifted left.
590
591 2) Mark closest point just left of the leftmost bound, closestToLeftBound,
592 and compute its distance to the leftmost bound. Mark closest point to the
593 left of the rightmost bound, closestToRghtBound, and compute its distance to
594 right bound. There are two cases to consider.
595
596 3) Case 1: closestToLeftBound is closer than closestToRghtBound.
597 Assume that shifting by delta does not move closestToLeftBound past the
598 left bound. This means that it will be shifted by delta. However,
599 closestToRghtBound will be shifted by more than delta. So the total
600 number of points shifted by delta (|interiorToBounds|) includes
601 closestToLeftBound up to and including the point just to the left of
602 closestToRghtBound. So
603
604 delta = rowSumDeviation/ |interiorToBounds| .
605
606 Recall that rowSumDeviation already accounts for the non-delta shift of
607 of closestToRightBound. Now check whether our assumption is valid.
608
609 If delta <= closestToLeftBoundDist, assumption is true so delta can be
610 applied to interiorToBounds ... and we are done.
611 Else assumption is false. Shift closestToLeftBound to the left bound.
612 Update rowSumDeviation, interiorToBounds, and identify new
613 closestToLeftBound. Repeat step 3).
614
615 Case 2: closestToRghtBound is closer than closestToLeftBound.
616 Assume that shifting by delta does not move closestToRghtBound past right
617 bound. This means that it must be shifted by more than delta to right
618 bound. So the total number of points shifted by delta again includes
619 closestToLeftBound up to and including the point just to the left of
620 closestToRghtBound. So again compute
621
622 delta = rowSumDeviation/ |interiorToBounds| .
623
624 If delta <= closestToRghtBoundDist, assumption is true so delta is
625 can be applied to interiorToBounds ... and we are done
626 Else assumption is false. Put closestToRghtBound in the
627 interiorToBounds set. Remove it's contribution to rowSumDeviation,
628 identify new closestToRghtBound. Repeat step 3)
629
630
631 To implement, sort the oi's so things like closestToLeftBound is just index
632 into sorted array. Updaing closestToLeftBound or closestToRghtBound requires
633 increment by 1. |interiorToBounds|= closestToRghtBound - closestToLeftBound
634 To handle the case when the rowsum is low (requiring right interior shifts),
635 just flip the signs on data and use the left-shift code (and then flip back
636 before exiting function.
637 */
638 bool hasFeasibleSol;
639 Scalar notFlippedLeftBound, notFlippedRghtBound, aBigNumber, *origSorted;
640 Scalar rowSumDeviation, temp, *fixedSorted, delta;
641 Scalar closestToLeftBoundDist, closestToRghtBoundDist;
642 LocalOrdinal closestToLeftBound, closestToRghtBound;
643 LocalOrdinal* inds;
644 bool flipped;
645
646 notFlippedLeftBound = leftBound;
647 notFlippedRghtBound = rghtBound;
648
649 if ((Teuchos::ScalarTraits<SC>::real(rsumTarget) >= Teuchos::ScalarTraits<SC>::real(leftBound * as<Scalar>(nEntries))) &&
650 (Teuchos::ScalarTraits<SC>::real(rsumTarget) <= Teuchos::ScalarTraits<SC>::real(rghtBound * as<Scalar>(nEntries))))
651 hasFeasibleSol = true;
652 else {
653 hasFeasibleSol = false;
654 return hasFeasibleSol;
655 }
656 flipped = false;
657 // compute aBigNumber to handle some corner cases where we need
658 // something large so that an if statement will be false
659 aBigNumber = Teuchos::ScalarTraits<SC>::zero();
660 for (LocalOrdinal i = 0; i < nEntries; i++) {
661 if (Teuchos::ScalarTraits<SC>::magnitude(orig[i]) > Teuchos::ScalarTraits<SC>::magnitude(aBigNumber))
662 aBigNumber = Teuchos::ScalarTraits<SC>::magnitude(orig[i]);
663 }
664 aBigNumber = aBigNumber + (Teuchos::ScalarTraits<SC>::magnitude(leftBound) + Teuchos::ScalarTraits<SC>::magnitude(rghtBound)) * as<Scalar>(100.0);
665
666 origSorted = &scalarData[0];
667 fixedSorted = &(scalarData[nEntries]);
668 inds = (LocalOrdinal*)&(scalarData[2 * nEntries]);
669
670 for (LocalOrdinal i = 0; i < nEntries; i++) inds[i] = i;
671 for (LocalOrdinal i = 0; i < nEntries; i++) origSorted[i] = orig[i]; /* orig no longer used */
672
673 // sort so that orig[inds] is sorted.
674 std::sort(inds, inds + nEntries,
675 [origSorted](LocalOrdinal leftIndex, LocalOrdinal rightIndex) { return Teuchos::ScalarTraits<SC>::real(origSorted[leftIndex]) < Teuchos::ScalarTraits<SC>::real(origSorted[rightIndex]); });
676
677 for (LocalOrdinal i = 0; i < nEntries; i++) origSorted[i] = orig[inds[i]];
678 // find entry in origSorted just to the right of the leftBound
679 closestToLeftBound = 0;
680 while ((closestToLeftBound < nEntries) && (Teuchos::ScalarTraits<SC>::real(origSorted[closestToLeftBound]) <= Teuchos::ScalarTraits<SC>::real(leftBound))) closestToLeftBound++;
681
682 // find entry in origSorted just to the right of the rghtBound
683 closestToRghtBound = closestToLeftBound;
684 while ((closestToRghtBound < nEntries) && (Teuchos::ScalarTraits<SC>::real(origSorted[closestToRghtBound]) <= Teuchos::ScalarTraits<SC>::real(rghtBound))) closestToRghtBound++;
685
686 // compute distance between closestToLeftBound and the left bound and the
687 // distance between closestToRghtBound and the right bound.
688
689 closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
690 if (closestToRghtBound == nEntries)
691 closestToRghtBoundDist = aBigNumber;
692 else
693 closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
694
695 // compute how far the rowSum is off from the target row sum taking into account
696 // numbers that have been shifted to satisfy bound constraint
697
698 rowSumDeviation = leftBound * as<Scalar>(closestToLeftBound) + as<Scalar>((nEntries - closestToRghtBound)) * rghtBound - rsumTarget;
699 for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation += origSorted[i];
700
701 // the code that follow after this if statement assumes that rowSumDeviation is positive. If this
702 // is not the case, flip the signs of everything so that rowSumDeviation is now positive.
703 // Later we will flip the data back to its original form.
704 if (Teuchos::ScalarTraits<SC>::real(rowSumDeviation) < Teuchos::ScalarTraits<SC>::real(Teuchos::ScalarTraits<SC>::zero())) {
705 flipped = true;
706 temp = leftBound;
707 leftBound = -rghtBound;
708 rghtBound = temp;
709
710 /* flip sign of origSorted and reverse ordering so that the negative version is sorted */
711
712 if ((nEntries % 2) == 1) origSorted[(nEntries / 2)] = -origSorted[(nEntries / 2)];
713 for (LocalOrdinal i = 0; i < nEntries / 2; i++) {
714 temp = origSorted[i];
715 origSorted[i] = -origSorted[nEntries - 1 - i];
716 origSorted[nEntries - i - 1] = -temp;
717 }
718
719 /* reverse bounds */
720
721 LocalOrdinal itemp = closestToLeftBound;
722 closestToLeftBound = nEntries - closestToRghtBound;
723 closestToRghtBound = nEntries - itemp;
724 closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
725 if (closestToRghtBound == nEntries)
726 closestToRghtBoundDist = aBigNumber;
727 else
728 closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
729
730 rowSumDeviation = -rowSumDeviation;
731 }
732
733 // initial fixedSorted so bounds are satisfied and interiors correspond to origSorted
734
735 for (LocalOrdinal i = 0; i < closestToLeftBound; i++) fixedSorted[i] = leftBound;
736 for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i];
737 for (LocalOrdinal i = closestToRghtBound; i < nEntries; i++) fixedSorted[i] = rghtBound;
738
739 while ((Teuchos::ScalarTraits<SC>::magnitude(rowSumDeviation) > Teuchos::ScalarTraits<SC>::magnitude(as<Scalar>(1.e-10) * rsumTarget))) { // && ( (closestToLeftBound < nEntries ) || (closestToRghtBound < nEntries))) {
740 if (closestToRghtBound != closestToLeftBound)
741 delta = rowSumDeviation / as<Scalar>(closestToRghtBound - closestToLeftBound);
742 else
743 delta = aBigNumber;
744
745 if (Teuchos::ScalarTraits<SC>::magnitude(closestToLeftBoundDist) <= Teuchos::ScalarTraits<SC>::magnitude(closestToRghtBoundDist)) {
746 if (Teuchos::ScalarTraits<SC>::magnitude(delta) <= Teuchos::ScalarTraits<SC>::magnitude(closestToLeftBoundDist)) {
747 rowSumDeviation = Teuchos::ScalarTraits<SC>::zero();
748 for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i] - delta;
749 } else {
750 rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
751 fixedSorted[closestToLeftBound] = leftBound;
752 closestToLeftBound++;
753 if (closestToLeftBound < nEntries)
754 closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
755 else
756 closestToLeftBoundDist = aBigNumber;
757 }
758 } else {
759 if (Teuchos::ScalarTraits<SC>::magnitude(delta) <= Teuchos::ScalarTraits<SC>::magnitude(closestToRghtBoundDist)) {
760 rowSumDeviation = 0;
761 for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i] - delta;
762 } else {
763 rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
764 // if (closestToRghtBound < nEntries) {
765 fixedSorted[closestToRghtBound] = origSorted[closestToRghtBound];
766 closestToRghtBound++;
767 // }
768 if (closestToRghtBound >= nEntries)
769 closestToRghtBoundDist = aBigNumber;
770 else
771 closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
772 }
773 }
774 }
775
776 if (flipped) {
777 /* flip sign of fixedSorted and reverse ordering so that the positve version is sorted */
778
779 if ((nEntries % 2) == 1) fixedSorted[(nEntries / 2)] = -fixedSorted[(nEntries / 2)];
780 for (LocalOrdinal i = 0; i < nEntries / 2; i++) {
781 temp = fixedSorted[i];
782 fixedSorted[i] = -fixedSorted[nEntries - 1 - i];
783 fixedSorted[nEntries - i - 1] = -temp;
784 }
785 }
786 for (LocalOrdinal i = 0; i < nEntries; i++) fixedUnsorted[inds[i]] = fixedSorted[i];
787
788 /* check that no constraints are violated */
789
790 bool lowerViolation = false;
791 bool upperViolation = false;
792 bool sumViolation = false;
793 using TST = Teuchos::ScalarTraits<SC>;
794 temp = TST::zero();
795 for (LocalOrdinal i = 0; i < nEntries; i++) {
796 if (TST::real(fixedUnsorted[i]) < TST::real(notFlippedLeftBound)) lowerViolation = true;
797 if (TST::real(fixedUnsorted[i]) > TST::real(notFlippedRghtBound)) upperViolation = true;
798 temp += fixedUnsorted[i];
799 }
800 SC tol = as<Scalar>(std::max(1.0e-8, as<double>(100 * TST::eps())));
801 if (TST::magnitude(temp - rsumTarget) > TST::magnitude(tol * rsumTarget)) sumViolation = true;
802
803 TEUCHOS_TEST_FOR_EXCEPTION(lowerViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in a lower bound violation??? ");
804 TEUCHOS_TEST_FOR_EXCEPTION(upperViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in an upper bound violation??? ");
805 TEUCHOS_TEST_FOR_EXCEPTION(sumViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in a row sum violation??? ");
806
807 return hasFeasibleSol;
808}
809
810template <typename local_matrix_type>
812 using Scalar = typename local_matrix_type::non_const_value_type;
813 using SC = Scalar;
814 using LO = typename local_matrix_type::non_const_ordinal_type;
815 using Device = typename local_matrix_type::device_type;
816 using KAT = KokkosKernels::ArithTraits<SC>;
817 const Scalar zero = KAT::zero();
818 const Scalar one = KAT::one();
820 local_matrix_type localP;
821 Kokkos::View<SC**, Device> ConstraintViolationSum;
822 Kokkos::View<SC**, Device> Rsum;
823 Kokkos::View<size_t**, Device> nPositive;
824
825 constraintKernel(LO nPDEs_, local_matrix_type localP_)
826 : nPDEs(nPDEs_)
827 , localP(localP_) {
828 ConstraintViolationSum = Kokkos::View<SC**, Device>("ConstraintViolationSum", localP_.numRows(), nPDEs);
829 Rsum = Kokkos::View<SC**, Device>("Rsum", localP_.numRows(), nPDEs);
830 nPositive = Kokkos::View<size_t**, Device>("nPositive", localP_.numRows(), nPDEs);
831 }
832
833 KOKKOS_INLINE_FUNCTION
834 void operator()(const size_t rowIdx) const {
835 auto rowPtr = localP.graph.row_map;
836 auto values = localP.values;
837
838 bool checkRow = true;
839
840 if (rowPtr(rowIdx + 1) == rowPtr(rowIdx)) checkRow = false;
841
842 while (checkRow) {
843 // check constraints and compute the row sum
844
845 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
846 Rsum(rowIdx, entryIdx % nPDEs) += values(entryIdx);
847 if (KAT::real(values(entryIdx)) < KAT::real(zero)) {
848 ConstraintViolationSum(rowIdx, entryIdx % nPDEs) += values(entryIdx);
849 values(entryIdx) = zero;
850 } else {
851 if (KAT::real(values(entryIdx)) != KAT::real(zero))
852 nPositive(rowIdx, entryIdx % nPDEs) = nPositive(rowIdx, entryIdx % nPDEs) + 1;
853
854 if (KAT::real(values(entryIdx)) > KAT::real(1.00001)) {
855 ConstraintViolationSum(rowIdx, entryIdx % nPDEs) += (values(entryIdx) - one);
856 values(entryIdx) = one;
857 }
858 }
859 }
860
861 checkRow = false;
862
863 // take into account any row sum that violates the contraints
864
865 for (size_t k = 0; k < (size_t)nPDEs; k++) {
866 if (KAT::real(Rsum(rowIdx, k)) < KAT::magnitude(zero)) {
867 ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) - Rsum(rowIdx, k); // rstumin
868 } else if (KAT::real(Rsum(rowIdx, k)) > KAT::magnitude(1.00001)) {
869 ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) + (one - Rsum(rowIdx, k)); // rstumin
870 }
871 }
872
873 // check if row need modification
874 for (size_t k = 0; k < (size_t)nPDEs; k++) {
875 if (KAT::magnitude(ConstraintViolationSum(rowIdx, k)) != KAT::magnitude(zero))
876 checkRow = true;
877 }
878 // modify row
879 if (checkRow) {
880 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
881 if (KAT::real(values(entryIdx)) > KAT::real(zero)) {
882 values(entryIdx) = values(entryIdx) +
883 (ConstraintViolationSum(rowIdx, entryIdx % nPDEs) / (Scalar(nPositive(rowIdx, entryIdx % nPDEs)) != zero ? Scalar(nPositive(rowIdx, entryIdx % nPDEs)) : one));
884 }
885 }
886 for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum(rowIdx, k) = zero;
887 }
888 for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum(rowIdx, k) = zero;
889 for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive(rowIdx, k) = 0;
890 } // while (checkRow) ...
891 }
892};
893
894template <typename local_matrix_type>
896 using Scalar = typename local_matrix_type::non_const_value_type;
897 using SC = Scalar;
898 using LO = typename local_matrix_type::non_const_ordinal_type;
899 using Device = typename local_matrix_type::device_type;
900 using KAT = KokkosKernels::ArithTraits<SC>;
901 const Scalar zero = KAT::zero();
902 const Scalar one = KAT::one();
904 local_matrix_type localP;
905 Kokkos::View<SC**, Device> origSorted;
906 Kokkos::View<SC**, Device> fixedSorted;
907 Kokkos::View<LO**, Device> inds;
908
909 optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)
910 : nPDEs(nPDEs_)
911 , localP(localP_) {
912 origSorted = Kokkos::View<SC**, Device>("origSorted", localP_.numRows(), maxRowEntries_);
913 fixedSorted = Kokkos::View<SC**, Device>("fixedSorted", localP_.numRows(), maxRowEntries_);
914 inds = Kokkos::View<LO**, Device>("inds", localP_.numRows(), maxRowEntries_);
915 }
916
917 KOKKOS_INLINE_FUNCTION
918 void operator()(const size_t rowIdx) const {
919 auto rowPtr = localP.graph.row_map;
920 auto values = localP.values;
921
922 auto nnz = rowPtr(rowIdx + 1) - rowPtr(rowIdx);
923
924 if (nnz != 0) {
925 Scalar rsumTarget = zero;
926 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) rsumTarget += values(entryIdx);
927
928 {
929 SC aBigNumber;
930 SC rowSumDeviation, temp, delta;
931 SC closestToLeftBoundDist, closestToRghtBoundDist;
932 LO closestToLeftBound, closestToRghtBound;
933 bool flipped;
934
935 SC leftBound = zero;
936 SC rghtBound = one;
937 if ((KAT::real(rsumTarget) >= KAT::real(leftBound * (static_cast<SC>(nnz)))) &&
938 (KAT::real(rsumTarget) <= KAT::real(rghtBound * (static_cast<SC>(nnz))))) { // has Feasible solution
939
940 flipped = false;
941 // compute aBigNumber to handle some corner cases where we need
942 // something large so that an if statement will be false
943 aBigNumber = KAT::zero();
944 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
945 if (KAT::magnitude(values(entryIdx)) > KAT::magnitude(aBigNumber))
946 aBigNumber = KAT::magnitude(values(entryIdx));
947 }
948 aBigNumber = aBigNumber + (KAT::magnitude(leftBound) + KAT::magnitude(rghtBound)) * (100 * one);
949
950 LO ind = 0;
951 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
952 origSorted(rowIdx, ind) = values(entryIdx);
953 inds(rowIdx, ind) = ind;
954 ind++;
955 }
956
957 auto sortVals = Kokkos::subview(origSorted, rowIdx, Kokkos::ALL());
958 auto sortInds = Kokkos::subview(inds, rowIdx, Kokkos::make_pair(0, ind));
959 // Need permutation indices to sort row values from smallest to largest,
960 // and unsort once row constraints are applied.
961 // serial insertion sort workaround from https://github.com/kokkos/kokkos/issues/645
962 for (LO i = 1; i < static_cast<LO>(nnz); ++i) {
963 ind = sortInds(i);
964 LO j = i;
965
966 if (KAT::real(sortVals(sortInds(i))) < KAT::real(sortVals(sortInds(0)))) {
967 for (; j > 0; --j) sortInds(j) = sortInds(j - 1);
968
969 sortInds(0) = ind;
970 } else {
971 for (; KAT::real(sortVals(ind)) < KAT::real(sortVals(sortInds(j - 1))); --j) sortInds(j) = sortInds(j - 1);
972
973 sortInds(j) = ind;
974 }
975 }
976
977 for (LO i = 0; i < static_cast<LO>(nnz); i++) origSorted(rowIdx, i) = values(rowPtr(rowIdx) + inds(rowIdx, i)); // values is no longer used
978 // find entry in origSorted just to the right of the leftBound
979 closestToLeftBound = 0;
980 while ((closestToLeftBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToLeftBound)) <= KAT::real(leftBound))) closestToLeftBound++;
981
982 // find entry in origSorted just to the right of the rghtBound
983 closestToRghtBound = closestToLeftBound;
984 while ((closestToRghtBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToRghtBound)) <= KAT::real(rghtBound))) closestToRghtBound++;
985
986 // compute distance between closestToLeftBound and the left bound and the
987 // distance between closestToRghtBound and the right bound.
988
989 closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
990 if (closestToRghtBound == static_cast<LO>(nnz))
991 closestToRghtBoundDist = aBigNumber;
992 else
993 closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
994
995 // compute how far the rowSum is off from the target row sum taking into account
996 // numbers that have been shifted to satisfy bound constraint
997
998 rowSumDeviation = leftBound * (static_cast<SC>(closestToLeftBound)) + (static_cast<SC>(nnz - closestToRghtBound)) * rghtBound - rsumTarget;
999 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation += origSorted(rowIdx, i);
1000
1001 // the code that follow after this if statement assumes that rowSumDeviation is positive. If this
1002 // is not the case, flip the signs of everything so that rowSumDeviation is now positive.
1003 // Later we will flip the data back to its original form.
1004 if (KAT::real(rowSumDeviation) < KAT::real(KAT::zero())) {
1005 flipped = true;
1006 temp = leftBound;
1007 leftBound = -rghtBound;
1008 rghtBound = temp;
1009
1010 /* flip sign of origSorted and reverse ordering so that the negative version is sorted */
1011
1012 // TODO: the following is bad for GPU performance. Switch to bit shifting if brave.
1013 if ((nnz % 2) == 1) origSorted(rowIdx, (nnz / 2)) = -origSorted(rowIdx, (nnz / 2));
1014 for (LO i = 0; i < static_cast<LO>(nnz / 2); i++) {
1015 temp = origSorted(rowIdx, i);
1016 origSorted(rowIdx, i) = -origSorted(rowIdx, nnz - 1 - i);
1017 origSorted(rowIdx, nnz - i - 1) = -temp;
1018 }
1019
1020 /* reverse bounds */
1021
1022 LO itemp = closestToLeftBound;
1023 closestToLeftBound = nnz - closestToRghtBound;
1024 closestToRghtBound = nnz - itemp;
1025 closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
1026 if (closestToRghtBound == static_cast<LO>(nnz))
1027 closestToRghtBoundDist = aBigNumber;
1028 else
1029 closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
1030
1031 rowSumDeviation = -rowSumDeviation;
1032 }
1033
1034 // initial fixedSorted so bounds are satisfied and interiors correspond to origSorted
1035
1036 for (LO i = 0; i < closestToLeftBound; i++) fixedSorted(rowIdx, i) = leftBound;
1037 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i);
1038 for (LO i = closestToRghtBound; i < static_cast<LO>(nnz); i++) fixedSorted(rowIdx, i) = rghtBound;
1039
1040 while ((KAT::magnitude(rowSumDeviation) > KAT::magnitude((one * 1.e-10) * rsumTarget))) { // && ( (closestToLeftBound < nEntries ) || (closestToRghtBound < nEntries))) {
1041 if (closestToRghtBound != closestToLeftBound)
1042 delta = rowSumDeviation / static_cast<SC>(closestToRghtBound - closestToLeftBound);
1043 else
1044 delta = aBigNumber;
1045
1046 if (KAT::magnitude(closestToLeftBoundDist) <= KAT::magnitude(closestToRghtBoundDist)) {
1047 if (KAT::magnitude(delta) <= KAT::magnitude(closestToLeftBoundDist)) {
1048 rowSumDeviation = zero;
1049 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
1050 } else {
1051 rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
1052 fixedSorted(rowIdx, closestToLeftBound) = leftBound;
1053 closestToLeftBound++;
1054 if (closestToLeftBound < static_cast<LO>(nnz))
1055 closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
1056 else
1057 closestToLeftBoundDist = aBigNumber;
1058 }
1059 } else {
1060 if (KAT::magnitude(delta) <= KAT::magnitude(closestToRghtBoundDist)) {
1061 rowSumDeviation = 0;
1062 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
1063 } else {
1064 rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
1065 // if (closestToRghtBound < nEntries) {
1066 fixedSorted(rowIdx, closestToRghtBound) = origSorted(rowIdx, closestToRghtBound);
1067 closestToRghtBound++;
1068 // }
1069 if (closestToRghtBound >= static_cast<LO>(nnz))
1070 closestToRghtBoundDist = aBigNumber;
1071 else
1072 closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
1073 }
1074 }
1075 }
1076
1077 auto rowStart = rowPtr(rowIdx);
1078 if (flipped) {
1079 /* flip sign of fixedSorted and reverse ordering so that the positve version is sorted */
1080
1081 if ((nnz % 2) == 1) fixedSorted(rowIdx, (nnz / 2)) = -fixedSorted(rowIdx, (nnz / 2));
1082 for (LO i = 0; i < static_cast<LO>(nnz / 2); i++) {
1083 temp = fixedSorted(rowIdx, i);
1084 fixedSorted(rowIdx, i) = -fixedSorted(rowIdx, nnz - 1 - i);
1085 fixedSorted(rowIdx, nnz - i - 1) = -temp;
1086 }
1087 }
1088 // unsort and update row values with new values
1089 for (LO i = 0; i < static_cast<LO>(nnz); i++) values(rowStart + inds(rowIdx, i)) = fixedSorted(rowIdx, i);
1090
1091 } else { // row does not have feasible solution to match constraint
1092 // just set all entries to the same value giving a row sum of 1
1093 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) values(entryIdx) = one / (static_cast<SC>(nnz));
1094 }
1095 }
1096 }
1097 }
1098};
1099
1100template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1102 using Device = typename Matrix::local_matrix_device_type::device_type;
1103 LO nPDEs = A->GetFixedBlockSize();
1104
1105 using local_mat_type = typename Matrix::local_matrix_device_type;
1106 constraintKernel<local_mat_type> myKernel(nPDEs, P->getLocalMatrixDevice());
1107 Kokkos::parallel_for("enforce constraint", Kokkos::RangePolicy<typename Device::execution_space>(0, P->getRowMap()->getLocalNumElements()),
1108 myKernel);
1109
1110} // SatsifyPConstraints()
1111
1112template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1114 using Device = typename Matrix::local_matrix_device_type::device_type;
1115 LO nPDEs = 1; // A->GetFixedBlockSize();
1116
1117 using local_mat_type = typename Matrix::local_matrix_device_type;
1118 optimalSatisfyConstraintsForScalarPDEsKernel<local_mat_type> myKernel(nPDEs, P->getLocalMaxNumRowEntries(), P->getLocalMatrixDevice());
1119 Kokkos::parallel_for("enforce constraint", Kokkos::RangePolicy<typename Device::execution_space>(0, P->getLocalNumRows()),
1120 myKernel);
1121
1122} // SatsifyPConstraints()
1123
1124template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1125RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> SaPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::JacobiMaxwell1(const RCP<Matrix>& SM,
1126 const RCP<Matrix>& Kn,
1127 const RCP<Matrix>& D0,
1128 const RCP<Matrix>& Pe_tent,
1129 const Scalar beta) const {
1130 // Compute Pe := (I - \beta D0 * diag(Kn)^-1 * D0^T SM) * Pe_tent
1131 using XMM = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
1132 auto one = Teuchos::ScalarTraits<Scalar>::one();
1133
1134 auto diagKnInv = Utilities::GetMatrixDiagonalInverse(*Kn);
1135
1136 auto D0_diagKnInv = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BuildCopy(D0, Teuchos::Copy);
1137 D0_diagKnInv->rightScale(*diagKnInv);
1138
1139 RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> dummy;
1140 auto D0_diagKnInv_D0T = XMM::Multiply(*D0_diagKnInv, false, *D0, true, dummy, GetOStream(Runtime0));
1141 D0_diagKnInv = Teuchos::null;
1142
1143 auto SM_Pe_tent = XMM::Multiply(*SM, false, *Pe_tent, false, dummy, GetOStream(Runtime0));
1144 auto D0_diagKnInv_D0T_SM_Pe_tent = XMM::Multiply(*D0_diagKnInv_D0T, false, *SM_Pe_tent, false, dummy, GetOStream(Runtime0));
1145
1146 RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Pe;
1147 XMM::TwoMatrixAdd(*Pe_tent, false, one, *D0_diagKnInv_D0T_SM_Pe_tent, false, -beta, Pe, GetOStream(Runtime0));
1148 Pe->fillComplete(Pe_tent->getDomainMap(), Pe_tent->getRangeMap());
1149
1150 return Pe;
1151}
1152
1153} // namespace MueLu
1154
1155#endif // MUELU_SAPFACTORY_DEF_HPP
1156
1157// TODO: restrictionMode_ should use the parameter list.
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Jacobi(typename Teuchos::ScalarTraits< Scalar >::magnitudeType omega, const Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Dinv, const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > C_in, Teuchos::FancyOStream &fos, const std::string &label, RCP< ParameterList > &params)
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
void SatisfyPConstraints(RCP< Matrix > A, RCP< Matrix > &P) const
void Build(Level &fineLevel, Level &coarseLevel) const
virtual ~SaPFactory()
Destructor.
void optimalSatisfyPConstraintsForScalarPDEsNonKokkos(RCP< Matrix > &P) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void optimalSatisfyPConstraintsForScalarPDEs(RCP< Matrix > &P) const
bool constrainRow(Scalar *orig, LocalOrdinal nEntries, Scalar leftBound, Scalar rghtBound, Scalar rsumTarget, Scalar *fixedUnsorted, Scalar *scalarData) const
SaPFactory()
Constructor. User can supply a factory for generating the tentative prolongator.
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void SatisfyPConstraintsNonKokkos(RCP< Matrix > A, RCP< Matrix > &P) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
RCP< Matrix > JacobiMaxwell1(const RCP< Matrix > &SM, const RCP< Matrix > &Kn, const RCP< Matrix > &D0, const RCP< Matrix > &Pe_tent, const Scalar beta) const
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static Scalar PowerMethod(const Matrix &A, bool scaleByDiag=true, LocalOrdinal niters=10, Magnitude tolerance=1e-2, Magnitude diagonalReplacementTol=Teuchos::ScalarTraits< Scalar >::eps() *100, bool verbose=false, unsigned int seed=123)
Power method.
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps() *100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool doLumped=false)
Extract Matrix Diagonal.
static Teuchos::RCP< Vector > GetLumpedMatrixDiagonal(Matrix const &A, const bool doReciprocal=false, Magnitude tol=Teuchos::ScalarTraits< Scalar >::magnitude(Teuchos::ScalarTraits< Scalar >::zero()), Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool replaceSingleEntryRowWithZero=false, const bool useAverageAbsDiagVal=false)
Extract Matrix Diagonal of lumped matrix.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Namespace for MueLu classes and methods.
@ Statistics2
Print even more statistics.
@ Warnings
Print all warning messages.
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
scalar_type dampingFactor
typename local_matrix_type::device_type Device
typename local_matrix_type::non_const_value_type Scalar
KokkosKernels::ArithTraits< SC > KAT
Kokkos::View< SC **, Device > ConstraintViolationSum
Kokkos::View< size_t **, Device > nPositive
constraintKernel(LO nPDEs_, local_matrix_type localP_)
typename local_matrix_type::non_const_ordinal_type LO
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
Kokkos::View< SC **, Device > Rsum
typename local_matrix_type::non_const_value_type Scalar
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)
typename local_matrix_type::non_const_ordinal_type LO