MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_SaPFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_SAPFACTORY_DEF_HPP
11#define MUELU_SAPFACTORY_DEF_HPP
12
13#if KOKKOS_VERSION >= 40799
14#include "KokkosKernels_ArithTraits.hpp"
15#else
16#include "Kokkos_ArithTraits.hpp"
17#endif
19
20#include <Xpetra_Matrix.hpp>
21
23#include "MueLu_Level.hpp"
24#include "MueLu_MasterList.hpp"
25#include "MueLu_Monitor.hpp"
26#include "MueLu_PerfUtils.hpp"
27#include "MueLu_TentativePFactory.hpp"
28#include "MueLu_IteratorOps.hpp"
29#include "MueLu_Utilities.hpp"
30
31#include <sstream>
32
33namespace MueLu {
34
35template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
37
38template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
40
41template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
43 RCP<ParameterList> validParamList = rcp(new ParameterList());
44
45#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
46 SET_VALID_ENTRY("sa: damping factor");
47 SET_VALID_ENTRY("sa: calculate eigenvalue estimate");
48 SET_VALID_ENTRY("sa: eigenvalue estimate num iterations");
49 SET_VALID_ENTRY("sa: use rowsumabs diagonal scaling");
50 SET_VALID_ENTRY("sa: enforce constraints");
51 SET_VALID_ENTRY("tentative: calculate qr");
52 SET_VALID_ENTRY("sa: max eigenvalue");
53 SET_VALID_ENTRY("sa: rowsumabs diagonal replacement tolerance");
54 SET_VALID_ENTRY("sa: rowsumabs diagonal replacement value");
55 SET_VALID_ENTRY("sa: rowsumabs replace single entry row with zero");
56 SET_VALID_ENTRY("sa: rowsumabs use automatic diagonal tolerance");
57 SET_VALID_ENTRY("use kokkos refactor");
58#undef SET_VALID_ENTRY
59
60 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A used during the prolongator smoothing process");
61 validParamList->set<RCP<const FactoryBase> >("P", Teuchos::null, "Tentative prolongator factory");
62
63 // Make sure we don't recursively validate options for the matrixmatrix kernels
64 ParameterList norecurse;
65 norecurse.disableRecursiveValidation();
66 validParamList->set<ParameterList>("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
67
68 return validParamList;
69}
70
71template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
73 Input(fineLevel, "A");
74
75 // Get default tentative prolongator factory
76 // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
77 RCP<const FactoryBase> initialPFact = GetFactory("P");
78 if (initialPFact == Teuchos::null) {
79 initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent");
80 }
81 coarseLevel.DeclareInput("P", initialPFact.get(), this);
82}
83
84template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
86 return BuildP(fineLevel, coarseLevel);
87}
88
89template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
91 FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel);
93 std::string levelIDs = toString(coarseLevel.GetLevelID());
94
95 const std::string prefix = "MueLu::SaPFactory(" + levelIDs + "): ";
96
97 typedef typename Teuchos::ScalarTraits<SC>::coordinateType Coordinate;
98 typedef typename Teuchos::ScalarTraits<SC>::magnitudeType MT;
99
100 // Get default tentative prolongator factory
101 // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
102 // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
103 RCP<const FactoryBase> initialPFact = GetFactory("P");
104 if (initialPFact == Teuchos::null) {
105 initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent");
106 }
107 const ParameterList& pL = GetParameterList();
108
109 // Level Get
110 RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
111 RCP<Matrix> Ptent = coarseLevel.Get<RCP<Matrix> >("P", initialPFact.get());
112 RCP<Matrix> finalP;
113 // If Tentative facctory bailed out (e.g., number of global aggregates is 0), then SaPFactory bails
114 // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
115 if (Ptent == Teuchos::null) {
116 finalP = Teuchos::null;
117 Set(coarseLevel, "P", finalP);
118 return;
119 }
120
121 if (restrictionMode_) {
122 SubFactoryMonitor m2(*this, "Transpose A", coarseLevel);
123 A = Utilities::Transpose(*A, true); // build transpose of A explicitly
124 }
125
126 // Build final prolongator
127
128 // Reuse pattern if available
129 RCP<ParameterList> APparams;
130 if (pL.isSublist("matrixmatrix: kernel params"))
131 APparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
132 else
133 APparams = rcp(new ParameterList);
134 if (coarseLevel.IsAvailable("AP reuse data", this)) {
135 GetOStream(static_cast<MsgType>(Runtime0 | Test)) << "Reusing previous AP data" << std::endl;
136
137 APparams = coarseLevel.Get<RCP<ParameterList> >("AP reuse data", this);
138
139 if (APparams->isParameter("graph"))
140 finalP = APparams->get<RCP<Matrix> >("graph");
141 }
142 // By default, we don't need global constants for SaP
143 APparams->set("compute global constants: temporaries", APparams->get("compute global constants: temporaries", false));
144 APparams->set("compute global constants", APparams->get("compute global constants", false));
145
146 const SC dampingFactor = as<SC>(pL.get<double>("sa: damping factor"));
147 const LO maxEigenIterations = as<LO>(pL.get<int>("sa: eigenvalue estimate num iterations"));
148 const bool estimateMaxEigen = pL.get<bool>("sa: calculate eigenvalue estimate");
149 const bool useAbsValueRowSum = pL.get<bool>("sa: use rowsumabs diagonal scaling");
150 const bool doQRStep = pL.get<bool>("tentative: calculate qr");
151 const bool enforceConstraints = pL.get<bool>("sa: enforce constraints");
152 const MT userDefinedMaxEigen = as<MT>(pL.get<double>("sa: max eigenvalue"));
153 double dTol = pL.get<double>("sa: rowsumabs diagonal replacement tolerance");
154 const MT diagonalReplacementTolerance = (dTol == as<double>(-1) ? Teuchos::ScalarTraits<MT>::eps() * 100 : as<MT>(pL.get<double>("sa: rowsumabs diagonal replacement tolerance")));
155 const SC diagonalReplacementValue = as<SC>(pL.get<double>("sa: rowsumabs diagonal replacement value"));
156 const bool replaceSingleEntryRowWithZero = pL.get<bool>("sa: rowsumabs replace single entry row with zero");
157 const bool useAutomaticDiagTol = pL.get<bool>("sa: rowsumabs use automatic diagonal tolerance");
158
159 // Sanity checking
160 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && enforceConstraints, Exceptions::RuntimeError,
161 "MueLu::TentativePFactory::MakeTentative: cannot use 'enforce constraints' and 'calculate qr' at the same time");
162
163 if (dampingFactor != Teuchos::ScalarTraits<SC>::zero()) {
164 Scalar lambdaMax;
165 Teuchos::RCP<Vector> invDiag;
166 if (userDefinedMaxEigen == -1.) {
167 SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
168 lambdaMax = A->GetMaxEigenvalueEstimate();
169 if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
170 GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = " << maxEigenIterations << ((useAbsValueRowSum) ? ", use rowSumAbs diagonal)" : ", use point diagonal)") << std::endl;
171 Coordinate stopTol = 1e-4;
172 if (useAbsValueRowSum) {
173 const bool returnReciprocal = true;
174 invDiag = Utilities::GetLumpedMatrixDiagonal(*A, returnReciprocal,
175 diagonalReplacementTolerance,
176 diagonalReplacementValue,
177 replaceSingleEntryRowWithZero,
178 useAutomaticDiagTol);
179 TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError,
180 "SaPFactory: eigenvalue estimate: diagonal reciprocal is null.");
181 lambdaMax = Utilities::PowerMethod(*A, invDiag, maxEigenIterations, stopTol);
182 } else
183 lambdaMax = Utilities::PowerMethod(*A, true, maxEigenIterations, stopTol);
184 A->SetMaxEigenvalueEstimate(lambdaMax);
185 } else {
186 GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl;
187 }
188 } else {
189 lambdaMax = userDefinedMaxEigen;
190 A->SetMaxEigenvalueEstimate(lambdaMax);
191 }
192 GetOStream(Statistics1) << "Prolongator damping factor = " << dampingFactor / lambdaMax << " (" << dampingFactor << " / " << lambdaMax << ")" << std::endl;
193
194 {
195 SubFactoryMonitor m2(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel);
196 if (!useAbsValueRowSum)
197 invDiag = Utilities::GetMatrixDiagonalInverse(*A); // default
198 else if (invDiag == Teuchos::null) {
199 GetOStream(Runtime0) << "Using rowsumabs diagonal" << std::endl;
200 const bool returnReciprocal = true;
201 invDiag = Utilities::GetLumpedMatrixDiagonal(*A, returnReciprocal,
202 diagonalReplacementTolerance,
203 diagonalReplacementValue,
204 replaceSingleEntryRowWithZero,
205 useAutomaticDiagTol);
206 TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError, "SaPFactory: diagonal reciprocal is null.");
207 }
208
209 SC omega = dampingFactor / lambdaMax;
210 TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(Teuchos::ScalarTraits<SC>::magnitude(omega)), Exceptions::RuntimeError, "Prolongator damping factor needs to be finite.");
211
212 {
213 SubFactoryMonitor m3(*this, "MueLu::IteratorOps::Jacobi", coarseLevel);
214 // finalP = Ptent + (I - \omega D^{-1}A) Ptent
215 finalP = MueLu::IteratorOps<SC, LO, GO, NO>::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(Statistics2), std::string("MueLu::SaP-") + toString(coarseLevel.GetLevelID()), APparams);
216 if (enforceConstraints) {
217 if (!pL.get<bool>("use kokkos refactor")) {
218 if (A->GetFixedBlockSize() == 1)
219 optimalSatisfyPConstraintsForScalarPDEsNonKokkos(finalP);
220 else
221 SatisfyPConstraintsNonKokkos(A, finalP);
222 } else {
223 // if (A->GetFixedBlockSize() == 1)
224 // optimalSatisfyPConstraintsForScalarPDEs(finalP);
225 // else
226 SatisfyPConstraints(A, finalP);
227 }
228 }
229 }
230 }
231
232 } else {
233 finalP = Ptent;
234 }
235
236 // Level Set
237 RCP<Matrix> R;
238 if (!restrictionMode_) {
239 // prolongation factory is in prolongation mode
240 if (!finalP.is_null()) {
241 std::ostringstream oss;
242 oss << "P_" << coarseLevel.GetLevelID();
243 finalP->setObjectLabel(oss.str());
244 }
245 Set(coarseLevel, "P", finalP);
246
247 APparams->set("graph", finalP);
248 Set(coarseLevel, "AP reuse data", APparams);
249
250 // NOTE: EXPERIMENTAL
251 if (Ptent->IsView("stridedMaps"))
252 finalP->CreateView("stridedMaps", Ptent);
253
254 } else {
255 // prolongation factory is in restriction mode
256 {
257 SubFactoryMonitor m2(*this, "Transpose P", coarseLevel);
258 R = Utilities::Transpose(*finalP, true);
259 if (!R.is_null()) {
260 std::ostringstream oss;
261 oss << "R_" << coarseLevel.GetLevelID();
262 R->setObjectLabel(oss.str());
263 }
264 }
265
266 Set(coarseLevel, "R", R);
267
268 // NOTE: EXPERIMENTAL
269 if (Ptent->IsView("stridedMaps"))
270 R->CreateView("stridedMaps", Ptent, true /*transposeA*/);
271 }
272
273 if (IsPrint(Statistics2)) {
274 RCP<ParameterList> params = rcp(new ParameterList());
275 params->set("printLoadBalancingInfo", true);
276 params->set("printCommInfo", true);
277 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo((!restrictionMode_ ? *finalP : *R), (!restrictionMode_ ? "P" : "R"), params);
278 }
279
280} // Build()
281
282// Analyze the grid transfer produced by smoothed aggregation and make
283// modifications if it does not look right. In particular, if there are
284// negative entries or entries larger than 1, modify P's rows.
285//
286// Note: this kind of evaluation probably only makes sense if not doing QR
287// when constructing tentative P.
288//
289// For entries that do not satisfy the two constraints (>= 0 or <=1) we set
290// these entries to the constraint value and modify the rest of the row
291// so that the row sum remains the same as before by adding an equal
292// amount to each remaining entry. However, if the original row sum value
293// violates the constraints, we set the row sum back to 1 (the row sum of
294// tentative P). After doing the modification to a row, we need to check
295// again the entire row to make sure that the modified row does not violate
296// the constraints.
297
298template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
300 const Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
301 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
302 LO nPDEs = A->GetFixedBlockSize();
303 Teuchos::ArrayRCP<Scalar> ConstraintViolationSum(nPDEs);
304 Teuchos::ArrayRCP<Scalar> Rsum(nPDEs);
305 Teuchos::ArrayRCP<size_t> nPositive(nPDEs);
306 for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum[k] = zero;
307 for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum[k] = zero;
308 for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive[k] = 0;
309
310 for (size_t i = 0; i < as<size_t>(P->getRowMap()->getLocalNumElements()); i++) {
311 Teuchos::ArrayView<const LocalOrdinal> indices;
312 Teuchos::ArrayView<const Scalar> vals1;
313 Teuchos::ArrayView<Scalar> vals;
314 P->getLocalRowView((LO)i, indices, vals1);
315 size_t nnz = indices.size();
316 if (nnz == 0) continue;
317
318 vals = ArrayView<Scalar>(const_cast<SC*>(vals1.getRawPtr()), nnz);
319
320 bool checkRow = true;
321
322 while (checkRow) {
323 // check constraints and compute the row sum
324
325 for (LO j = 0; j < indices.size(); j++) {
326 Rsum[j % nPDEs] += vals[j];
327 if (Teuchos::ScalarTraits<SC>::real(vals[j]) < Teuchos::ScalarTraits<SC>::real(zero)) {
328 ConstraintViolationSum[j % nPDEs] += vals[j];
329 vals[j] = zero;
330 } else {
331 if (Teuchos::ScalarTraits<SC>::real(vals[j]) != Teuchos::ScalarTraits<SC>::real(zero))
332 (nPositive[j % nPDEs])++;
333
334 if (Teuchos::ScalarTraits<SC>::real(vals[j]) > Teuchos::ScalarTraits<SC>::real(1.00001)) {
335 ConstraintViolationSum[j % nPDEs] += (vals[j] - one);
336 vals[j] = one;
337 }
338 }
339 }
340
341 checkRow = false;
342
343 // take into account any row sum that violates the contraints
344
345 for (size_t k = 0; k < (size_t)nPDEs; k++) {
346 if (Teuchos::ScalarTraits<SC>::real(Rsum[k]) < Teuchos::ScalarTraits<SC>::magnitude(zero)) {
347 ConstraintViolationSum[k] += (-Rsum[k]); // rstumin
348 } else if (Teuchos::ScalarTraits<SC>::real(Rsum[k]) > Teuchos::ScalarTraits<SC>::magnitude(1.00001)) {
349 ConstraintViolationSum[k] += (one - Rsum[k]); // rstumin
350 }
351 }
352
353 // check if row need modification
354 for (size_t k = 0; k < (size_t)nPDEs; k++) {
355 if (Teuchos::ScalarTraits<SC>::magnitude(ConstraintViolationSum[k]) != Teuchos::ScalarTraits<SC>::magnitude(zero))
356 checkRow = true;
357 }
358 // modify row
359 if (checkRow) {
360 for (LO j = 0; j < indices.size(); j++) {
361 if (Teuchos::ScalarTraits<SC>::real(vals[j]) > Teuchos::ScalarTraits<SC>::real(zero)) {
362 vals[j] += (ConstraintViolationSum[j % nPDEs] / (as<Scalar>(nPositive[j % nPDEs])));
363 }
364 }
365 for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum[k] = zero;
366 }
367 for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum[k] = zero;
368 for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive[k] = 0;
369 } // while (checkRow) ...
370 } // for (size_t i = 0; i < as<size_t>(P->getRowMap()->getNumNodeElements()); i++) ...
371} // SatsifyPConstraints()
372
373template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
375 const Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
376 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
377
378 LocalOrdinal maxEntriesPerRow = 100; // increased later if needed
379 Teuchos::ArrayRCP<Scalar> scalarData(3 * maxEntriesPerRow);
380 bool hasFeasible;
381
382 for (size_t i = 0; i < as<size_t>(P->getRowMap()->getLocalNumElements()); i++) {
383 Teuchos::ArrayView<const LocalOrdinal> indices;
384 Teuchos::ArrayView<const Scalar> vals1;
385 Teuchos::ArrayView<Scalar> vals;
386 P->getLocalRowView((LocalOrdinal)i, indices, vals1);
387 size_t nnz = indices.size();
388 if (nnz != 0) {
389 vals = ArrayView<Scalar>(const_cast<SC*>(vals1.getRawPtr()), nnz);
390 Scalar rsumTarget = zero;
391 for (size_t j = 0; j < nnz; j++) rsumTarget += vals[j];
392
393 if (nnz > as<size_t>(maxEntriesPerRow)) {
394 maxEntriesPerRow = nnz * 3;
395 scalarData.resize(3 * maxEntriesPerRow);
396 }
397 hasFeasible = constrainRow(vals.getRawPtr(), as<LocalOrdinal>(nnz), zero, one, rsumTarget, vals.getRawPtr(), scalarData.getRawPtr());
398
399 if (!hasFeasible) { // just set all entries to the same value giving a row sum of 1
400 for (size_t j = 0; j < nnz; j++) vals[j] = one / as<Scalar>(nnz);
401 }
402 }
403
404 } // for (size_t i = 0; i < as<size_t>(P->getRowMap()->getNumNodeElements()); i++) ...
405} // SatsifyPConstraints()
406
407template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
408bool SaPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::constrainRow(Scalar* orig, LocalOrdinal nEntries, Scalar leftBound, Scalar rghtBound, Scalar rsumTarget, Scalar* fixedUnsorted, Scalar* scalarData) const {
409 /*
410 Input
411 orig data that should be adjusted to satisfy bound constraints and
412 row sum constraint. orig is not modified by this function.
413
414 nEntries length or 'orig'
415
416 leftBound, define bound constraints for the results.
417 rghtBound
418
419 rsumTarget defines an equality constraint for the row sum
420
421 fixedUnsorted on output, if a feasible solutuion exists then
422 || orig - fixedUnsorted || = min when also
423 leftBound <= fixedUnsorted[i] <= rghtBound for all i
424 and sum(fixedUnsorted) = rsumTarget.
425
426 Note: it is possible to use the same pointer for
427 fixedUnsorted and orig. In this case, orig gets
428 overwritten with the new constraint satisfying values.
429
430 scalarData a work array that should be 3x nEntries.
431
432 On return constrain() indicates whether or not a feasible solution exists.
433 */
434
435 /*
436 Given a sequence of numbers o1 ... on, fix these so that they are as
437 close as possible to the original but satisfy bound constraints and also
438 have the same row sum as the oi's. If we know who is going to lie on a
439 bound, then the "best" answer (i.e., || o - f ||_2 = min) perturbs
440 each element that doesn't lie on a bound by the same amount.
441
442 We can represent the oi's by considering scattered points on a number line
443
444 | |
445 | |
446 o o o | o o o o o o |o o
447 | |
448
449 \____/ \____/
450 <---- <----
451 delta delta
452
453 Bounds are shown by vertical lines. The fi's must lie within the bounds, so
454 the 3 leftmost points must be shifted to the right and the 2 rightmost must
455 be shifted to the left. If these fi points are all shifted to the bounds
456 while the others remain in the same location, the row sum constraint is
457 likely not satisfied and so more shifting is necessary. In the figure, the f
458 rowsum is too large and so there must be more shifting to the left.
459
460 To minimize || o - f ||_2, we basically shift all "interiors" by the same
461 amount, denoted delta. The only trick is that some points near bounds are
462 still affected by the bounds and so these points might be shifted more or less
463 than delta. In the example,t he 3 rightmost points are shifted in the opposite
464 direction as delta to the bound. The 4th point is shifted by something less
465 than delta so it does not violate the lower bound. The rightmost point is
466 shifted to the bound by some amount larger than delta. However, the 2nd point
467 is shifted by delta (i.e., it lies inside the two bounds).
468
469 If we know delta, we can figure out everything. If we know which points
470 are special (not shifted by delta), we can also figure out everything.
471 The problem is these two things (delta and the special points) are
472 inter-connected. An algorithm for computing follows.
473
474 1) move exterior points to the bounds and compute how much the row sum is off
475 (rowSumDeviation). We assume now that the new row sum is high, so interior
476 points must be shifted left.
477
478 2) Mark closest point just left of the leftmost bound, closestToLeftBound,
479 and compute its distance to the leftmost bound. Mark closest point to the
480 left of the rightmost bound, closestToRghtBound, and compute its distance to
481 right bound. There are two cases to consider.
482
483 3) Case 1: closestToLeftBound is closer than closestToRghtBound.
484 Assume that shifting by delta does not move closestToLeftBound past the
485 left bound. This means that it will be shifted by delta. However,
486 closestToRghtBound will be shifted by more than delta. So the total
487 number of points shifted by delta (|interiorToBounds|) includes
488 closestToLeftBound up to and including the point just to the left of
489 closestToRghtBound. So
490
491 delta = rowSumDeviation/ |interiorToBounds| .
492
493 Recall that rowSumDeviation already accounts for the non-delta shift of
494 of closestToRightBound. Now check whether our assumption is valid.
495
496 If delta <= closestToLeftBoundDist, assumption is true so delta can be
497 applied to interiorToBounds ... and we are done.
498 Else assumption is false. Shift closestToLeftBound to the left bound.
499 Update rowSumDeviation, interiorToBounds, and identify new
500 closestToLeftBound. Repeat step 3).
501
502 Case 2: closestToRghtBound is closer than closestToLeftBound.
503 Assume that shifting by delta does not move closestToRghtBound past right
504 bound. This means that it must be shifted by more than delta to right
505 bound. So the total number of points shifted by delta again includes
506 closestToLeftBound up to and including the point just to the left of
507 closestToRghtBound. So again compute
508
509 delta = rowSumDeviation/ |interiorToBounds| .
510
511 If delta <= closestToRghtBoundDist, assumption is true so delta is
512 can be applied to interiorToBounds ... and we are done
513 Else assumption is false. Put closestToRghtBound in the
514 interiorToBounds set. Remove it's contribution to rowSumDeviation,
515 identify new closestToRghtBound. Repeat step 3)
516
517
518 To implement, sort the oi's so things like closestToLeftBound is just index
519 into sorted array. Updaing closestToLeftBound or closestToRghtBound requires
520 increment by 1. |interiorToBounds|= closestToRghtBound - closestToLeftBound
521 To handle the case when the rowsum is low (requiring right interior shifts),
522 just flip the signs on data and use the left-shift code (and then flip back
523 before exiting function.
524 */
525 bool hasFeasibleSol;
526 Scalar notFlippedLeftBound, notFlippedRghtBound, aBigNumber, *origSorted;
527 Scalar rowSumDeviation, temp, *fixedSorted, delta;
528 Scalar closestToLeftBoundDist, closestToRghtBoundDist;
529 LocalOrdinal closestToLeftBound, closestToRghtBound;
530 LocalOrdinal* inds;
531 bool flipped;
532
533 notFlippedLeftBound = leftBound;
534 notFlippedRghtBound = rghtBound;
535
536 if ((Teuchos::ScalarTraits<SC>::real(rsumTarget) >= Teuchos::ScalarTraits<SC>::real(leftBound * as<Scalar>(nEntries))) &&
537 (Teuchos::ScalarTraits<SC>::real(rsumTarget) <= Teuchos::ScalarTraits<SC>::real(rghtBound * as<Scalar>(nEntries))))
538 hasFeasibleSol = true;
539 else {
540 hasFeasibleSol = false;
541 return hasFeasibleSol;
542 }
543 flipped = false;
544 // compute aBigNumber to handle some corner cases where we need
545 // something large so that an if statement will be false
546 aBigNumber = Teuchos::ScalarTraits<SC>::zero();
547 for (LocalOrdinal i = 0; i < nEntries; i++) {
548 if (Teuchos::ScalarTraits<SC>::magnitude(orig[i]) > Teuchos::ScalarTraits<SC>::magnitude(aBigNumber))
549 aBigNumber = Teuchos::ScalarTraits<SC>::magnitude(orig[i]);
550 }
551 aBigNumber = aBigNumber + (Teuchos::ScalarTraits<SC>::magnitude(leftBound) + Teuchos::ScalarTraits<SC>::magnitude(rghtBound)) * as<Scalar>(100.0);
552
553 origSorted = &scalarData[0];
554 fixedSorted = &(scalarData[nEntries]);
555 inds = (LocalOrdinal*)&(scalarData[2 * nEntries]);
556
557 for (LocalOrdinal i = 0; i < nEntries; i++) inds[i] = i;
558 for (LocalOrdinal i = 0; i < nEntries; i++) origSorted[i] = orig[i]; /* orig no longer used */
559
560 // sort so that orig[inds] is sorted.
561 std::sort(inds, inds + nEntries,
562 [origSorted](LocalOrdinal leftIndex, LocalOrdinal rightIndex) { return Teuchos::ScalarTraits<SC>::real(origSorted[leftIndex]) < Teuchos::ScalarTraits<SC>::real(origSorted[rightIndex]); });
563
564 for (LocalOrdinal i = 0; i < nEntries; i++) origSorted[i] = orig[inds[i]];
565 // find entry in origSorted just to the right of the leftBound
566 closestToLeftBound = 0;
567 while ((closestToLeftBound < nEntries) && (Teuchos::ScalarTraits<SC>::real(origSorted[closestToLeftBound]) <= Teuchos::ScalarTraits<SC>::real(leftBound))) closestToLeftBound++;
568
569 // find entry in origSorted just to the right of the rghtBound
570 closestToRghtBound = closestToLeftBound;
571 while ((closestToRghtBound < nEntries) && (Teuchos::ScalarTraits<SC>::real(origSorted[closestToRghtBound]) <= Teuchos::ScalarTraits<SC>::real(rghtBound))) closestToRghtBound++;
572
573 // compute distance between closestToLeftBound and the left bound and the
574 // distance between closestToRghtBound and the right bound.
575
576 closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
577 if (closestToRghtBound == nEntries)
578 closestToRghtBoundDist = aBigNumber;
579 else
580 closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
581
582 // compute how far the rowSum is off from the target row sum taking into account
583 // numbers that have been shifted to satisfy bound constraint
584
585 rowSumDeviation = leftBound * as<Scalar>(closestToLeftBound) + as<Scalar>((nEntries - closestToRghtBound)) * rghtBound - rsumTarget;
586 for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation += origSorted[i];
587
588 // the code that follow after this if statement assumes that rowSumDeviation is positive. If this
589 // is not the case, flip the signs of everything so that rowSumDeviation is now positive.
590 // Later we will flip the data back to its original form.
591 if (Teuchos::ScalarTraits<SC>::real(rowSumDeviation) < Teuchos::ScalarTraits<SC>::real(Teuchos::ScalarTraits<SC>::zero())) {
592 flipped = true;
593 temp = leftBound;
594 leftBound = -rghtBound;
595 rghtBound = temp;
596
597 /* flip sign of origSorted and reverse ordering so that the negative version is sorted */
598
599 if ((nEntries % 2) == 1) origSorted[(nEntries / 2)] = -origSorted[(nEntries / 2)];
600 for (LocalOrdinal i = 0; i < nEntries / 2; i++) {
601 temp = origSorted[i];
602 origSorted[i] = -origSorted[nEntries - 1 - i];
603 origSorted[nEntries - i - 1] = -temp;
604 }
605
606 /* reverse bounds */
607
608 LocalOrdinal itemp = closestToLeftBound;
609 closestToLeftBound = nEntries - closestToRghtBound;
610 closestToRghtBound = nEntries - itemp;
611 closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
612 if (closestToRghtBound == nEntries)
613 closestToRghtBoundDist = aBigNumber;
614 else
615 closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
616
617 rowSumDeviation = -rowSumDeviation;
618 }
619
620 // initial fixedSorted so bounds are satisfied and interiors correspond to origSorted
621
622 for (LocalOrdinal i = 0; i < closestToLeftBound; i++) fixedSorted[i] = leftBound;
623 for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i];
624 for (LocalOrdinal i = closestToRghtBound; i < nEntries; i++) fixedSorted[i] = rghtBound;
625
626 while ((Teuchos::ScalarTraits<SC>::magnitude(rowSumDeviation) > Teuchos::ScalarTraits<SC>::magnitude(as<Scalar>(1.e-10) * rsumTarget))) { // && ( (closestToLeftBound < nEntries ) || (closestToRghtBound < nEntries))) {
627 if (closestToRghtBound != closestToLeftBound)
628 delta = rowSumDeviation / as<Scalar>(closestToRghtBound - closestToLeftBound);
629 else
630 delta = aBigNumber;
631
632 if (Teuchos::ScalarTraits<SC>::magnitude(closestToLeftBoundDist) <= Teuchos::ScalarTraits<SC>::magnitude(closestToRghtBoundDist)) {
633 if (Teuchos::ScalarTraits<SC>::magnitude(delta) <= Teuchos::ScalarTraits<SC>::magnitude(closestToLeftBoundDist)) {
634 rowSumDeviation = Teuchos::ScalarTraits<SC>::zero();
635 for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i] - delta;
636 } else {
637 rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
638 fixedSorted[closestToLeftBound] = leftBound;
639 closestToLeftBound++;
640 if (closestToLeftBound < nEntries)
641 closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
642 else
643 closestToLeftBoundDist = aBigNumber;
644 }
645 } else {
646 if (Teuchos::ScalarTraits<SC>::magnitude(delta) <= Teuchos::ScalarTraits<SC>::magnitude(closestToRghtBoundDist)) {
647 rowSumDeviation = 0;
648 for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i] - delta;
649 } else {
650 rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
651 // if (closestToRghtBound < nEntries) {
652 fixedSorted[closestToRghtBound] = origSorted[closestToRghtBound];
653 closestToRghtBound++;
654 // }
655 if (closestToRghtBound >= nEntries)
656 closestToRghtBoundDist = aBigNumber;
657 else
658 closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
659 }
660 }
661 }
662
663 if (flipped) {
664 /* flip sign of fixedSorted and reverse ordering so that the positve version is sorted */
665
666 if ((nEntries % 2) == 1) fixedSorted[(nEntries / 2)] = -fixedSorted[(nEntries / 2)];
667 for (LocalOrdinal i = 0; i < nEntries / 2; i++) {
668 temp = fixedSorted[i];
669 fixedSorted[i] = -fixedSorted[nEntries - 1 - i];
670 fixedSorted[nEntries - i - 1] = -temp;
671 }
672 }
673 for (LocalOrdinal i = 0; i < nEntries; i++) fixedUnsorted[inds[i]] = fixedSorted[i];
674
675 /* check that no constraints are violated */
676
677 bool lowerViolation = false;
678 bool upperViolation = false;
679 bool sumViolation = false;
680 using TST = Teuchos::ScalarTraits<SC>;
681 temp = TST::zero();
682 for (LocalOrdinal i = 0; i < nEntries; i++) {
683 if (TST::real(fixedUnsorted[i]) < TST::real(notFlippedLeftBound)) lowerViolation = true;
684 if (TST::real(fixedUnsorted[i]) > TST::real(notFlippedRghtBound)) upperViolation = true;
685 temp += fixedUnsorted[i];
686 }
687 SC tol = as<Scalar>(std::max(1.0e-8, as<double>(100 * TST::eps())));
688 if (TST::magnitude(temp - rsumTarget) > TST::magnitude(tol * rsumTarget)) sumViolation = true;
689
690 TEUCHOS_TEST_FOR_EXCEPTION(lowerViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in a lower bound violation??? ");
691 TEUCHOS_TEST_FOR_EXCEPTION(upperViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in an upper bound violation??? ");
692 TEUCHOS_TEST_FOR_EXCEPTION(sumViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in a row sum violation??? ");
693
694 return hasFeasibleSol;
695}
696
697template <typename local_matrix_type>
699 using Scalar = typename local_matrix_type::non_const_value_type;
700 using SC = Scalar;
701 using LO = typename local_matrix_type::non_const_ordinal_type;
702 using Device = typename local_matrix_type::device_type;
703#if KOKKOS_VERSION >= 40799
704 using KAT = KokkosKernels::ArithTraits<SC>;
705#else
706 using KAT = Kokkos::ArithTraits<SC>;
707#endif
708 const Scalar zero = KAT::zero();
709 const Scalar one = KAT::one();
711 local_matrix_type localP;
712 Kokkos::View<SC**, Device> ConstraintViolationSum;
713 Kokkos::View<SC**, Device> Rsum;
714 Kokkos::View<size_t**, Device> nPositive;
715
716 constraintKernel(LO nPDEs_, local_matrix_type localP_)
717 : nPDEs(nPDEs_)
718 , localP(localP_) {
719 ConstraintViolationSum = Kokkos::View<SC**, Device>("ConstraintViolationSum", localP_.numRows(), nPDEs);
720 Rsum = Kokkos::View<SC**, Device>("Rsum", localP_.numRows(), nPDEs);
721 nPositive = Kokkos::View<size_t**, Device>("nPositive", localP_.numRows(), nPDEs);
722 }
723
724 KOKKOS_INLINE_FUNCTION
725 void operator()(const size_t rowIdx) const {
726 auto rowPtr = localP.graph.row_map;
727 auto values = localP.values;
728
729 bool checkRow = true;
730
731 if (rowPtr(rowIdx + 1) == rowPtr(rowIdx)) checkRow = false;
732
733 while (checkRow) {
734 // check constraints and compute the row sum
735
736 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
737 Rsum(rowIdx, entryIdx % nPDEs) += values(entryIdx);
738 if (KAT::real(values(entryIdx)) < KAT::real(zero)) {
739 ConstraintViolationSum(rowIdx, entryIdx % nPDEs) += values(entryIdx);
740 values(entryIdx) = zero;
741 } else {
742 if (KAT::real(values(entryIdx)) != KAT::real(zero))
743 nPositive(rowIdx, entryIdx % nPDEs) = nPositive(rowIdx, entryIdx % nPDEs) + 1;
744
745 if (KAT::real(values(entryIdx)) > KAT::real(1.00001)) {
746 ConstraintViolationSum(rowIdx, entryIdx % nPDEs) += (values(entryIdx) - one);
747 values(entryIdx) = one;
748 }
749 }
750 }
751
752 checkRow = false;
753
754 // take into account any row sum that violates the contraints
755
756 for (size_t k = 0; k < (size_t)nPDEs; k++) {
757 if (KAT::real(Rsum(rowIdx, k)) < KAT::magnitude(zero)) {
758 ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) - Rsum(rowIdx, k); // rstumin
759 } else if (KAT::real(Rsum(rowIdx, k)) > KAT::magnitude(1.00001)) {
760 ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) + (one - Rsum(rowIdx, k)); // rstumin
761 }
762 }
763
764 // check if row need modification
765 for (size_t k = 0; k < (size_t)nPDEs; k++) {
766 if (KAT::magnitude(ConstraintViolationSum(rowIdx, k)) != KAT::magnitude(zero))
767 checkRow = true;
768 }
769 // modify row
770 if (checkRow) {
771 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
772 if (KAT::real(values(entryIdx)) > KAT::real(zero)) {
773 values(entryIdx) = values(entryIdx) +
774 (ConstraintViolationSum(rowIdx, entryIdx % nPDEs) / (Scalar(nPositive(rowIdx, entryIdx % nPDEs)) != zero ? Scalar(nPositive(rowIdx, entryIdx % nPDEs)) : one));
775 }
776 }
777 for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum(rowIdx, k) = zero;
778 }
779 for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum(rowIdx, k) = zero;
780 for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive(rowIdx, k) = 0;
781 } // while (checkRow) ...
782 }
783};
784
785template <typename local_matrix_type>
787 using Scalar = typename local_matrix_type::non_const_value_type;
788 using SC = Scalar;
789 using LO = typename local_matrix_type::non_const_ordinal_type;
790 using Device = typename local_matrix_type::device_type;
791#if KOKKOS_VERSION >= 40799
792 using KAT = KokkosKernels::ArithTraits<SC>;
793#else
794 using KAT = Kokkos::ArithTraits<SC>;
795#endif
796 const Scalar zero = KAT::zero();
797 const Scalar one = KAT::one();
799 local_matrix_type localP;
800 Kokkos::View<SC**, Device> origSorted;
801 Kokkos::View<SC**, Device> fixedSorted;
802 Kokkos::View<LO**, Device> inds;
803
804 optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)
805 : nPDEs(nPDEs_)
806 , localP(localP_) {
807 origSorted = Kokkos::View<SC**, Device>("origSorted", localP_.numRows(), maxRowEntries_);
808 fixedSorted = Kokkos::View<SC**, Device>("fixedSorted", localP_.numRows(), maxRowEntries_);
809 inds = Kokkos::View<LO**, Device>("inds", localP_.numRows(), maxRowEntries_);
810 }
811
812 KOKKOS_INLINE_FUNCTION
813 void operator()(const size_t rowIdx) const {
814 auto rowPtr = localP.graph.row_map;
815 auto values = localP.values;
816
817 auto nnz = rowPtr(rowIdx + 1) - rowPtr(rowIdx);
818
819 if (nnz != 0) {
820 Scalar rsumTarget = zero;
821 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) rsumTarget += values(entryIdx);
822
823 {
824 SC aBigNumber;
825 SC rowSumDeviation, temp, delta;
826 SC closestToLeftBoundDist, closestToRghtBoundDist;
827 LO closestToLeftBound, closestToRghtBound;
828 bool flipped;
829
830 SC leftBound = zero;
831 SC rghtBound = one;
832 if ((KAT::real(rsumTarget) >= KAT::real(leftBound * (static_cast<SC>(nnz)))) &&
833 (KAT::real(rsumTarget) <= KAT::real(rghtBound * (static_cast<SC>(nnz))))) { // has Feasible solution
834
835 flipped = false;
836 // compute aBigNumber to handle some corner cases where we need
837 // something large so that an if statement will be false
838 aBigNumber = KAT::zero();
839 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
840 if (KAT::magnitude(values(entryIdx)) > KAT::magnitude(aBigNumber))
841 aBigNumber = KAT::magnitude(values(entryIdx));
842 }
843 aBigNumber = aBigNumber + (KAT::magnitude(leftBound) + KAT::magnitude(rghtBound)) * (100 * one);
844
845 LO ind = 0;
846 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
847 origSorted(rowIdx, ind) = values(entryIdx);
848 inds(rowIdx, ind) = ind;
849 ind++;
850 }
851
852 auto sortVals = Kokkos::subview(origSorted, rowIdx, Kokkos::ALL());
853 auto sortInds = Kokkos::subview(inds, rowIdx, Kokkos::make_pair(0, ind));
854 // Need permutation indices to sort row values from smallest to largest,
855 // and unsort once row constraints are applied.
856 // serial insertion sort workaround from https://github.com/kokkos/kokkos/issues/645
857 for (LO i = 1; i < static_cast<LO>(nnz); ++i) {
858 ind = sortInds(i);
859 LO j = i;
860
861 if (KAT::real(sortVals(sortInds(i))) < KAT::real(sortVals(sortInds(0)))) {
862 for (; j > 0; --j) sortInds(j) = sortInds(j - 1);
863
864 sortInds(0) = ind;
865 } else {
866 for (; KAT::real(sortVals(ind)) < KAT::real(sortVals(sortInds(j - 1))); --j) sortInds(j) = sortInds(j - 1);
867
868 sortInds(j) = ind;
869 }
870 }
871
872 for (LO i = 0; i < static_cast<LO>(nnz); i++) origSorted(rowIdx, i) = values(rowPtr(rowIdx) + inds(rowIdx, i)); // values is no longer used
873 // find entry in origSorted just to the right of the leftBound
874 closestToLeftBound = 0;
875 while ((closestToLeftBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToLeftBound)) <= KAT::real(leftBound))) closestToLeftBound++;
876
877 // find entry in origSorted just to the right of the rghtBound
878 closestToRghtBound = closestToLeftBound;
879 while ((closestToRghtBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToRghtBound)) <= KAT::real(rghtBound))) closestToRghtBound++;
880
881 // compute distance between closestToLeftBound and the left bound and the
882 // distance between closestToRghtBound and the right bound.
883
884 closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
885 if (closestToRghtBound == static_cast<LO>(nnz))
886 closestToRghtBoundDist = aBigNumber;
887 else
888 closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
889
890 // compute how far the rowSum is off from the target row sum taking into account
891 // numbers that have been shifted to satisfy bound constraint
892
893 rowSumDeviation = leftBound * (static_cast<SC>(closestToLeftBound)) + (static_cast<SC>(nnz - closestToRghtBound)) * rghtBound - rsumTarget;
894 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation += origSorted(rowIdx, i);
895
896 // the code that follow after this if statement assumes that rowSumDeviation is positive. If this
897 // is not the case, flip the signs of everything so that rowSumDeviation is now positive.
898 // Later we will flip the data back to its original form.
899 if (KAT::real(rowSumDeviation) < KAT::real(KAT::zero())) {
900 flipped = true;
901 temp = leftBound;
902 leftBound = -rghtBound;
903 rghtBound = temp;
904
905 /* flip sign of origSorted and reverse ordering so that the negative version is sorted */
906
907 // TODO: the following is bad for GPU performance. Switch to bit shifting if brave.
908 if ((nnz % 2) == 1) origSorted(rowIdx, (nnz / 2)) = -origSorted(rowIdx, (nnz / 2));
909 for (LO i = 0; i < static_cast<LO>(nnz / 2); i++) {
910 temp = origSorted(rowIdx, i);
911 origSorted(rowIdx, i) = -origSorted(rowIdx, nnz - 1 - i);
912 origSorted(rowIdx, nnz - i - 1) = -temp;
913 }
914
915 /* reverse bounds */
916
917 LO itemp = closestToLeftBound;
918 closestToLeftBound = nnz - closestToRghtBound;
919 closestToRghtBound = nnz - itemp;
920 closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
921 if (closestToRghtBound == static_cast<LO>(nnz))
922 closestToRghtBoundDist = aBigNumber;
923 else
924 closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
925
926 rowSumDeviation = -rowSumDeviation;
927 }
928
929 // initial fixedSorted so bounds are satisfied and interiors correspond to origSorted
930
931 for (LO i = 0; i < closestToLeftBound; i++) fixedSorted(rowIdx, i) = leftBound;
932 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i);
933 for (LO i = closestToRghtBound; i < static_cast<LO>(nnz); i++) fixedSorted(rowIdx, i) = rghtBound;
934
935 while ((KAT::magnitude(rowSumDeviation) > KAT::magnitude((one * 1.e-10) * rsumTarget))) { // && ( (closestToLeftBound < nEntries ) || (closestToRghtBound < nEntries))) {
936 if (closestToRghtBound != closestToLeftBound)
937 delta = rowSumDeviation / static_cast<SC>(closestToRghtBound - closestToLeftBound);
938 else
939 delta = aBigNumber;
940
941 if (KAT::magnitude(closestToLeftBoundDist) <= KAT::magnitude(closestToRghtBoundDist)) {
942 if (KAT::magnitude(delta) <= KAT::magnitude(closestToLeftBoundDist)) {
943 rowSumDeviation = zero;
944 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
945 } else {
946 rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
947 fixedSorted(rowIdx, closestToLeftBound) = leftBound;
948 closestToLeftBound++;
949 if (closestToLeftBound < static_cast<LO>(nnz))
950 closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
951 else
952 closestToLeftBoundDist = aBigNumber;
953 }
954 } else {
955 if (KAT::magnitude(delta) <= KAT::magnitude(closestToRghtBoundDist)) {
956 rowSumDeviation = 0;
957 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
958 } else {
959 rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
960 // if (closestToRghtBound < nEntries) {
961 fixedSorted(rowIdx, closestToRghtBound) = origSorted(rowIdx, closestToRghtBound);
962 closestToRghtBound++;
963 // }
964 if (closestToRghtBound >= static_cast<LO>(nnz))
965 closestToRghtBoundDist = aBigNumber;
966 else
967 closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
968 }
969 }
970 }
971
972 auto rowStart = rowPtr(rowIdx);
973 if (flipped) {
974 /* flip sign of fixedSorted and reverse ordering so that the positve version is sorted */
975
976 if ((nnz % 2) == 1) fixedSorted(rowIdx, (nnz / 2)) = -fixedSorted(rowIdx, (nnz / 2));
977 for (LO i = 0; i < static_cast<LO>(nnz / 2); i++) {
978 temp = fixedSorted(rowIdx, i);
979 fixedSorted(rowIdx, i) = -fixedSorted(rowIdx, nnz - 1 - i);
980 fixedSorted(rowIdx, nnz - i - 1) = -temp;
981 }
982 }
983 // unsort and update row values with new values
984 for (LO i = 0; i < static_cast<LO>(nnz); i++) values(rowStart + inds(rowIdx, i)) = fixedSorted(rowIdx, i);
985
986 } else { // row does not have feasible solution to match constraint
987 // just set all entries to the same value giving a row sum of 1
988 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) values(entryIdx) = one / (static_cast<SC>(nnz));
989 }
990 }
991 }
992 }
993};
994
995template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
997 using Device = typename Matrix::local_matrix_device_type::device_type;
998 LO nPDEs = A->GetFixedBlockSize();
999
1000 using local_mat_type = typename Matrix::local_matrix_device_type;
1001 constraintKernel<local_mat_type> myKernel(nPDEs, P->getLocalMatrixDevice());
1002 Kokkos::parallel_for("enforce constraint", Kokkos::RangePolicy<typename Device::execution_space>(0, P->getRowMap()->getLocalNumElements()),
1003 myKernel);
1004
1005} // SatsifyPConstraints()
1006
1007template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1009 using Device = typename Matrix::local_matrix_device_type::device_type;
1010 LO nPDEs = 1; // A->GetFixedBlockSize();
1011
1012 using local_mat_type = typename Matrix::local_matrix_device_type;
1013 optimalSatisfyConstraintsForScalarPDEsKernel<local_mat_type> myKernel(nPDEs, P->getLocalMaxNumRowEntries(), P->getLocalMatrixDevice());
1014 Kokkos::parallel_for("enforce constraint", Kokkos::RangePolicy<typename Device::execution_space>(0, P->getLocalNumRows()),
1015 myKernel);
1016
1017} // SatsifyPConstraints()
1018
1019} // namespace MueLu
1020
1021#endif // MUELU_SAPFACTORY_DEF_HPP
1022
1023// TODO: restrictionMode_ should use the parameter list.
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Jacobi(Scalar omega, const Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Dinv, const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > C_in, Teuchos::FancyOStream &fos, const std::string &label, RCP< ParameterList > &params)
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
void SatisfyPConstraints(RCP< Matrix > A, RCP< Matrix > &P) const
void Build(Level &fineLevel, Level &coarseLevel) const
virtual ~SaPFactory()
Destructor.
void optimalSatisfyPConstraintsForScalarPDEsNonKokkos(RCP< Matrix > &P) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void optimalSatisfyPConstraintsForScalarPDEs(RCP< Matrix > &P) const
bool constrainRow(Scalar *orig, LocalOrdinal nEntries, Scalar leftBound, Scalar rghtBound, Scalar rsumTarget, Scalar *fixedUnsorted, Scalar *scalarData) const
SaPFactory()
Constructor. User can supply a factory for generating the tentative prolongator.
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void SatisfyPConstraintsNonKokkos(RCP< Matrix > A, RCP< Matrix > &P) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps() *100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool doLumped=false)
Extract Matrix Diagonal.
static Scalar PowerMethod(const Matrix &A, bool scaleByDiag=true, LocalOrdinal niters=10, Magnitude tolerance=1e-2, bool verbose=false, unsigned int seed=123)
Power method.
static Teuchos::RCP< Vector > GetLumpedMatrixDiagonal(Matrix const &A, const bool doReciprocal=false, Magnitude tol=Teuchos::ScalarTraits< Scalar >::magnitude(Teuchos::ScalarTraits< Scalar >::zero()), Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool replaceSingleEntryRowWithZero=false, const bool useAverageAbsDiagVal=false)
Extract Matrix Diagonal of lumped matrix.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Namespace for MueLu classes and methods.
@ Statistics2
Print even more statistics.
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
scalar_type dampingFactor
typename local_matrix_type::device_type Device
typename local_matrix_type::non_const_value_type Scalar
Kokkos::ArithTraits< SC > KAT
Kokkos::View< SC **, Device > ConstraintViolationSum
Kokkos::View< size_t **, Device > nPositive
constraintKernel(LO nPDEs_, local_matrix_type localP_)
typename local_matrix_type::non_const_ordinal_type LO
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
Kokkos::View< SC **, Device > Rsum
typename local_matrix_type::non_const_value_type Scalar
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)
typename local_matrix_type::non_const_ordinal_type LO