MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_SaPFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_SAPFACTORY_DEF_HPP
11#define MUELU_SAPFACTORY_DEF_HPP
12
13#include "KokkosKernels_ArithTraits.hpp"
15
16#include <Xpetra_Matrix.hpp>
17
19#include "MueLu_Level.hpp"
20#include "MueLu_MasterList.hpp"
21#include "MueLu_Monitor.hpp"
22#include "MueLu_PerfUtils.hpp"
23#include "MueLu_TentativePFactory.hpp"
24#include "MueLu_IteratorOps.hpp"
25#include "MueLu_Utilities.hpp"
26
27#include <sstream>
28
29namespace MueLu {
30
31template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
33
34template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
36
37template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
39 RCP<ParameterList> validParamList = rcp(new ParameterList());
40
41#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
42 SET_VALID_ENTRY("sa: damping factor");
43 SET_VALID_ENTRY("sa: calculate eigenvalue estimate");
44 SET_VALID_ENTRY("sa: eigenvalue estimate num iterations");
45 SET_VALID_ENTRY("sa: use rowsumabs diagonal scaling");
46 SET_VALID_ENTRY("sa: enforce constraints");
47 SET_VALID_ENTRY("tentative: calculate qr");
48 SET_VALID_ENTRY("sa: max eigenvalue");
49 SET_VALID_ENTRY("sa: rowsumabs diagonal replacement tolerance");
50 SET_VALID_ENTRY("sa: rowsumabs diagonal replacement value");
51 SET_VALID_ENTRY("sa: rowsumabs replace single entry row with zero");
52 SET_VALID_ENTRY("sa: rowsumabs use automatic diagonal tolerance");
53 SET_VALID_ENTRY("use kokkos refactor");
54#undef SET_VALID_ENTRY
55
56 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A used during the prolongator smoothing process");
57 validParamList->set<RCP<const FactoryBase> >("P", Teuchos::null, "Tentative prolongator factory");
58
59 // Make sure we don't recursively validate options for the matrixmatrix kernels
60 ParameterList norecurse;
61 norecurse.disableRecursiveValidation();
62 validParamList->set<ParameterList>("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
63
64 return validParamList;
65}
66
67template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
69 Input(fineLevel, "A");
70
71 // Get default tentative prolongator factory
72 // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
73 RCP<const FactoryBase> initialPFact = GetFactory("P");
74 if (initialPFact == Teuchos::null) {
75 initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent");
76 }
77 coarseLevel.DeclareInput("P", initialPFact.get(), this);
78}
79
80template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
82 return BuildP(fineLevel, coarseLevel);
83}
84
85template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
87 FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel);
88
89 std::string levelIDs = toString(coarseLevel.GetLevelID());
90
91 const std::string prefix = "MueLu::SaPFactory(" + levelIDs + "): ";
92
93 typedef typename Teuchos::ScalarTraits<SC>::coordinateType Coordinate;
94 typedef typename Teuchos::ScalarTraits<SC>::magnitudeType MT;
95
96 // Get default tentative prolongator factory
97 // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
98 // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
99 RCP<const FactoryBase> initialPFact = GetFactory("P");
100 if (initialPFact == Teuchos::null) {
101 initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent");
102 }
103 const ParameterList& pL = GetParameterList();
104
105 // Level Get
106 RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
107 RCP<Matrix> Ptent = coarseLevel.Get<RCP<Matrix> >("P", initialPFact.get());
108 RCP<Matrix> finalP;
109 // If Tentative facctory bailed out (e.g., number of global aggregates is 0), then SaPFactory bails
110 // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
111 if (Ptent == Teuchos::null) {
112 finalP = Teuchos::null;
113 Set(coarseLevel, "P", finalP);
114 return;
115 }
116
117 if (restrictionMode_) {
118 SubFactoryMonitor m2(*this, "Transpose A", coarseLevel);
119 A = Utilities::Transpose(*A, true); // build transpose of A explicitly
120 }
121
122 // Build final prolongator
123
124 // Reuse pattern if available
125 RCP<ParameterList> APparams;
126 if (pL.isSublist("matrixmatrix: kernel params"))
127 APparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
128 else
129 APparams = rcp(new ParameterList);
130 if (coarseLevel.IsAvailable("AP reuse data", this)) {
131 GetOStream(static_cast<MsgType>(Runtime0 | Test)) << "Reusing previous AP data" << std::endl;
132
133 APparams = coarseLevel.Get<RCP<ParameterList> >("AP reuse data", this);
134
135 if (APparams->isParameter("graph"))
136 finalP = APparams->get<RCP<Matrix> >("graph");
137 }
138 // By default, we don't need global constants for SaP
139 APparams->set("compute global constants: temporaries", APparams->get("compute global constants: temporaries", false));
140 APparams->set("compute global constants", APparams->get("compute global constants", false));
141
142 const SC dampingFactor = as<SC>(pL.get<double>("sa: damping factor"));
143 const LO maxEigenIterations = as<LO>(pL.get<int>("sa: eigenvalue estimate num iterations"));
144 const bool estimateMaxEigen = pL.get<bool>("sa: calculate eigenvalue estimate");
145 const bool useAbsValueRowSum = pL.get<bool>("sa: use rowsumabs diagonal scaling");
146 const bool doQRStep = pL.get<bool>("tentative: calculate qr");
147 const bool enforceConstraints = pL.get<bool>("sa: enforce constraints");
148 const MT userDefinedMaxEigen = as<MT>(pL.get<double>("sa: max eigenvalue"));
149 double dTol = pL.get<double>("sa: rowsumabs diagonal replacement tolerance");
150 const MT diagonalReplacementTolerance = (dTol == as<double>(-1) ? Teuchos::ScalarTraits<MT>::eps() * 100 : as<MT>(pL.get<double>("sa: rowsumabs diagonal replacement tolerance")));
151 const SC diagonalReplacementValue = as<SC>(pL.get<double>("sa: rowsumabs diagonal replacement value"));
152 const bool replaceSingleEntryRowWithZero = pL.get<bool>("sa: rowsumabs replace single entry row with zero");
153 const bool useAutomaticDiagTol = pL.get<bool>("sa: rowsumabs use automatic diagonal tolerance");
154
155 // Sanity checking
156 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && enforceConstraints, Exceptions::RuntimeError,
157 "MueLu::TentativePFactory::MakeTentative: cannot use 'enforce constraints' and 'calculate qr' at the same time");
158
159 if (dampingFactor != Teuchos::ScalarTraits<SC>::zero()) {
160 Scalar lambdaMax;
161 Teuchos::RCP<Vector> invDiag;
162 if (userDefinedMaxEigen == -1.) {
163 SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
164 lambdaMax = A->GetMaxEigenvalueEstimate();
165 if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
166 GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = " << maxEigenIterations << ((useAbsValueRowSum) ? ", use rowSumAbs diagonal)" : ", use point diagonal)") << std::endl;
167 Coordinate stopTol = 1e-4;
168 if (useAbsValueRowSum) {
169 const bool returnReciprocal = true;
170 invDiag = Utilities::GetLumpedMatrixDiagonal(*A, returnReciprocal,
171 diagonalReplacementTolerance,
172 diagonalReplacementValue,
173 replaceSingleEntryRowWithZero,
174 useAutomaticDiagTol);
175 TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError,
176 "SaPFactory: eigenvalue estimate: diagonal reciprocal is null.");
177 lambdaMax = Utilities::PowerMethod(*A, invDiag, maxEigenIterations, stopTol);
178 } else
179 lambdaMax = Utilities::PowerMethod(*A, true, maxEigenIterations, stopTol);
180 A->SetMaxEigenvalueEstimate(lambdaMax);
181 } else {
182 GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl;
183 }
184 } else {
185 lambdaMax = userDefinedMaxEigen;
186 A->SetMaxEigenvalueEstimate(lambdaMax);
187 }
188 GetOStream(Statistics1) << "Prolongator damping factor = " << dampingFactor / lambdaMax << " (" << dampingFactor << " / " << lambdaMax << ")" << std::endl;
189
190 {
191 SubFactoryMonitor m2(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel);
192 if (!useAbsValueRowSum)
193 invDiag = Utilities::GetMatrixDiagonalInverse(*A); // default
194 else if (invDiag == Teuchos::null) {
195 GetOStream(Runtime0) << "Using rowsumabs diagonal" << std::endl;
196 const bool returnReciprocal = true;
197 invDiag = Utilities::GetLumpedMatrixDiagonal(*A, returnReciprocal,
198 diagonalReplacementTolerance,
199 diagonalReplacementValue,
200 replaceSingleEntryRowWithZero,
201 useAutomaticDiagTol);
202 TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError, "SaPFactory: diagonal reciprocal is null.");
203 }
204
205 SC omega = dampingFactor / lambdaMax;
206 TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(Teuchos::ScalarTraits<SC>::magnitude(omega)), Exceptions::RuntimeError, "Prolongator damping factor needs to be finite.");
207
208 {
209 SubFactoryMonitor m3(*this, "MueLu::IteratorOps::Jacobi", coarseLevel);
210 // finalP = Ptent + (I - \omega D^{-1}A) Ptent
211 finalP = MueLu::IteratorOps<SC, LO, GO, NO>::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(Statistics2), std::string("MueLu::SaP-") + toString(coarseLevel.GetLevelID()), APparams);
212 if (enforceConstraints) {
213 if (!pL.get<bool>("use kokkos refactor")) {
214 if (A->GetFixedBlockSize() == 1)
215 optimalSatisfyPConstraintsForScalarPDEsNonKokkos(finalP);
216 else
217 SatisfyPConstraintsNonKokkos(A, finalP);
218 } else {
219 // if (A->GetFixedBlockSize() == 1)
220 // optimalSatisfyPConstraintsForScalarPDEs(finalP);
221 // else
222 SatisfyPConstraints(A, finalP);
223 }
224 }
225 }
226 }
227
228 } else {
229 finalP = Ptent;
230 }
231
232 // Level Set
233 RCP<Matrix> R;
234 if (!restrictionMode_) {
235 // prolongation factory is in prolongation mode
236 if (!finalP.is_null()) {
237 std::ostringstream oss;
238 oss << "P_" << coarseLevel.GetLevelID();
239 finalP->setObjectLabel(oss.str());
240 }
241 Set(coarseLevel, "P", finalP);
242
243 APparams->set("graph", finalP);
244 Set(coarseLevel, "AP reuse data", APparams);
245
246 // NOTE: EXPERIMENTAL
247 if (Ptent->IsView("stridedMaps"))
248 finalP->CreateView("stridedMaps", Ptent);
249
250 } else {
251 // prolongation factory is in restriction mode
252 {
253 SubFactoryMonitor m2(*this, "Transpose P", coarseLevel);
254 R = Utilities::Transpose(*finalP, true);
255 if (!R.is_null()) {
256 std::ostringstream oss;
257 oss << "R_" << coarseLevel.GetLevelID();
258 R->setObjectLabel(oss.str());
259 }
260 }
261
262 Set(coarseLevel, "R", R);
263
264 // NOTE: EXPERIMENTAL
265 if (Ptent->IsView("stridedMaps"))
266 R->CreateView("stridedMaps", Ptent, true /*transposeA*/);
267 }
268
269 if (IsPrint(Statistics2)) {
270 RCP<ParameterList> params = rcp(new ParameterList());
271 params->set("printLoadBalancingInfo", true);
272 params->set("printCommInfo", true);
273 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo((!restrictionMode_ ? *finalP : *R), (!restrictionMode_ ? "P" : "R"), params);
274 }
275
276} // Build()
277
278// Analyze the grid transfer produced by smoothed aggregation and make
279// modifications if it does not look right. In particular, if there are
280// negative entries or entries larger than 1, modify P's rows.
281//
282// Note: this kind of evaluation probably only makes sense if not doing QR
283// when constructing tentative P.
284//
285// For entries that do not satisfy the two constraints (>= 0 or <=1) we set
286// these entries to the constraint value and modify the rest of the row
287// so that the row sum remains the same as before by adding an equal
288// amount to each remaining entry. However, if the original row sum value
289// violates the constraints, we set the row sum back to 1 (the row sum of
290// tentative P). After doing the modification to a row, we need to check
291// again the entire row to make sure that the modified row does not violate
292// the constraints.
293
294template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
296 const Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
297 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
298 LO nPDEs = A->GetFixedBlockSize();
299 Teuchos::ArrayRCP<Scalar> ConstraintViolationSum(nPDEs);
300 Teuchos::ArrayRCP<Scalar> Rsum(nPDEs);
301 Teuchos::ArrayRCP<size_t> nPositive(nPDEs);
302 for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum[k] = zero;
303 for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum[k] = zero;
304 for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive[k] = 0;
305
306 for (size_t i = 0; i < as<size_t>(P->getRowMap()->getLocalNumElements()); i++) {
307 Teuchos::ArrayView<const LocalOrdinal> indices;
308 Teuchos::ArrayView<const Scalar> vals1;
309 Teuchos::ArrayView<Scalar> vals;
310 P->getLocalRowView((LO)i, indices, vals1);
311 size_t nnz = indices.size();
312 if (nnz == 0) continue;
313
314 vals = ArrayView<Scalar>(const_cast<SC*>(vals1.getRawPtr()), nnz);
315
316 bool checkRow = true;
317
318 while (checkRow) {
319 // check constraints and compute the row sum
320
321 for (LO j = 0; j < indices.size(); j++) {
322 Rsum[j % nPDEs] += vals[j];
323 if (Teuchos::ScalarTraits<SC>::real(vals[j]) < Teuchos::ScalarTraits<SC>::real(zero)) {
324 ConstraintViolationSum[j % nPDEs] += vals[j];
325 vals[j] = zero;
326 } else {
327 if (Teuchos::ScalarTraits<SC>::real(vals[j]) != Teuchos::ScalarTraits<SC>::real(zero))
328 (nPositive[j % nPDEs])++;
329
330 if (Teuchos::ScalarTraits<SC>::real(vals[j]) > Teuchos::ScalarTraits<SC>::real(1.00001)) {
331 ConstraintViolationSum[j % nPDEs] += (vals[j] - one);
332 vals[j] = one;
333 }
334 }
335 }
336
337 checkRow = false;
338
339 // take into account any row sum that violates the contraints
340
341 for (size_t k = 0; k < (size_t)nPDEs; k++) {
342 if (Teuchos::ScalarTraits<SC>::real(Rsum[k]) < Teuchos::ScalarTraits<SC>::magnitude(zero)) {
343 ConstraintViolationSum[k] += (-Rsum[k]); // rstumin
344 } else if (Teuchos::ScalarTraits<SC>::real(Rsum[k]) > Teuchos::ScalarTraits<SC>::magnitude(1.00001)) {
345 ConstraintViolationSum[k] += (one - Rsum[k]); // rstumin
346 }
347 }
348
349 // check if row need modification
350 for (size_t k = 0; k < (size_t)nPDEs; k++) {
351 if (Teuchos::ScalarTraits<SC>::magnitude(ConstraintViolationSum[k]) != Teuchos::ScalarTraits<SC>::magnitude(zero))
352 checkRow = true;
353 }
354 // modify row
355 if (checkRow) {
356 for (LO j = 0; j < indices.size(); j++) {
357 if (Teuchos::ScalarTraits<SC>::real(vals[j]) > Teuchos::ScalarTraits<SC>::real(zero)) {
358 vals[j] += (ConstraintViolationSum[j % nPDEs] / (as<Scalar>(nPositive[j % nPDEs])));
359 }
360 }
361 for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum[k] = zero;
362 }
363 for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum[k] = zero;
364 for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive[k] = 0;
365 } // while (checkRow) ...
366 } // for (size_t i = 0; i < as<size_t>(P->getRowMap()->getNumNodeElements()); i++) ...
367} // SatsifyPConstraints()
368
369template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
371 const Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
372 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
373
374 LocalOrdinal maxEntriesPerRow = 100; // increased later if needed
375 Teuchos::ArrayRCP<Scalar> scalarData(3 * maxEntriesPerRow);
376 bool hasFeasible;
377
378 for (size_t i = 0; i < as<size_t>(P->getRowMap()->getLocalNumElements()); i++) {
379 Teuchos::ArrayView<const LocalOrdinal> indices;
380 Teuchos::ArrayView<const Scalar> vals1;
381 Teuchos::ArrayView<Scalar> vals;
382 P->getLocalRowView((LocalOrdinal)i, indices, vals1);
383 size_t nnz = indices.size();
384 if (nnz != 0) {
385 vals = ArrayView<Scalar>(const_cast<SC*>(vals1.getRawPtr()), nnz);
386 Scalar rsumTarget = zero;
387 for (size_t j = 0; j < nnz; j++) rsumTarget += vals[j];
388
389 if (nnz > as<size_t>(maxEntriesPerRow)) {
390 maxEntriesPerRow = nnz * 3;
391 scalarData.resize(3 * maxEntriesPerRow);
392 }
393 hasFeasible = constrainRow(vals.getRawPtr(), as<LocalOrdinal>(nnz), zero, one, rsumTarget, vals.getRawPtr(), scalarData.getRawPtr());
394
395 if (!hasFeasible) { // just set all entries to the same value giving a row sum of 1
396 for (size_t j = 0; j < nnz; j++) vals[j] = one / as<Scalar>(nnz);
397 }
398 }
399
400 } // for (size_t i = 0; i < as<size_t>(P->getRowMap()->getNumNodeElements()); i++) ...
401} // SatsifyPConstraints()
402
403template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
404bool SaPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::constrainRow(Scalar* orig, LocalOrdinal nEntries, Scalar leftBound, Scalar rghtBound, Scalar rsumTarget, Scalar* fixedUnsorted, Scalar* scalarData) const {
405 /*
406 Input
407 orig data that should be adjusted to satisfy bound constraints and
408 row sum constraint. orig is not modified by this function.
409
410 nEntries length or 'orig'
411
412 leftBound, define bound constraints for the results.
413 rghtBound
414
415 rsumTarget defines an equality constraint for the row sum
416
417 fixedUnsorted on output, if a feasible solutuion exists then
418 || orig - fixedUnsorted || = min when also
419 leftBound <= fixedUnsorted[i] <= rghtBound for all i
420 and sum(fixedUnsorted) = rsumTarget.
421
422 Note: it is possible to use the same pointer for
423 fixedUnsorted and orig. In this case, orig gets
424 overwritten with the new constraint satisfying values.
425
426 scalarData a work array that should be 3x nEntries.
427
428 On return constrain() indicates whether or not a feasible solution exists.
429 */
430
431 /*
432 Given a sequence of numbers o1 ... on, fix these so that they are as
433 close as possible to the original but satisfy bound constraints and also
434 have the same row sum as the oi's. If we know who is going to lie on a
435 bound, then the "best" answer (i.e., || o - f ||_2 = min) perturbs
436 each element that doesn't lie on a bound by the same amount.
437
438 We can represent the oi's by considering scattered points on a number line
439
440 | |
441 | |
442 o o o | o o o o o o |o o
443 | |
444
445 \____/ \____/
446 <---- <----
447 delta delta
448
449 Bounds are shown by vertical lines. The fi's must lie within the bounds, so
450 the 3 leftmost points must be shifted to the right and the 2 rightmost must
451 be shifted to the left. If these fi points are all shifted to the bounds
452 while the others remain in the same location, the row sum constraint is
453 likely not satisfied and so more shifting is necessary. In the figure, the f
454 rowsum is too large and so there must be more shifting to the left.
455
456 To minimize || o - f ||_2, we basically shift all "interiors" by the same
457 amount, denoted delta. The only trick is that some points near bounds are
458 still affected by the bounds and so these points might be shifted more or less
459 than delta. In the example,t he 3 rightmost points are shifted in the opposite
460 direction as delta to the bound. The 4th point is shifted by something less
461 than delta so it does not violate the lower bound. The rightmost point is
462 shifted to the bound by some amount larger than delta. However, the 2nd point
463 is shifted by delta (i.e., it lies inside the two bounds).
464
465 If we know delta, we can figure out everything. If we know which points
466 are special (not shifted by delta), we can also figure out everything.
467 The problem is these two things (delta and the special points) are
468 inter-connected. An algorithm for computing follows.
469
470 1) move exterior points to the bounds and compute how much the row sum is off
471 (rowSumDeviation). We assume now that the new row sum is high, so interior
472 points must be shifted left.
473
474 2) Mark closest point just left of the leftmost bound, closestToLeftBound,
475 and compute its distance to the leftmost bound. Mark closest point to the
476 left of the rightmost bound, closestToRghtBound, and compute its distance to
477 right bound. There are two cases to consider.
478
479 3) Case 1: closestToLeftBound is closer than closestToRghtBound.
480 Assume that shifting by delta does not move closestToLeftBound past the
481 left bound. This means that it will be shifted by delta. However,
482 closestToRghtBound will be shifted by more than delta. So the total
483 number of points shifted by delta (|interiorToBounds|) includes
484 closestToLeftBound up to and including the point just to the left of
485 closestToRghtBound. So
486
487 delta = rowSumDeviation/ |interiorToBounds| .
488
489 Recall that rowSumDeviation already accounts for the non-delta shift of
490 of closestToRightBound. Now check whether our assumption is valid.
491
492 If delta <= closestToLeftBoundDist, assumption is true so delta can be
493 applied to interiorToBounds ... and we are done.
494 Else assumption is false. Shift closestToLeftBound to the left bound.
495 Update rowSumDeviation, interiorToBounds, and identify new
496 closestToLeftBound. Repeat step 3).
497
498 Case 2: closestToRghtBound is closer than closestToLeftBound.
499 Assume that shifting by delta does not move closestToRghtBound past right
500 bound. This means that it must be shifted by more than delta to right
501 bound. So the total number of points shifted by delta again includes
502 closestToLeftBound up to and including the point just to the left of
503 closestToRghtBound. So again compute
504
505 delta = rowSumDeviation/ |interiorToBounds| .
506
507 If delta <= closestToRghtBoundDist, assumption is true so delta is
508 can be applied to interiorToBounds ... and we are done
509 Else assumption is false. Put closestToRghtBound in the
510 interiorToBounds set. Remove it's contribution to rowSumDeviation,
511 identify new closestToRghtBound. Repeat step 3)
512
513
514 To implement, sort the oi's so things like closestToLeftBound is just index
515 into sorted array. Updaing closestToLeftBound or closestToRghtBound requires
516 increment by 1. |interiorToBounds|= closestToRghtBound - closestToLeftBound
517 To handle the case when the rowsum is low (requiring right interior shifts),
518 just flip the signs on data and use the left-shift code (and then flip back
519 before exiting function.
520 */
521 bool hasFeasibleSol;
522 Scalar notFlippedLeftBound, notFlippedRghtBound, aBigNumber, *origSorted;
523 Scalar rowSumDeviation, temp, *fixedSorted, delta;
524 Scalar closestToLeftBoundDist, closestToRghtBoundDist;
525 LocalOrdinal closestToLeftBound, closestToRghtBound;
526 LocalOrdinal* inds;
527 bool flipped;
528
529 notFlippedLeftBound = leftBound;
530 notFlippedRghtBound = rghtBound;
531
532 if ((Teuchos::ScalarTraits<SC>::real(rsumTarget) >= Teuchos::ScalarTraits<SC>::real(leftBound * as<Scalar>(nEntries))) &&
533 (Teuchos::ScalarTraits<SC>::real(rsumTarget) <= Teuchos::ScalarTraits<SC>::real(rghtBound * as<Scalar>(nEntries))))
534 hasFeasibleSol = true;
535 else {
536 hasFeasibleSol = false;
537 return hasFeasibleSol;
538 }
539 flipped = false;
540 // compute aBigNumber to handle some corner cases where we need
541 // something large so that an if statement will be false
542 aBigNumber = Teuchos::ScalarTraits<SC>::zero();
543 for (LocalOrdinal i = 0; i < nEntries; i++) {
544 if (Teuchos::ScalarTraits<SC>::magnitude(orig[i]) > Teuchos::ScalarTraits<SC>::magnitude(aBigNumber))
545 aBigNumber = Teuchos::ScalarTraits<SC>::magnitude(orig[i]);
546 }
547 aBigNumber = aBigNumber + (Teuchos::ScalarTraits<SC>::magnitude(leftBound) + Teuchos::ScalarTraits<SC>::magnitude(rghtBound)) * as<Scalar>(100.0);
548
549 origSorted = &scalarData[0];
550 fixedSorted = &(scalarData[nEntries]);
551 inds = (LocalOrdinal*)&(scalarData[2 * nEntries]);
552
553 for (LocalOrdinal i = 0; i < nEntries; i++) inds[i] = i;
554 for (LocalOrdinal i = 0; i < nEntries; i++) origSorted[i] = orig[i]; /* orig no longer used */
555
556 // sort so that orig[inds] is sorted.
557 std::sort(inds, inds + nEntries,
558 [origSorted](LocalOrdinal leftIndex, LocalOrdinal rightIndex) { return Teuchos::ScalarTraits<SC>::real(origSorted[leftIndex]) < Teuchos::ScalarTraits<SC>::real(origSorted[rightIndex]); });
559
560 for (LocalOrdinal i = 0; i < nEntries; i++) origSorted[i] = orig[inds[i]];
561 // find entry in origSorted just to the right of the leftBound
562 closestToLeftBound = 0;
563 while ((closestToLeftBound < nEntries) && (Teuchos::ScalarTraits<SC>::real(origSorted[closestToLeftBound]) <= Teuchos::ScalarTraits<SC>::real(leftBound))) closestToLeftBound++;
564
565 // find entry in origSorted just to the right of the rghtBound
566 closestToRghtBound = closestToLeftBound;
567 while ((closestToRghtBound < nEntries) && (Teuchos::ScalarTraits<SC>::real(origSorted[closestToRghtBound]) <= Teuchos::ScalarTraits<SC>::real(rghtBound))) closestToRghtBound++;
568
569 // compute distance between closestToLeftBound and the left bound and the
570 // distance between closestToRghtBound and the right bound.
571
572 closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
573 if (closestToRghtBound == nEntries)
574 closestToRghtBoundDist = aBigNumber;
575 else
576 closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
577
578 // compute how far the rowSum is off from the target row sum taking into account
579 // numbers that have been shifted to satisfy bound constraint
580
581 rowSumDeviation = leftBound * as<Scalar>(closestToLeftBound) + as<Scalar>((nEntries - closestToRghtBound)) * rghtBound - rsumTarget;
582 for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation += origSorted[i];
583
584 // the code that follow after this if statement assumes that rowSumDeviation is positive. If this
585 // is not the case, flip the signs of everything so that rowSumDeviation is now positive.
586 // Later we will flip the data back to its original form.
587 if (Teuchos::ScalarTraits<SC>::real(rowSumDeviation) < Teuchos::ScalarTraits<SC>::real(Teuchos::ScalarTraits<SC>::zero())) {
588 flipped = true;
589 temp = leftBound;
590 leftBound = -rghtBound;
591 rghtBound = temp;
592
593 /* flip sign of origSorted and reverse ordering so that the negative version is sorted */
594
595 if ((nEntries % 2) == 1) origSorted[(nEntries / 2)] = -origSorted[(nEntries / 2)];
596 for (LocalOrdinal i = 0; i < nEntries / 2; i++) {
597 temp = origSorted[i];
598 origSorted[i] = -origSorted[nEntries - 1 - i];
599 origSorted[nEntries - i - 1] = -temp;
600 }
601
602 /* reverse bounds */
603
604 LocalOrdinal itemp = closestToLeftBound;
605 closestToLeftBound = nEntries - closestToRghtBound;
606 closestToRghtBound = nEntries - itemp;
607 closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
608 if (closestToRghtBound == nEntries)
609 closestToRghtBoundDist = aBigNumber;
610 else
611 closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
612
613 rowSumDeviation = -rowSumDeviation;
614 }
615
616 // initial fixedSorted so bounds are satisfied and interiors correspond to origSorted
617
618 for (LocalOrdinal i = 0; i < closestToLeftBound; i++) fixedSorted[i] = leftBound;
619 for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i];
620 for (LocalOrdinal i = closestToRghtBound; i < nEntries; i++) fixedSorted[i] = rghtBound;
621
622 while ((Teuchos::ScalarTraits<SC>::magnitude(rowSumDeviation) > Teuchos::ScalarTraits<SC>::magnitude(as<Scalar>(1.e-10) * rsumTarget))) { // && ( (closestToLeftBound < nEntries ) || (closestToRghtBound < nEntries))) {
623 if (closestToRghtBound != closestToLeftBound)
624 delta = rowSumDeviation / as<Scalar>(closestToRghtBound - closestToLeftBound);
625 else
626 delta = aBigNumber;
627
628 if (Teuchos::ScalarTraits<SC>::magnitude(closestToLeftBoundDist) <= Teuchos::ScalarTraits<SC>::magnitude(closestToRghtBoundDist)) {
629 if (Teuchos::ScalarTraits<SC>::magnitude(delta) <= Teuchos::ScalarTraits<SC>::magnitude(closestToLeftBoundDist)) {
630 rowSumDeviation = Teuchos::ScalarTraits<SC>::zero();
631 for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i] - delta;
632 } else {
633 rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
634 fixedSorted[closestToLeftBound] = leftBound;
635 closestToLeftBound++;
636 if (closestToLeftBound < nEntries)
637 closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
638 else
639 closestToLeftBoundDist = aBigNumber;
640 }
641 } else {
642 if (Teuchos::ScalarTraits<SC>::magnitude(delta) <= Teuchos::ScalarTraits<SC>::magnitude(closestToRghtBoundDist)) {
643 rowSumDeviation = 0;
644 for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i] - delta;
645 } else {
646 rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
647 // if (closestToRghtBound < nEntries) {
648 fixedSorted[closestToRghtBound] = origSorted[closestToRghtBound];
649 closestToRghtBound++;
650 // }
651 if (closestToRghtBound >= nEntries)
652 closestToRghtBoundDist = aBigNumber;
653 else
654 closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
655 }
656 }
657 }
658
659 if (flipped) {
660 /* flip sign of fixedSorted and reverse ordering so that the positve version is sorted */
661
662 if ((nEntries % 2) == 1) fixedSorted[(nEntries / 2)] = -fixedSorted[(nEntries / 2)];
663 for (LocalOrdinal i = 0; i < nEntries / 2; i++) {
664 temp = fixedSorted[i];
665 fixedSorted[i] = -fixedSorted[nEntries - 1 - i];
666 fixedSorted[nEntries - i - 1] = -temp;
667 }
668 }
669 for (LocalOrdinal i = 0; i < nEntries; i++) fixedUnsorted[inds[i]] = fixedSorted[i];
670
671 /* check that no constraints are violated */
672
673 bool lowerViolation = false;
674 bool upperViolation = false;
675 bool sumViolation = false;
676 using TST = Teuchos::ScalarTraits<SC>;
677 temp = TST::zero();
678 for (LocalOrdinal i = 0; i < nEntries; i++) {
679 if (TST::real(fixedUnsorted[i]) < TST::real(notFlippedLeftBound)) lowerViolation = true;
680 if (TST::real(fixedUnsorted[i]) > TST::real(notFlippedRghtBound)) upperViolation = true;
681 temp += fixedUnsorted[i];
682 }
683 SC tol = as<Scalar>(std::max(1.0e-8, as<double>(100 * TST::eps())));
684 if (TST::magnitude(temp - rsumTarget) > TST::magnitude(tol * rsumTarget)) sumViolation = true;
685
686 TEUCHOS_TEST_FOR_EXCEPTION(lowerViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in a lower bound violation??? ");
687 TEUCHOS_TEST_FOR_EXCEPTION(upperViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in an upper bound violation??? ");
688 TEUCHOS_TEST_FOR_EXCEPTION(sumViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in a row sum violation??? ");
689
690 return hasFeasibleSol;
691}
692
693template <typename local_matrix_type>
695 using Scalar = typename local_matrix_type::non_const_value_type;
696 using SC = Scalar;
697 using LO = typename local_matrix_type::non_const_ordinal_type;
698 using Device = typename local_matrix_type::device_type;
699 using KAT = KokkosKernels::ArithTraits<SC>;
700 const Scalar zero = KAT::zero();
701 const Scalar one = KAT::one();
703 local_matrix_type localP;
704 Kokkos::View<SC**, Device> ConstraintViolationSum;
705 Kokkos::View<SC**, Device> Rsum;
706 Kokkos::View<size_t**, Device> nPositive;
707
708 constraintKernel(LO nPDEs_, local_matrix_type localP_)
709 : nPDEs(nPDEs_)
710 , localP(localP_) {
711 ConstraintViolationSum = Kokkos::View<SC**, Device>("ConstraintViolationSum", localP_.numRows(), nPDEs);
712 Rsum = Kokkos::View<SC**, Device>("Rsum", localP_.numRows(), nPDEs);
713 nPositive = Kokkos::View<size_t**, Device>("nPositive", localP_.numRows(), nPDEs);
714 }
715
716 KOKKOS_INLINE_FUNCTION
717 void operator()(const size_t rowIdx) const {
718 auto rowPtr = localP.graph.row_map;
719 auto values = localP.values;
720
721 bool checkRow = true;
722
723 if (rowPtr(rowIdx + 1) == rowPtr(rowIdx)) checkRow = false;
724
725 while (checkRow) {
726 // check constraints and compute the row sum
727
728 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
729 Rsum(rowIdx, entryIdx % nPDEs) += values(entryIdx);
730 if (KAT::real(values(entryIdx)) < KAT::real(zero)) {
731 ConstraintViolationSum(rowIdx, entryIdx % nPDEs) += values(entryIdx);
732 values(entryIdx) = zero;
733 } else {
734 if (KAT::real(values(entryIdx)) != KAT::real(zero))
735 nPositive(rowIdx, entryIdx % nPDEs) = nPositive(rowIdx, entryIdx % nPDEs) + 1;
736
737 if (KAT::real(values(entryIdx)) > KAT::real(1.00001)) {
738 ConstraintViolationSum(rowIdx, entryIdx % nPDEs) += (values(entryIdx) - one);
739 values(entryIdx) = one;
740 }
741 }
742 }
743
744 checkRow = false;
745
746 // take into account any row sum that violates the contraints
747
748 for (size_t k = 0; k < (size_t)nPDEs; k++) {
749 if (KAT::real(Rsum(rowIdx, k)) < KAT::magnitude(zero)) {
750 ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) - Rsum(rowIdx, k); // rstumin
751 } else if (KAT::real(Rsum(rowIdx, k)) > KAT::magnitude(1.00001)) {
752 ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) + (one - Rsum(rowIdx, k)); // rstumin
753 }
754 }
755
756 // check if row need modification
757 for (size_t k = 0; k < (size_t)nPDEs; k++) {
758 if (KAT::magnitude(ConstraintViolationSum(rowIdx, k)) != KAT::magnitude(zero))
759 checkRow = true;
760 }
761 // modify row
762 if (checkRow) {
763 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
764 if (KAT::real(values(entryIdx)) > KAT::real(zero)) {
765 values(entryIdx) = values(entryIdx) +
766 (ConstraintViolationSum(rowIdx, entryIdx % nPDEs) / (Scalar(nPositive(rowIdx, entryIdx % nPDEs)) != zero ? Scalar(nPositive(rowIdx, entryIdx % nPDEs)) : one));
767 }
768 }
769 for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum(rowIdx, k) = zero;
770 }
771 for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum(rowIdx, k) = zero;
772 for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive(rowIdx, k) = 0;
773 } // while (checkRow) ...
774 }
775};
776
777template <typename local_matrix_type>
779 using Scalar = typename local_matrix_type::non_const_value_type;
780 using SC = Scalar;
781 using LO = typename local_matrix_type::non_const_ordinal_type;
782 using Device = typename local_matrix_type::device_type;
783 using KAT = KokkosKernels::ArithTraits<SC>;
784 const Scalar zero = KAT::zero();
785 const Scalar one = KAT::one();
787 local_matrix_type localP;
788 Kokkos::View<SC**, Device> origSorted;
789 Kokkos::View<SC**, Device> fixedSorted;
790 Kokkos::View<LO**, Device> inds;
791
792 optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)
793 : nPDEs(nPDEs_)
794 , localP(localP_) {
795 origSorted = Kokkos::View<SC**, Device>("origSorted", localP_.numRows(), maxRowEntries_);
796 fixedSorted = Kokkos::View<SC**, Device>("fixedSorted", localP_.numRows(), maxRowEntries_);
797 inds = Kokkos::View<LO**, Device>("inds", localP_.numRows(), maxRowEntries_);
798 }
799
800 KOKKOS_INLINE_FUNCTION
801 void operator()(const size_t rowIdx) const {
802 auto rowPtr = localP.graph.row_map;
803 auto values = localP.values;
804
805 auto nnz = rowPtr(rowIdx + 1) - rowPtr(rowIdx);
806
807 if (nnz != 0) {
808 Scalar rsumTarget = zero;
809 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) rsumTarget += values(entryIdx);
810
811 {
812 SC aBigNumber;
813 SC rowSumDeviation, temp, delta;
814 SC closestToLeftBoundDist, closestToRghtBoundDist;
815 LO closestToLeftBound, closestToRghtBound;
816 bool flipped;
817
818 SC leftBound = zero;
819 SC rghtBound = one;
820 if ((KAT::real(rsumTarget) >= KAT::real(leftBound * (static_cast<SC>(nnz)))) &&
821 (KAT::real(rsumTarget) <= KAT::real(rghtBound * (static_cast<SC>(nnz))))) { // has Feasible solution
822
823 flipped = false;
824 // compute aBigNumber to handle some corner cases where we need
825 // something large so that an if statement will be false
826 aBigNumber = KAT::zero();
827 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
828 if (KAT::magnitude(values(entryIdx)) > KAT::magnitude(aBigNumber))
829 aBigNumber = KAT::magnitude(values(entryIdx));
830 }
831 aBigNumber = aBigNumber + (KAT::magnitude(leftBound) + KAT::magnitude(rghtBound)) * (100 * one);
832
833 LO ind = 0;
834 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
835 origSorted(rowIdx, ind) = values(entryIdx);
836 inds(rowIdx, ind) = ind;
837 ind++;
838 }
839
840 auto sortVals = Kokkos::subview(origSorted, rowIdx, Kokkos::ALL());
841 auto sortInds = Kokkos::subview(inds, rowIdx, Kokkos::make_pair(0, ind));
842 // Need permutation indices to sort row values from smallest to largest,
843 // and unsort once row constraints are applied.
844 // serial insertion sort workaround from https://github.com/kokkos/kokkos/issues/645
845 for (LO i = 1; i < static_cast<LO>(nnz); ++i) {
846 ind = sortInds(i);
847 LO j = i;
848
849 if (KAT::real(sortVals(sortInds(i))) < KAT::real(sortVals(sortInds(0)))) {
850 for (; j > 0; --j) sortInds(j) = sortInds(j - 1);
851
852 sortInds(0) = ind;
853 } else {
854 for (; KAT::real(sortVals(ind)) < KAT::real(sortVals(sortInds(j - 1))); --j) sortInds(j) = sortInds(j - 1);
855
856 sortInds(j) = ind;
857 }
858 }
859
860 for (LO i = 0; i < static_cast<LO>(nnz); i++) origSorted(rowIdx, i) = values(rowPtr(rowIdx) + inds(rowIdx, i)); // values is no longer used
861 // find entry in origSorted just to the right of the leftBound
862 closestToLeftBound = 0;
863 while ((closestToLeftBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToLeftBound)) <= KAT::real(leftBound))) closestToLeftBound++;
864
865 // find entry in origSorted just to the right of the rghtBound
866 closestToRghtBound = closestToLeftBound;
867 while ((closestToRghtBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToRghtBound)) <= KAT::real(rghtBound))) closestToRghtBound++;
868
869 // compute distance between closestToLeftBound and the left bound and the
870 // distance between closestToRghtBound and the right bound.
871
872 closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
873 if (closestToRghtBound == static_cast<LO>(nnz))
874 closestToRghtBoundDist = aBigNumber;
875 else
876 closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
877
878 // compute how far the rowSum is off from the target row sum taking into account
879 // numbers that have been shifted to satisfy bound constraint
880
881 rowSumDeviation = leftBound * (static_cast<SC>(closestToLeftBound)) + (static_cast<SC>(nnz - closestToRghtBound)) * rghtBound - rsumTarget;
882 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation += origSorted(rowIdx, i);
883
884 // the code that follow after this if statement assumes that rowSumDeviation is positive. If this
885 // is not the case, flip the signs of everything so that rowSumDeviation is now positive.
886 // Later we will flip the data back to its original form.
887 if (KAT::real(rowSumDeviation) < KAT::real(KAT::zero())) {
888 flipped = true;
889 temp = leftBound;
890 leftBound = -rghtBound;
891 rghtBound = temp;
892
893 /* flip sign of origSorted and reverse ordering so that the negative version is sorted */
894
895 // TODO: the following is bad for GPU performance. Switch to bit shifting if brave.
896 if ((nnz % 2) == 1) origSorted(rowIdx, (nnz / 2)) = -origSorted(rowIdx, (nnz / 2));
897 for (LO i = 0; i < static_cast<LO>(nnz / 2); i++) {
898 temp = origSorted(rowIdx, i);
899 origSorted(rowIdx, i) = -origSorted(rowIdx, nnz - 1 - i);
900 origSorted(rowIdx, nnz - i - 1) = -temp;
901 }
902
903 /* reverse bounds */
904
905 LO itemp = closestToLeftBound;
906 closestToLeftBound = nnz - closestToRghtBound;
907 closestToRghtBound = nnz - itemp;
908 closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
909 if (closestToRghtBound == static_cast<LO>(nnz))
910 closestToRghtBoundDist = aBigNumber;
911 else
912 closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
913
914 rowSumDeviation = -rowSumDeviation;
915 }
916
917 // initial fixedSorted so bounds are satisfied and interiors correspond to origSorted
918
919 for (LO i = 0; i < closestToLeftBound; i++) fixedSorted(rowIdx, i) = leftBound;
920 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i);
921 for (LO i = closestToRghtBound; i < static_cast<LO>(nnz); i++) fixedSorted(rowIdx, i) = rghtBound;
922
923 while ((KAT::magnitude(rowSumDeviation) > KAT::magnitude((one * 1.e-10) * rsumTarget))) { // && ( (closestToLeftBound < nEntries ) || (closestToRghtBound < nEntries))) {
924 if (closestToRghtBound != closestToLeftBound)
925 delta = rowSumDeviation / static_cast<SC>(closestToRghtBound - closestToLeftBound);
926 else
927 delta = aBigNumber;
928
929 if (KAT::magnitude(closestToLeftBoundDist) <= KAT::magnitude(closestToRghtBoundDist)) {
930 if (KAT::magnitude(delta) <= KAT::magnitude(closestToLeftBoundDist)) {
931 rowSumDeviation = zero;
932 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
933 } else {
934 rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
935 fixedSorted(rowIdx, closestToLeftBound) = leftBound;
936 closestToLeftBound++;
937 if (closestToLeftBound < static_cast<LO>(nnz))
938 closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
939 else
940 closestToLeftBoundDist = aBigNumber;
941 }
942 } else {
943 if (KAT::magnitude(delta) <= KAT::magnitude(closestToRghtBoundDist)) {
944 rowSumDeviation = 0;
945 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
946 } else {
947 rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
948 // if (closestToRghtBound < nEntries) {
949 fixedSorted(rowIdx, closestToRghtBound) = origSorted(rowIdx, closestToRghtBound);
950 closestToRghtBound++;
951 // }
952 if (closestToRghtBound >= static_cast<LO>(nnz))
953 closestToRghtBoundDist = aBigNumber;
954 else
955 closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
956 }
957 }
958 }
959
960 auto rowStart = rowPtr(rowIdx);
961 if (flipped) {
962 /* flip sign of fixedSorted and reverse ordering so that the positve version is sorted */
963
964 if ((nnz % 2) == 1) fixedSorted(rowIdx, (nnz / 2)) = -fixedSorted(rowIdx, (nnz / 2));
965 for (LO i = 0; i < static_cast<LO>(nnz / 2); i++) {
966 temp = fixedSorted(rowIdx, i);
967 fixedSorted(rowIdx, i) = -fixedSorted(rowIdx, nnz - 1 - i);
968 fixedSorted(rowIdx, nnz - i - 1) = -temp;
969 }
970 }
971 // unsort and update row values with new values
972 for (LO i = 0; i < static_cast<LO>(nnz); i++) values(rowStart + inds(rowIdx, i)) = fixedSorted(rowIdx, i);
973
974 } else { // row does not have feasible solution to match constraint
975 // just set all entries to the same value giving a row sum of 1
976 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) values(entryIdx) = one / (static_cast<SC>(nnz));
977 }
978 }
979 }
980 }
981};
982
983template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
985 using Device = typename Matrix::local_matrix_device_type::device_type;
986 LO nPDEs = A->GetFixedBlockSize();
987
988 using local_mat_type = typename Matrix::local_matrix_device_type;
989 constraintKernel<local_mat_type> myKernel(nPDEs, P->getLocalMatrixDevice());
990 Kokkos::parallel_for("enforce constraint", Kokkos::RangePolicy<typename Device::execution_space>(0, P->getRowMap()->getLocalNumElements()),
991 myKernel);
992
993} // SatsifyPConstraints()
994
995template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
997 using Device = typename Matrix::local_matrix_device_type::device_type;
998 LO nPDEs = 1; // A->GetFixedBlockSize();
999
1000 using local_mat_type = typename Matrix::local_matrix_device_type;
1001 optimalSatisfyConstraintsForScalarPDEsKernel<local_mat_type> myKernel(nPDEs, P->getLocalMaxNumRowEntries(), P->getLocalMatrixDevice());
1002 Kokkos::parallel_for("enforce constraint", Kokkos::RangePolicy<typename Device::execution_space>(0, P->getLocalNumRows()),
1003 myKernel);
1004
1005} // SatsifyPConstraints()
1006
1007} // namespace MueLu
1008
1009#endif // MUELU_SAPFACTORY_DEF_HPP
1010
1011// TODO: restrictionMode_ should use the parameter list.
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Jacobi(Scalar omega, const Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Dinv, const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > C_in, Teuchos::FancyOStream &fos, const std::string &label, RCP< ParameterList > &params)
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
void SatisfyPConstraints(RCP< Matrix > A, RCP< Matrix > &P) const
void Build(Level &fineLevel, Level &coarseLevel) const
virtual ~SaPFactory()
Destructor.
void optimalSatisfyPConstraintsForScalarPDEsNonKokkos(RCP< Matrix > &P) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void optimalSatisfyPConstraintsForScalarPDEs(RCP< Matrix > &P) const
bool constrainRow(Scalar *orig, LocalOrdinal nEntries, Scalar leftBound, Scalar rghtBound, Scalar rsumTarget, Scalar *fixedUnsorted, Scalar *scalarData) const
SaPFactory()
Constructor. User can supply a factory for generating the tentative prolongator.
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void SatisfyPConstraintsNonKokkos(RCP< Matrix > A, RCP< Matrix > &P) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps() *100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool doLumped=false)
Extract Matrix Diagonal.
static Scalar PowerMethod(const Matrix &A, bool scaleByDiag=true, LocalOrdinal niters=10, Magnitude tolerance=1e-2, bool verbose=false, unsigned int seed=123)
Power method.
static Teuchos::RCP< Vector > GetLumpedMatrixDiagonal(Matrix const &A, const bool doReciprocal=false, Magnitude tol=Teuchos::ScalarTraits< Scalar >::magnitude(Teuchos::ScalarTraits< Scalar >::zero()), Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool replaceSingleEntryRowWithZero=false, const bool useAverageAbsDiagVal=false)
Extract Matrix Diagonal of lumped matrix.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Namespace for MueLu classes and methods.
@ Statistics2
Print even more statistics.
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
scalar_type dampingFactor
typename local_matrix_type::device_type Device
typename local_matrix_type::non_const_value_type Scalar
KokkosKernels::ArithTraits< SC > KAT
Kokkos::View< SC **, Device > ConstraintViolationSum
Kokkos::View< size_t **, Device > nPositive
constraintKernel(LO nPDEs_, local_matrix_type localP_)
typename local_matrix_type::non_const_ordinal_type LO
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
Kokkos::View< SC **, Device > Rsum
typename local_matrix_type::non_const_value_type Scalar
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)
typename local_matrix_type::non_const_ordinal_type LO