MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_SaPFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_SAPFACTORY_DEF_HPP
11#define MUELU_SAPFACTORY_DEF_HPP
12
13#include "KokkosKernels_ArithTraits.hpp"
15
16#include <Xpetra_Matrix.hpp>
17
19#include "MueLu_Level.hpp"
20#include "MueLu_MasterList.hpp"
21#include "MueLu_Monitor.hpp"
22#include "MueLu_PerfUtils.hpp"
23#include "MueLu_TentativePFactory.hpp"
24#include "MueLu_IteratorOps.hpp"
25#include "MueLu_Utilities.hpp"
26
27#include <sstream>
28
29namespace MueLu {
30
31template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
33
34template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
36
37template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
39 RCP<ParameterList> validParamList = rcp(new ParameterList());
40
41#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
42 SET_VALID_ENTRY("sa: damping factor");
43 SET_VALID_ENTRY("sa: nodal damping factor");
44 SET_VALID_ENTRY("sa: calculate eigenvalue estimate");
45 SET_VALID_ENTRY("sa: eigenvalue estimate num iterations");
46 SET_VALID_ENTRY("sa: use rowsumabs diagonal scaling");
47 SET_VALID_ENTRY("sa: enforce constraints");
48 SET_VALID_ENTRY("tentative: calculate qr");
49 SET_VALID_ENTRY("sa: max eigenvalue");
50 SET_VALID_ENTRY("sa: diagonal replacement tolerance");
51 SET_VALID_ENTRY("sa: rowsumabs diagonal replacement tolerance");
52 SET_VALID_ENTRY("sa: rowsumabs diagonal replacement value");
53 SET_VALID_ENTRY("sa: rowsumabs replace single entry row with zero");
54 SET_VALID_ENTRY("sa: rowsumabs use automatic diagonal tolerance");
55 SET_VALID_ENTRY("use kokkos refactor");
56#undef SET_VALID_ENTRY
57 validParamList->set("sa: maxwell1 smoothing", false);
58
59 validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A used during the prolongator smoothing process");
60 validParamList->set<RCP<const FactoryBase>>("P", Teuchos::null, "Tentative prolongator factory");
61
62 validParamList->set<RCP<const FactoryBase>>("CurlCurl", Teuchos::null, "Generating factory of the matrix CurlCurl used during the prolongator smoothing process for Maxwell1");
63
64 // Make sure we don't recursively validate options for the matrixmatrix kernels
65 ParameterList norecurse;
66 norecurse.disableRecursiveValidation();
67 validParamList->set<ParameterList>("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
68
69 return validParamList;
70}
71
72template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
74 Input(fineLevel, "A");
75
76 // Get default tentative prolongator factory
77 // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
78 RCP<const FactoryBase> initialPFact = GetFactory("P");
79 if (initialPFact == Teuchos::null) {
80 initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent");
81 }
82 coarseLevel.DeclareInput("P", initialPFact.get(), this);
83
84 const ParameterList& pL = GetParameterList();
85 if (pL.get<bool>("sa: maxwell1 smoothing") && pL.get<double>("sa: damping factor") != 0.0)
86 fineLevel.DeclareInput("CurlCurl", GetFactory("CurlCurl").get(), this);
87}
88
89template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
91 return BuildP(fineLevel, coarseLevel);
92}
93
94template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
96 FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel);
97
98 std::string levelIDs = toString(coarseLevel.GetLevelID());
99
100 const std::string prefix = "MueLu::SaPFactory(" + levelIDs + "): ";
101
102 typedef typename Teuchos::ScalarTraits<SC>::coordinateType Coordinate;
103 typedef typename Teuchos::ScalarTraits<SC>::magnitudeType MT;
104
105 // Get default tentative prolongator factory
106 // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
107 // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
108 RCP<const FactoryBase> initialPFact = GetFactory("P");
109 if (initialPFact == Teuchos::null) {
110 initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent");
111 }
112 const ParameterList& pL = GetParameterList();
113
114 // Level Get
115 RCP<Matrix> A = Get<RCP<Matrix>>(fineLevel, "A");
116 RCP<Matrix> Ptent = coarseLevel.Get<RCP<Matrix>>("P", initialPFact.get());
117 RCP<Matrix> finalP;
118 // If Tentative facctory bailed out (e.g., number of global aggregates is 0), then SaPFactory bails
119 // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
120 if (Ptent == Teuchos::null) {
121 finalP = Teuchos::null;
122 Set(coarseLevel, "P", finalP);
123 return;
124 }
125
126 if (restrictionMode_) {
127 SubFactoryMonitor m2(*this, "Transpose A", coarseLevel);
128 A = Utilities::Transpose(*A, true); // build transpose of A explicitly
129 }
130
131 // Build final prolongator
132
133 // Reuse pattern if available
134 RCP<ParameterList> APparams;
135 if (pL.isSublist("matrixmatrix: kernel params"))
136 APparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
137 else
138 APparams = rcp(new ParameterList);
139 if (coarseLevel.IsAvailable("AP reuse data", this)) {
140 GetOStream(static_cast<MsgType>(Runtime0 | Test)) << "Reusing previous AP data" << std::endl;
141
142 APparams = coarseLevel.Get<RCP<ParameterList>>("AP reuse data", this);
143
144 if (APparams->isParameter("graph"))
145 finalP = APparams->get<RCP<Matrix>>("graph");
146 }
147 // By default, we don't need global constants for SaP
148 APparams->set("compute global constants: temporaries", APparams->get("compute global constants: temporaries", false));
149 APparams->set("compute global constants", APparams->get("compute global constants", false));
150
151 const SC dampingFactor = as<SC>(pL.get<double>("sa: damping factor"));
152 const LO maxEigenIterations = as<LO>(pL.get<int>("sa: eigenvalue estimate num iterations"));
153 const bool estimateMaxEigen = pL.get<bool>("sa: calculate eigenvalue estimate");
154 const bool useAbsValueRowSum = pL.get<bool>("sa: use rowsumabs diagonal scaling");
155 const bool doQRStep = pL.get<bool>("tentative: calculate qr");
156 const bool enforceConstraints = pL.get<bool>("sa: enforce constraints");
157 const MT userDefinedMaxEigen = as<MT>(pL.get<double>("sa: max eigenvalue"));
158 double dTol = pL.get<double>("sa: diagonal replacement tolerance");
159 double dTol_rs = pL.get<double>("sa: rowsumabs diagonal replacement tolerance");
160 const MT diagonalReplacementTolerance = (dTol == as<double>(-1) ? Teuchos::ScalarTraits<MT>::eps() * 100 : as<MT>(pL.get<double>("sa: diagonal replacement tolerance")));
161 const MT diagonalReplacementTolerance_rs = (dTol_rs == as<double>(-1) ? Teuchos::ScalarTraits<MT>::eps() * 100 : as<MT>(pL.get<double>("sa: rowsumabs diagonal replacement tolerance")));
162
163 const SC diagonalReplacementValue = as<SC>(pL.get<double>("sa: rowsumabs diagonal replacement value"));
164 const bool replaceSingleEntryRowWithZero = pL.get<bool>("sa: rowsumabs replace single entry row with zero");
165 const bool useAutomaticDiagTol = pL.get<bool>("sa: rowsumabs use automatic diagonal tolerance");
166
167 // Sanity checking
168 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && enforceConstraints, Exceptions::RuntimeError,
169 "MueLu::TentativePFactory::MakeTentative: cannot use 'enforce constraints' and 'calculate qr' at the same time");
170
171 // Are we using the special prolongator smoothing for smoothed Reitzinger-Schoeberl?
172 const bool specialMaxwell1Smoothing = pL.get<bool>("sa: maxwell1 smoothing");
173
174 if (!specialMaxwell1Smoothing) {
175 if (dampingFactor != Teuchos::ScalarTraits<SC>::zero()) {
176 Scalar lambdaMax;
177 Teuchos::RCP<Vector> invDiag;
178 if (userDefinedMaxEigen == -1.) {
179 SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
180 lambdaMax = A->GetMaxEigenvalueEstimate();
181 if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
182 GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = " << maxEigenIterations << ((useAbsValueRowSum) ? ", use rowSumAbs diagonal)" : ", use point diagonal)") << std::endl;
183 Coordinate stopTol = 1e-4;
184 if (useAbsValueRowSum) {
185 const bool returnReciprocal = true;
186 invDiag = Utilities::GetLumpedMatrixDiagonal(*A, returnReciprocal,
187 diagonalReplacementTolerance_rs,
188 diagonalReplacementValue,
189 replaceSingleEntryRowWithZero,
190 useAutomaticDiagTol);
191 TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError,
192 "SaPFactory: eigenvalue estimate: diagonal reciprocal is null.");
193 lambdaMax = Utilities::PowerMethod(*A, invDiag, maxEigenIterations, stopTol);
194 } else
195 lambdaMax = Utilities::PowerMethod(*A, true, maxEigenIterations, stopTol, diagonalReplacementTolerance);
196 A->SetMaxEigenvalueEstimate(lambdaMax);
197 } else {
198 GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl;
199 }
200 } else {
201 lambdaMax = userDefinedMaxEigen;
202 A->SetMaxEigenvalueEstimate(lambdaMax);
203 }
204 MT omega = Teuchos::ScalarTraits<SC>::magnitude(dampingFactor) / Teuchos::ScalarTraits<SC>::magnitude(lambdaMax);
205 GetOStream(Statistics1) << "Prolongator damping factor = " << omega << " (|" << dampingFactor << " / " << lambdaMax << "|)" << std::endl;
206
207 {
208 SubFactoryMonitor m2(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel);
209 if (!useAbsValueRowSum)
210 invDiag = Utilities::GetMatrixDiagonalInverse(*A); // default
211 else if (invDiag == Teuchos::null) {
212 GetOStream(Runtime0) << "Using rowsumabs diagonal" << std::endl;
213 const bool returnReciprocal = true;
214 invDiag = Utilities::GetLumpedMatrixDiagonal(*A, returnReciprocal,
215 diagonalReplacementTolerance_rs,
216 diagonalReplacementValue,
217 replaceSingleEntryRowWithZero,
218 useAutomaticDiagTol);
219 TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError, "SaPFactory: diagonal reciprocal is null.");
220 }
221
222 TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(omega), Exceptions::RuntimeError, "Prolongator damping factor needs to be finite.");
223
224 {
225 SubFactoryMonitor m3(*this, "MueLu::IteratorOps::Jacobi", coarseLevel);
226 // finalP = (I - \omega D^{-1}A) Ptent
227 finalP = MueLu::IteratorOps<SC, LO, GO, NO>::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(Statistics2), std::string("MueLu::SaP-") + toString(coarseLevel.GetLevelID()), APparams);
228 if (enforceConstraints) {
229 if (!pL.get<bool>("use kokkos refactor")) {
230 if (A->GetFixedBlockSize() == 1)
231 optimalSatisfyPConstraintsForScalarPDEsNonKokkos(finalP);
232 else
233 SatisfyPConstraintsNonKokkos(A, finalP);
234 } else {
235 // if (A->GetFixedBlockSize() == 1)
236 // optimalSatisfyPConstraintsForScalarPDEs(finalP);
237 // else
238 SatisfyPConstraints(A, finalP);
239 }
240 }
241 }
242 }
243
244 } else {
245 finalP = Ptent;
246 }
247 } else {
248 // We are on the (1,1) hierarchy for Maxwell1.
249 // Instead of smoothing with I - omega * Dinv(A) * A, we are smoothing with
250 // I - beta * D0 * Dinv(NodeMatrix) * D0^T * A
251 // I - alpha * Dinv(CurlCurl) * CurlCurl, and
252 // where beta matches the damping that was used for the nodal operator.
253
254 const SC nodalDampingFactor = as<SC>(pL.get<double>("sa: nodal damping factor"));
255
256 const bool nodalAndEdgeSmoothing = ((nodalDampingFactor != Teuchos::ScalarTraits<SC>::zero()) && (fineLevel.Get<RCP<Matrix>>("NodeMatrix")->GetMaxEigenvalueEstimate() != -Teuchos::ScalarTraits<SC>::one()));
257 const bool edgeSmoothing = (dampingFactor != Teuchos::ScalarTraits<SC>::zero());
258
259 RCP<Matrix> P_afterNodalAndEdgeSmoothing;
260 if (nodalAndEdgeSmoothing) {
261 auto D0 = fineLevel.Get<RCP<Matrix>>("D0");
262 auto NodeMatrix = fineLevel.Get<RCP<Matrix>>("NodeMatrix");
263
264 auto lambdaMaxNodal = NodeMatrix->GetMaxEigenvalueEstimate();
265 MT beta = Teuchos::ScalarTraits<SC>::magnitude(nodalDampingFactor) / Teuchos::ScalarTraits<SC>::magnitude(lambdaMaxNodal);
266 GetOStream(Statistics1) << "Maxwell1 prolongator nodal damping factor = " << beta << " (|" << nodalDampingFactor << " / " << lambdaMaxNodal << "|)" << std::endl;
267 TEUCHOS_TEST_FOR_EXCEPTION(lambdaMaxNodal == -Teuchos::ScalarTraits<SC>::one(), Exceptions::RuntimeError, "Prolongator damping factor obtained from NodeMatrix is -1.")
268
269 TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(beta), Exceptions::RuntimeError, "Prolongator damping factor needs to be finite.");
270
271 {
272 SubFactoryMonitor m3(*this, "MueLu::IteratorOps::JacobiMaxwell1", coarseLevel);
273 P_afterNodalAndEdgeSmoothing = JacobiMaxwell1(A, NodeMatrix, D0, Ptent, beta);
274 }
275 } else {
276 if (nodalDampingFactor == Teuchos::ScalarTraits<SC>::zero())
277 GetOStream(Statistics1) << "Skipping nodal&edge smoothing step since nodal damping factor is zero\n";
278 else if (fineLevel.Get<RCP<Matrix>>("NodeMatrix")->GetMaxEigenvalueEstimate() == -Teuchos::ScalarTraits<SC>::one())
279 GetOStream(Warnings) << "Skipping nodal&edge smoothing step since NodeMatrix matrix has invalid eigenvalue estimate\n";
280 P_afterNodalAndEdgeSmoothing = Ptent;
281 }
282
283 if (edgeSmoothing) {
284 auto CurlCurl = fineLevel.Get<RCP<Matrix>>("CurlCurl", GetFactory("CurlCurl").get());
285
286 Scalar lambdaMax;
287 Teuchos::RCP<Vector> invDiag;
288 if (userDefinedMaxEigen == -1.) {
289 SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
290 lambdaMax = CurlCurl->GetMaxEigenvalueEstimate();
291 if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
292 GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = " << maxEigenIterations << ((useAbsValueRowSum) ? ", use rowSumAbs diagonal)" : ", use point diagonal)") << std::endl;
293 Coordinate stopTol = 1e-4;
294 if (useAbsValueRowSum) {
295 const bool returnReciprocal = true;
296 invDiag = Utilities::GetLumpedMatrixDiagonal(*CurlCurl, returnReciprocal,
297 diagonalReplacementTolerance_rs,
298 diagonalReplacementValue,
299 replaceSingleEntryRowWithZero,
300 useAutomaticDiagTol);
301 TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError,
302 "SaPFactory: eigenvalue estimate: diagonal reciprocal is null.");
303 lambdaMax = Utilities::PowerMethod(*CurlCurl, invDiag, maxEigenIterations, stopTol);
304 } else
305 lambdaMax = Utilities::PowerMethod(*CurlCurl, true, maxEigenIterations, stopTol, diagonalReplacementTolerance);
306 CurlCurl->SetMaxEigenvalueEstimate(lambdaMax);
307 } else {
308 GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl;
309 }
310 } else {
311 lambdaMax = userDefinedMaxEigen;
312 CurlCurl->SetMaxEigenvalueEstimate(lambdaMax);
313 }
314
315 if (!useAbsValueRowSum)
316 invDiag = Utilities::GetMatrixDiagonalInverse(*CurlCurl); // default
317 else if (invDiag == Teuchos::null) {
318 GetOStream(Runtime0) << "Using rowsumabs diagonal" << std::endl;
319 const bool returnReciprocal = true;
320 invDiag = Utilities::GetLumpedMatrixDiagonal(*CurlCurl, returnReciprocal,
321 diagonalReplacementTolerance_rs,
322 diagonalReplacementValue,
323 replaceSingleEntryRowWithZero,
324 useAutomaticDiagTol);
325 TEUCHOS_TEST_FOR_EXCEPTION(invDiag.is_null(), Exceptions::RuntimeError, "SaPFactory: diagonal reciprocal is null.");
326 }
327
328 MT alpha = Teuchos::ScalarTraits<SC>::magnitude(dampingFactor) / Teuchos::ScalarTraits<SC>::magnitude(lambdaMax);
329 TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(alpha), Exceptions::RuntimeError, "Prolongator damping factor needs to be finite.");
330
331 GetOStream(Statistics1) << "Maxwell1 prolongator edge damping factor = " << alpha << " (|" << dampingFactor << " / " << lambdaMax << "|)" << std::endl;
332
333 {
334 SubFactoryMonitor m3(*this, "MueLu::IteratorOps::Jacobi", coarseLevel);
335 // finalP = (I - \omega D^{-1}CurlCurl) P_afterNodalAndEdgeSmoothing
336 finalP = MueLu::IteratorOps<SC, LO, GO, NO>::Jacobi(alpha, *invDiag, *CurlCurl, *P_afterNodalAndEdgeSmoothing, finalP, GetOStream(Statistics2), std::string("MueLu::SaP-") + toString(coarseLevel.GetLevelID()), APparams);
337 }
338 } else {
339 GetOStream(Statistics1) << "Maxwell1 prolongator edge smoothing skipped" << std::endl;
340 finalP = P_afterNodalAndEdgeSmoothing;
341 }
342 }
343
344 // Level Set
345 RCP<Matrix> R;
346 if (!restrictionMode_) {
347 // prolongation factory is in prolongation mode
348 if (!finalP.is_null()) {
349 std::ostringstream oss;
350 oss << "P_" << coarseLevel.GetLevelID();
351 finalP->setObjectLabel(oss.str());
352 }
353 Set(coarseLevel, "P", finalP);
354
355 APparams->set("graph", finalP);
356 Set(coarseLevel, "AP reuse data", APparams);
357
358 // NOTE: EXPERIMENTAL
359 if (Ptent->IsView("stridedMaps"))
360 finalP->CreateView("stridedMaps", Ptent);
361
362 } else {
363 // prolongation factory is in restriction mode
364 {
365 SubFactoryMonitor m2(*this, "Transpose P", coarseLevel);
366 R = Utilities::Transpose(*finalP, true);
367 if (!R.is_null()) {
368 std::ostringstream oss;
369 oss << "R_" << coarseLevel.GetLevelID();
370 R->setObjectLabel(oss.str());
371 }
372 }
373
374 Set(coarseLevel, "R", R);
375
376 // NOTE: EXPERIMENTAL
377 if (Ptent->IsView("stridedMaps"))
378 R->CreateView("stridedMaps", Ptent, true /*transposeA*/);
379 }
380
381 if (IsPrint(Statistics2)) {
382 RCP<ParameterList> params = rcp(new ParameterList());
383 params->set("printLoadBalancingInfo", true);
384 params->set("printCommInfo", true);
385 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo((!restrictionMode_ ? *finalP : *R), (!restrictionMode_ ? "P" : "R"), params);
386 }
387
388} // Build()
389
390// Analyze the grid transfer produced by smoothed aggregation and make
391// modifications if it does not look right. In particular, if there are
392// negative entries or entries larger than 1, modify P's rows.
393//
394// Note: this kind of evaluation probably only makes sense if not doing QR
395// when constructing tentative P.
396//
397// For entries that do not satisfy the two constraints (>= 0 or <=1) we set
398// these entries to the constraint value and modify the rest of the row
399// so that the row sum remains the same as before by adding an equal
400// amount to each remaining entry. However, if the original row sum value
401// violates the constraints, we set the row sum back to 1 (the row sum of
402// tentative P). After doing the modification to a row, we need to check
403// again the entire row to make sure that the modified row does not violate
404// the constraints.
405
406template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
408 const Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
409 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
410 LO nPDEs = A->GetFixedBlockSize();
411 Teuchos::ArrayRCP<Scalar> ConstraintViolationSum(nPDEs);
412 Teuchos::ArrayRCP<Scalar> Rsum(nPDEs);
413 Teuchos::ArrayRCP<size_t> nPositive(nPDEs);
414 for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum[k] = zero;
415 for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum[k] = zero;
416 for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive[k] = 0;
417
418 for (size_t i = 0; i < as<size_t>(P->getRowMap()->getLocalNumElements()); i++) {
419 Teuchos::ArrayView<const LocalOrdinal> indices;
420 Teuchos::ArrayView<const Scalar> vals1;
421 Teuchos::ArrayView<Scalar> vals;
422 P->getLocalRowView((LO)i, indices, vals1);
423 size_t nnz = indices.size();
424 if (nnz == 0) continue;
425
426 vals = ArrayView<Scalar>(const_cast<SC*>(vals1.getRawPtr()), nnz);
427
428 bool checkRow = true;
429
430 while (checkRow) {
431 // check constraints and compute the row sum
432
433 for (LO j = 0; j < indices.size(); j++) {
434 Rsum[j % nPDEs] += vals[j];
435 if (Teuchos::ScalarTraits<SC>::real(vals[j]) < Teuchos::ScalarTraits<SC>::real(zero)) {
436 ConstraintViolationSum[j % nPDEs] += vals[j];
437 vals[j] = zero;
438 } else {
439 if (Teuchos::ScalarTraits<SC>::real(vals[j]) != Teuchos::ScalarTraits<SC>::real(zero))
440 (nPositive[j % nPDEs])++;
441
442 if (Teuchos::ScalarTraits<SC>::real(vals[j]) > Teuchos::ScalarTraits<SC>::real(1.00001)) {
443 ConstraintViolationSum[j % nPDEs] += (vals[j] - one);
444 vals[j] = one;
445 }
446 }
447 }
448
449 checkRow = false;
450
451 // take into account any row sum that violates the contraints
452
453 for (size_t k = 0; k < (size_t)nPDEs; k++) {
454 if (Teuchos::ScalarTraits<SC>::real(Rsum[k]) < Teuchos::ScalarTraits<SC>::magnitude(zero)) {
455 ConstraintViolationSum[k] += (-Rsum[k]); // rstumin
456 } else if (Teuchos::ScalarTraits<SC>::real(Rsum[k]) > Teuchos::ScalarTraits<SC>::magnitude(1.00001)) {
457 ConstraintViolationSum[k] += (one - Rsum[k]); // rstumin
458 }
459 }
460
461 // check if row need modification
462 for (size_t k = 0; k < (size_t)nPDEs; k++) {
463 if (Teuchos::ScalarTraits<SC>::magnitude(ConstraintViolationSum[k]) != Teuchos::ScalarTraits<SC>::magnitude(zero))
464 checkRow = true;
465 }
466 // modify row
467 if (checkRow) {
468 for (LO j = 0; j < indices.size(); j++) {
469 if (Teuchos::ScalarTraits<SC>::real(vals[j]) > Teuchos::ScalarTraits<SC>::real(zero)) {
470 vals[j] += (ConstraintViolationSum[j % nPDEs] / (as<Scalar>(nPositive[j % nPDEs])));
471 }
472 }
473 for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum[k] = zero;
474 }
475 for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum[k] = zero;
476 for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive[k] = 0;
477 } // while (checkRow) ...
478 } // for (size_t i = 0; i < as<size_t>(P->getRowMap()->getNumNodeElements()); i++) ...
479} // SatsifyPConstraints()
480
481template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
483 const Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
484 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
485
486 LocalOrdinal maxEntriesPerRow = 100; // increased later if needed
487 Teuchos::ArrayRCP<Scalar> scalarData(3 * maxEntriesPerRow);
488 bool hasFeasible;
489
490 for (size_t i = 0; i < as<size_t>(P->getRowMap()->getLocalNumElements()); i++) {
491 Teuchos::ArrayView<const LocalOrdinal> indices;
492 Teuchos::ArrayView<const Scalar> vals1;
493 Teuchos::ArrayView<Scalar> vals;
494 P->getLocalRowView((LocalOrdinal)i, indices, vals1);
495 size_t nnz = indices.size();
496 if (nnz != 0) {
497 vals = ArrayView<Scalar>(const_cast<SC*>(vals1.getRawPtr()), nnz);
498 Scalar rsumTarget = zero;
499 for (size_t j = 0; j < nnz; j++) rsumTarget += vals[j];
500
501 if (nnz > as<size_t>(maxEntriesPerRow)) {
502 maxEntriesPerRow = nnz * 3;
503 scalarData.resize(3 * maxEntriesPerRow);
504 }
505 hasFeasible = constrainRow(vals.getRawPtr(), as<LocalOrdinal>(nnz), zero, one, rsumTarget, vals.getRawPtr(), scalarData.getRawPtr());
506
507 if (!hasFeasible) { // just set all entries to the same value giving a row sum of 1
508 for (size_t j = 0; j < nnz; j++) vals[j] = one / as<Scalar>(nnz);
509 }
510 }
511
512 } // for (size_t i = 0; i < as<size_t>(P->getRowMap()->getNumNodeElements()); i++) ...
513} // SatsifyPConstraints()
514
515template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
516bool SaPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::constrainRow(Scalar* orig, LocalOrdinal nEntries, Scalar leftBound, Scalar rghtBound, Scalar rsumTarget, Scalar* fixedUnsorted, Scalar* scalarData) const {
517 /*
518 Input
519 orig data that should be adjusted to satisfy bound constraints and
520 row sum constraint. orig is not modified by this function.
521
522 nEntries length or 'orig'
523
524 leftBound, define bound constraints for the results.
525 rghtBound
526
527 rsumTarget defines an equality constraint for the row sum
528
529 fixedUnsorted on output, if a feasible solutuion exists then
530 || orig - fixedUnsorted || = min when also
531 leftBound <= fixedUnsorted[i] <= rghtBound for all i
532 and sum(fixedUnsorted) = rsumTarget.
533
534 Note: it is possible to use the same pointer for
535 fixedUnsorted and orig. In this case, orig gets
536 overwritten with the new constraint satisfying values.
537
538 scalarData a work array that should be 3x nEntries.
539
540 On return constrain() indicates whether or not a feasible solution exists.
541 */
542
543 /*
544 Given a sequence of numbers o1 ... on, fix these so that they are as
545 close as possible to the original but satisfy bound constraints and also
546 have the same row sum as the oi's. If we know who is going to lie on a
547 bound, then the "best" answer (i.e., || o - f ||_2 = min) perturbs
548 each element that doesn't lie on a bound by the same amount.
549
550 We can represent the oi's by considering scattered points on a number line
551
552 | |
553 | |
554 o o o | o o o o o o |o o
555 | |
556
557 \____/ \____/
558 <---- <----
559 delta delta
560
561 Bounds are shown by vertical lines. The fi's must lie within the bounds, so
562 the 3 leftmost points must be shifted to the right and the 2 rightmost must
563 be shifted to the left. If these fi points are all shifted to the bounds
564 while the others remain in the same location, the row sum constraint is
565 likely not satisfied and so more shifting is necessary. In the figure, the f
566 rowsum is too large and so there must be more shifting to the left.
567
568 To minimize || o - f ||_2, we basically shift all "interiors" by the same
569 amount, denoted delta. The only trick is that some points near bounds are
570 still affected by the bounds and so these points might be shifted more or less
571 than delta. In the example,t he 3 rightmost points are shifted in the opposite
572 direction as delta to the bound. The 4th point is shifted by something less
573 than delta so it does not violate the lower bound. The rightmost point is
574 shifted to the bound by some amount larger than delta. However, the 2nd point
575 is shifted by delta (i.e., it lies inside the two bounds).
576
577 If we know delta, we can figure out everything. If we know which points
578 are special (not shifted by delta), we can also figure out everything.
579 The problem is these two things (delta and the special points) are
580 inter-connected. An algorithm for computing follows.
581
582 1) move exterior points to the bounds and compute how much the row sum is off
583 (rowSumDeviation). We assume now that the new row sum is high, so interior
584 points must be shifted left.
585
586 2) Mark closest point just left of the leftmost bound, closestToLeftBound,
587 and compute its distance to the leftmost bound. Mark closest point to the
588 left of the rightmost bound, closestToRghtBound, and compute its distance to
589 right bound. There are two cases to consider.
590
591 3) Case 1: closestToLeftBound is closer than closestToRghtBound.
592 Assume that shifting by delta does not move closestToLeftBound past the
593 left bound. This means that it will be shifted by delta. However,
594 closestToRghtBound will be shifted by more than delta. So the total
595 number of points shifted by delta (|interiorToBounds|) includes
596 closestToLeftBound up to and including the point just to the left of
597 closestToRghtBound. So
598
599 delta = rowSumDeviation/ |interiorToBounds| .
600
601 Recall that rowSumDeviation already accounts for the non-delta shift of
602 of closestToRightBound. Now check whether our assumption is valid.
603
604 If delta <= closestToLeftBoundDist, assumption is true so delta can be
605 applied to interiorToBounds ... and we are done.
606 Else assumption is false. Shift closestToLeftBound to the left bound.
607 Update rowSumDeviation, interiorToBounds, and identify new
608 closestToLeftBound. Repeat step 3).
609
610 Case 2: closestToRghtBound is closer than closestToLeftBound.
611 Assume that shifting by delta does not move closestToRghtBound past right
612 bound. This means that it must be shifted by more than delta to right
613 bound. So the total number of points shifted by delta again includes
614 closestToLeftBound up to and including the point just to the left of
615 closestToRghtBound. So again compute
616
617 delta = rowSumDeviation/ |interiorToBounds| .
618
619 If delta <= closestToRghtBoundDist, assumption is true so delta is
620 can be applied to interiorToBounds ... and we are done
621 Else assumption is false. Put closestToRghtBound in the
622 interiorToBounds set. Remove it's contribution to rowSumDeviation,
623 identify new closestToRghtBound. Repeat step 3)
624
625
626 To implement, sort the oi's so things like closestToLeftBound is just index
627 into sorted array. Updaing closestToLeftBound or closestToRghtBound requires
628 increment by 1. |interiorToBounds|= closestToRghtBound - closestToLeftBound
629 To handle the case when the rowsum is low (requiring right interior shifts),
630 just flip the signs on data and use the left-shift code (and then flip back
631 before exiting function.
632 */
633 bool hasFeasibleSol;
634 Scalar notFlippedLeftBound, notFlippedRghtBound, aBigNumber, *origSorted;
635 Scalar rowSumDeviation, temp, *fixedSorted, delta;
636 Scalar closestToLeftBoundDist, closestToRghtBoundDist;
637 LocalOrdinal closestToLeftBound, closestToRghtBound;
638 LocalOrdinal* inds;
639 bool flipped;
640
641 notFlippedLeftBound = leftBound;
642 notFlippedRghtBound = rghtBound;
643
644 if ((Teuchos::ScalarTraits<SC>::real(rsumTarget) >= Teuchos::ScalarTraits<SC>::real(leftBound * as<Scalar>(nEntries))) &&
645 (Teuchos::ScalarTraits<SC>::real(rsumTarget) <= Teuchos::ScalarTraits<SC>::real(rghtBound * as<Scalar>(nEntries))))
646 hasFeasibleSol = true;
647 else {
648 hasFeasibleSol = false;
649 return hasFeasibleSol;
650 }
651 flipped = false;
652 // compute aBigNumber to handle some corner cases where we need
653 // something large so that an if statement will be false
654 aBigNumber = Teuchos::ScalarTraits<SC>::zero();
655 for (LocalOrdinal i = 0; i < nEntries; i++) {
656 if (Teuchos::ScalarTraits<SC>::magnitude(orig[i]) > Teuchos::ScalarTraits<SC>::magnitude(aBigNumber))
657 aBigNumber = Teuchos::ScalarTraits<SC>::magnitude(orig[i]);
658 }
659 aBigNumber = aBigNumber + (Teuchos::ScalarTraits<SC>::magnitude(leftBound) + Teuchos::ScalarTraits<SC>::magnitude(rghtBound)) * as<Scalar>(100.0);
660
661 origSorted = &scalarData[0];
662 fixedSorted = &(scalarData[nEntries]);
663 inds = (LocalOrdinal*)&(scalarData[2 * nEntries]);
664
665 for (LocalOrdinal i = 0; i < nEntries; i++) inds[i] = i;
666 for (LocalOrdinal i = 0; i < nEntries; i++) origSorted[i] = orig[i]; /* orig no longer used */
667
668 // sort so that orig[inds] is sorted.
669 std::sort(inds, inds + nEntries,
670 [origSorted](LocalOrdinal leftIndex, LocalOrdinal rightIndex) { return Teuchos::ScalarTraits<SC>::real(origSorted[leftIndex]) < Teuchos::ScalarTraits<SC>::real(origSorted[rightIndex]); });
671
672 for (LocalOrdinal i = 0; i < nEntries; i++) origSorted[i] = orig[inds[i]];
673 // find entry in origSorted just to the right of the leftBound
674 closestToLeftBound = 0;
675 while ((closestToLeftBound < nEntries) && (Teuchos::ScalarTraits<SC>::real(origSorted[closestToLeftBound]) <= Teuchos::ScalarTraits<SC>::real(leftBound))) closestToLeftBound++;
676
677 // find entry in origSorted just to the right of the rghtBound
678 closestToRghtBound = closestToLeftBound;
679 while ((closestToRghtBound < nEntries) && (Teuchos::ScalarTraits<SC>::real(origSorted[closestToRghtBound]) <= Teuchos::ScalarTraits<SC>::real(rghtBound))) closestToRghtBound++;
680
681 // compute distance between closestToLeftBound and the left bound and the
682 // distance between closestToRghtBound and the right bound.
683
684 closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
685 if (closestToRghtBound == nEntries)
686 closestToRghtBoundDist = aBigNumber;
687 else
688 closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
689
690 // compute how far the rowSum is off from the target row sum taking into account
691 // numbers that have been shifted to satisfy bound constraint
692
693 rowSumDeviation = leftBound * as<Scalar>(closestToLeftBound) + as<Scalar>((nEntries - closestToRghtBound)) * rghtBound - rsumTarget;
694 for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation += origSorted[i];
695
696 // the code that follow after this if statement assumes that rowSumDeviation is positive. If this
697 // is not the case, flip the signs of everything so that rowSumDeviation is now positive.
698 // Later we will flip the data back to its original form.
699 if (Teuchos::ScalarTraits<SC>::real(rowSumDeviation) < Teuchos::ScalarTraits<SC>::real(Teuchos::ScalarTraits<SC>::zero())) {
700 flipped = true;
701 temp = leftBound;
702 leftBound = -rghtBound;
703 rghtBound = temp;
704
705 /* flip sign of origSorted and reverse ordering so that the negative version is sorted */
706
707 if ((nEntries % 2) == 1) origSorted[(nEntries / 2)] = -origSorted[(nEntries / 2)];
708 for (LocalOrdinal i = 0; i < nEntries / 2; i++) {
709 temp = origSorted[i];
710 origSorted[i] = -origSorted[nEntries - 1 - i];
711 origSorted[nEntries - i - 1] = -temp;
712 }
713
714 /* reverse bounds */
715
716 LocalOrdinal itemp = closestToLeftBound;
717 closestToLeftBound = nEntries - closestToRghtBound;
718 closestToRghtBound = nEntries - itemp;
719 closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
720 if (closestToRghtBound == nEntries)
721 closestToRghtBoundDist = aBigNumber;
722 else
723 closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
724
725 rowSumDeviation = -rowSumDeviation;
726 }
727
728 // initial fixedSorted so bounds are satisfied and interiors correspond to origSorted
729
730 for (LocalOrdinal i = 0; i < closestToLeftBound; i++) fixedSorted[i] = leftBound;
731 for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i];
732 for (LocalOrdinal i = closestToRghtBound; i < nEntries; i++) fixedSorted[i] = rghtBound;
733
734 while ((Teuchos::ScalarTraits<SC>::magnitude(rowSumDeviation) > Teuchos::ScalarTraits<SC>::magnitude(as<Scalar>(1.e-10) * rsumTarget))) { // && ( (closestToLeftBound < nEntries ) || (closestToRghtBound < nEntries))) {
735 if (closestToRghtBound != closestToLeftBound)
736 delta = rowSumDeviation / as<Scalar>(closestToRghtBound - closestToLeftBound);
737 else
738 delta = aBigNumber;
739
740 if (Teuchos::ScalarTraits<SC>::magnitude(closestToLeftBoundDist) <= Teuchos::ScalarTraits<SC>::magnitude(closestToRghtBoundDist)) {
741 if (Teuchos::ScalarTraits<SC>::magnitude(delta) <= Teuchos::ScalarTraits<SC>::magnitude(closestToLeftBoundDist)) {
742 rowSumDeviation = Teuchos::ScalarTraits<SC>::zero();
743 for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i] - delta;
744 } else {
745 rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
746 fixedSorted[closestToLeftBound] = leftBound;
747 closestToLeftBound++;
748 if (closestToLeftBound < nEntries)
749 closestToLeftBoundDist = origSorted[closestToLeftBound] - leftBound;
750 else
751 closestToLeftBoundDist = aBigNumber;
752 }
753 } else {
754 if (Teuchos::ScalarTraits<SC>::magnitude(delta) <= Teuchos::ScalarTraits<SC>::magnitude(closestToRghtBoundDist)) {
755 rowSumDeviation = 0;
756 for (LocalOrdinal i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted[i] = origSorted[i] - delta;
757 } else {
758 rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
759 // if (closestToRghtBound < nEntries) {
760 fixedSorted[closestToRghtBound] = origSorted[closestToRghtBound];
761 closestToRghtBound++;
762 // }
763 if (closestToRghtBound >= nEntries)
764 closestToRghtBoundDist = aBigNumber;
765 else
766 closestToRghtBoundDist = origSorted[closestToRghtBound] - rghtBound;
767 }
768 }
769 }
770
771 if (flipped) {
772 /* flip sign of fixedSorted and reverse ordering so that the positve version is sorted */
773
774 if ((nEntries % 2) == 1) fixedSorted[(nEntries / 2)] = -fixedSorted[(nEntries / 2)];
775 for (LocalOrdinal i = 0; i < nEntries / 2; i++) {
776 temp = fixedSorted[i];
777 fixedSorted[i] = -fixedSorted[nEntries - 1 - i];
778 fixedSorted[nEntries - i - 1] = -temp;
779 }
780 }
781 for (LocalOrdinal i = 0; i < nEntries; i++) fixedUnsorted[inds[i]] = fixedSorted[i];
782
783 /* check that no constraints are violated */
784
785 bool lowerViolation = false;
786 bool upperViolation = false;
787 bool sumViolation = false;
788 using TST = Teuchos::ScalarTraits<SC>;
789 temp = TST::zero();
790 for (LocalOrdinal i = 0; i < nEntries; i++) {
791 if (TST::real(fixedUnsorted[i]) < TST::real(notFlippedLeftBound)) lowerViolation = true;
792 if (TST::real(fixedUnsorted[i]) > TST::real(notFlippedRghtBound)) upperViolation = true;
793 temp += fixedUnsorted[i];
794 }
795 SC tol = as<Scalar>(std::max(1.0e-8, as<double>(100 * TST::eps())));
796 if (TST::magnitude(temp - rsumTarget) > TST::magnitude(tol * rsumTarget)) sumViolation = true;
797
798 TEUCHOS_TEST_FOR_EXCEPTION(lowerViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in a lower bound violation??? ");
799 TEUCHOS_TEST_FOR_EXCEPTION(upperViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in an upper bound violation??? ");
800 TEUCHOS_TEST_FOR_EXCEPTION(sumViolation, Exceptions::RuntimeError, "MueLu::SaPFactory::constrainRow: feasible solution but computation resulted in a row sum violation??? ");
801
802 return hasFeasibleSol;
803}
804
805template <typename local_matrix_type>
807 using Scalar = typename local_matrix_type::non_const_value_type;
808 using SC = Scalar;
809 using LO = typename local_matrix_type::non_const_ordinal_type;
810 using Device = typename local_matrix_type::device_type;
811 using KAT = KokkosKernels::ArithTraits<SC>;
812 const Scalar zero = KAT::zero();
813 const Scalar one = KAT::one();
815 local_matrix_type localP;
816 Kokkos::View<SC**, Device> ConstraintViolationSum;
817 Kokkos::View<SC**, Device> Rsum;
818 Kokkos::View<size_t**, Device> nPositive;
819
820 constraintKernel(LO nPDEs_, local_matrix_type localP_)
821 : nPDEs(nPDEs_)
822 , localP(localP_) {
823 ConstraintViolationSum = Kokkos::View<SC**, Device>("ConstraintViolationSum", localP_.numRows(), nPDEs);
824 Rsum = Kokkos::View<SC**, Device>("Rsum", localP_.numRows(), nPDEs);
825 nPositive = Kokkos::View<size_t**, Device>("nPositive", localP_.numRows(), nPDEs);
826 }
827
828 KOKKOS_INLINE_FUNCTION
829 void operator()(const size_t rowIdx) const {
830 auto rowPtr = localP.graph.row_map;
831 auto values = localP.values;
832
833 bool checkRow = true;
834
835 if (rowPtr(rowIdx + 1) == rowPtr(rowIdx)) checkRow = false;
836
837 while (checkRow) {
838 // check constraints and compute the row sum
839
840 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
841 Rsum(rowIdx, entryIdx % nPDEs) += values(entryIdx);
842 if (KAT::real(values(entryIdx)) < KAT::real(zero)) {
843 ConstraintViolationSum(rowIdx, entryIdx % nPDEs) += values(entryIdx);
844 values(entryIdx) = zero;
845 } else {
846 if (KAT::real(values(entryIdx)) != KAT::real(zero))
847 nPositive(rowIdx, entryIdx % nPDEs) = nPositive(rowIdx, entryIdx % nPDEs) + 1;
848
849 if (KAT::real(values(entryIdx)) > KAT::real(1.00001)) {
850 ConstraintViolationSum(rowIdx, entryIdx % nPDEs) += (values(entryIdx) - one);
851 values(entryIdx) = one;
852 }
853 }
854 }
855
856 checkRow = false;
857
858 // take into account any row sum that violates the contraints
859
860 for (size_t k = 0; k < (size_t)nPDEs; k++) {
861 if (KAT::real(Rsum(rowIdx, k)) < KAT::magnitude(zero)) {
862 ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) - Rsum(rowIdx, k); // rstumin
863 } else if (KAT::real(Rsum(rowIdx, k)) > KAT::magnitude(1.00001)) {
864 ConstraintViolationSum(rowIdx, k) = ConstraintViolationSum(rowIdx, k) + (one - Rsum(rowIdx, k)); // rstumin
865 }
866 }
867
868 // check if row need modification
869 for (size_t k = 0; k < (size_t)nPDEs; k++) {
870 if (KAT::magnitude(ConstraintViolationSum(rowIdx, k)) != KAT::magnitude(zero))
871 checkRow = true;
872 }
873 // modify row
874 if (checkRow) {
875 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
876 if (KAT::real(values(entryIdx)) > KAT::real(zero)) {
877 values(entryIdx) = values(entryIdx) +
878 (ConstraintViolationSum(rowIdx, entryIdx % nPDEs) / (Scalar(nPositive(rowIdx, entryIdx % nPDEs)) != zero ? Scalar(nPositive(rowIdx, entryIdx % nPDEs)) : one));
879 }
880 }
881 for (size_t k = 0; k < (size_t)nPDEs; k++) ConstraintViolationSum(rowIdx, k) = zero;
882 }
883 for (size_t k = 0; k < (size_t)nPDEs; k++) Rsum(rowIdx, k) = zero;
884 for (size_t k = 0; k < (size_t)nPDEs; k++) nPositive(rowIdx, k) = 0;
885 } // while (checkRow) ...
886 }
887};
888
889template <typename local_matrix_type>
891 using Scalar = typename local_matrix_type::non_const_value_type;
892 using SC = Scalar;
893 using LO = typename local_matrix_type::non_const_ordinal_type;
894 using Device = typename local_matrix_type::device_type;
895 using KAT = KokkosKernels::ArithTraits<SC>;
896 const Scalar zero = KAT::zero();
897 const Scalar one = KAT::one();
899 local_matrix_type localP;
900 Kokkos::View<SC**, Device> origSorted;
901 Kokkos::View<SC**, Device> fixedSorted;
902 Kokkos::View<LO**, Device> inds;
903
904 optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)
905 : nPDEs(nPDEs_)
906 , localP(localP_) {
907 origSorted = Kokkos::View<SC**, Device>("origSorted", localP_.numRows(), maxRowEntries_);
908 fixedSorted = Kokkos::View<SC**, Device>("fixedSorted", localP_.numRows(), maxRowEntries_);
909 inds = Kokkos::View<LO**, Device>("inds", localP_.numRows(), maxRowEntries_);
910 }
911
912 KOKKOS_INLINE_FUNCTION
913 void operator()(const size_t rowIdx) const {
914 auto rowPtr = localP.graph.row_map;
915 auto values = localP.values;
916
917 auto nnz = rowPtr(rowIdx + 1) - rowPtr(rowIdx);
918
919 if (nnz != 0) {
920 Scalar rsumTarget = zero;
921 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) rsumTarget += values(entryIdx);
922
923 {
924 SC aBigNumber;
925 SC rowSumDeviation, temp, delta;
926 SC closestToLeftBoundDist, closestToRghtBoundDist;
927 LO closestToLeftBound, closestToRghtBound;
928 bool flipped;
929
930 SC leftBound = zero;
931 SC rghtBound = one;
932 if ((KAT::real(rsumTarget) >= KAT::real(leftBound * (static_cast<SC>(nnz)))) &&
933 (KAT::real(rsumTarget) <= KAT::real(rghtBound * (static_cast<SC>(nnz))))) { // has Feasible solution
934
935 flipped = false;
936 // compute aBigNumber to handle some corner cases where we need
937 // something large so that an if statement will be false
938 aBigNumber = KAT::zero();
939 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
940 if (KAT::magnitude(values(entryIdx)) > KAT::magnitude(aBigNumber))
941 aBigNumber = KAT::magnitude(values(entryIdx));
942 }
943 aBigNumber = aBigNumber + (KAT::magnitude(leftBound) + KAT::magnitude(rghtBound)) * (100 * one);
944
945 LO ind = 0;
946 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
947 origSorted(rowIdx, ind) = values(entryIdx);
948 inds(rowIdx, ind) = ind;
949 ind++;
950 }
951
952 auto sortVals = Kokkos::subview(origSorted, rowIdx, Kokkos::ALL());
953 auto sortInds = Kokkos::subview(inds, rowIdx, Kokkos::make_pair(0, ind));
954 // Need permutation indices to sort row values from smallest to largest,
955 // and unsort once row constraints are applied.
956 // serial insertion sort workaround from https://github.com/kokkos/kokkos/issues/645
957 for (LO i = 1; i < static_cast<LO>(nnz); ++i) {
958 ind = sortInds(i);
959 LO j = i;
960
961 if (KAT::real(sortVals(sortInds(i))) < KAT::real(sortVals(sortInds(0)))) {
962 for (; j > 0; --j) sortInds(j) = sortInds(j - 1);
963
964 sortInds(0) = ind;
965 } else {
966 for (; KAT::real(sortVals(ind)) < KAT::real(sortVals(sortInds(j - 1))); --j) sortInds(j) = sortInds(j - 1);
967
968 sortInds(j) = ind;
969 }
970 }
971
972 for (LO i = 0; i < static_cast<LO>(nnz); i++) origSorted(rowIdx, i) = values(rowPtr(rowIdx) + inds(rowIdx, i)); // values is no longer used
973 // find entry in origSorted just to the right of the leftBound
974 closestToLeftBound = 0;
975 while ((closestToLeftBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToLeftBound)) <= KAT::real(leftBound))) closestToLeftBound++;
976
977 // find entry in origSorted just to the right of the rghtBound
978 closestToRghtBound = closestToLeftBound;
979 while ((closestToRghtBound < static_cast<LO>(nnz)) && (KAT::real(origSorted(rowIdx, closestToRghtBound)) <= KAT::real(rghtBound))) closestToRghtBound++;
980
981 // compute distance between closestToLeftBound and the left bound and the
982 // distance between closestToRghtBound and the right bound.
983
984 closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
985 if (closestToRghtBound == static_cast<LO>(nnz))
986 closestToRghtBoundDist = aBigNumber;
987 else
988 closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
989
990 // compute how far the rowSum is off from the target row sum taking into account
991 // numbers that have been shifted to satisfy bound constraint
992
993 rowSumDeviation = leftBound * (static_cast<SC>(closestToLeftBound)) + (static_cast<SC>(nnz - closestToRghtBound)) * rghtBound - rsumTarget;
994 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation += origSorted(rowIdx, i);
995
996 // the code that follow after this if statement assumes that rowSumDeviation is positive. If this
997 // is not the case, flip the signs of everything so that rowSumDeviation is now positive.
998 // Later we will flip the data back to its original form.
999 if (KAT::real(rowSumDeviation) < KAT::real(KAT::zero())) {
1000 flipped = true;
1001 temp = leftBound;
1002 leftBound = -rghtBound;
1003 rghtBound = temp;
1004
1005 /* flip sign of origSorted and reverse ordering so that the negative version is sorted */
1006
1007 // TODO: the following is bad for GPU performance. Switch to bit shifting if brave.
1008 if ((nnz % 2) == 1) origSorted(rowIdx, (nnz / 2)) = -origSorted(rowIdx, (nnz / 2));
1009 for (LO i = 0; i < static_cast<LO>(nnz / 2); i++) {
1010 temp = origSorted(rowIdx, i);
1011 origSorted(rowIdx, i) = -origSorted(rowIdx, nnz - 1 - i);
1012 origSorted(rowIdx, nnz - i - 1) = -temp;
1013 }
1014
1015 /* reverse bounds */
1016
1017 LO itemp = closestToLeftBound;
1018 closestToLeftBound = nnz - closestToRghtBound;
1019 closestToRghtBound = nnz - itemp;
1020 closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
1021 if (closestToRghtBound == static_cast<LO>(nnz))
1022 closestToRghtBoundDist = aBigNumber;
1023 else
1024 closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
1025
1026 rowSumDeviation = -rowSumDeviation;
1027 }
1028
1029 // initial fixedSorted so bounds are satisfied and interiors correspond to origSorted
1030
1031 for (LO i = 0; i < closestToLeftBound; i++) fixedSorted(rowIdx, i) = leftBound;
1032 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i);
1033 for (LO i = closestToRghtBound; i < static_cast<LO>(nnz); i++) fixedSorted(rowIdx, i) = rghtBound;
1034
1035 while ((KAT::magnitude(rowSumDeviation) > KAT::magnitude((one * 1.e-10) * rsumTarget))) { // && ( (closestToLeftBound < nEntries ) || (closestToRghtBound < nEntries))) {
1036 if (closestToRghtBound != closestToLeftBound)
1037 delta = rowSumDeviation / static_cast<SC>(closestToRghtBound - closestToLeftBound);
1038 else
1039 delta = aBigNumber;
1040
1041 if (KAT::magnitude(closestToLeftBoundDist) <= KAT::magnitude(closestToRghtBoundDist)) {
1042 if (KAT::magnitude(delta) <= KAT::magnitude(closestToLeftBoundDist)) {
1043 rowSumDeviation = zero;
1044 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
1045 } else {
1046 rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
1047 fixedSorted(rowIdx, closestToLeftBound) = leftBound;
1048 closestToLeftBound++;
1049 if (closestToLeftBound < static_cast<LO>(nnz))
1050 closestToLeftBoundDist = origSorted(rowIdx, closestToLeftBound) - leftBound;
1051 else
1052 closestToLeftBoundDist = aBigNumber;
1053 }
1054 } else {
1055 if (KAT::magnitude(delta) <= KAT::magnitude(closestToRghtBoundDist)) {
1056 rowSumDeviation = 0;
1057 for (LO i = closestToLeftBound; i < closestToRghtBound; i++) fixedSorted(rowIdx, i) = origSorted(rowIdx, i) - delta;
1058 } else {
1059 rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
1060 // if (closestToRghtBound < nEntries) {
1061 fixedSorted(rowIdx, closestToRghtBound) = origSorted(rowIdx, closestToRghtBound);
1062 closestToRghtBound++;
1063 // }
1064 if (closestToRghtBound >= static_cast<LO>(nnz))
1065 closestToRghtBoundDist = aBigNumber;
1066 else
1067 closestToRghtBoundDist = origSorted(rowIdx, closestToRghtBound) - rghtBound;
1068 }
1069 }
1070 }
1071
1072 auto rowStart = rowPtr(rowIdx);
1073 if (flipped) {
1074 /* flip sign of fixedSorted and reverse ordering so that the positve version is sorted */
1075
1076 if ((nnz % 2) == 1) fixedSorted(rowIdx, (nnz / 2)) = -fixedSorted(rowIdx, (nnz / 2));
1077 for (LO i = 0; i < static_cast<LO>(nnz / 2); i++) {
1078 temp = fixedSorted(rowIdx, i);
1079 fixedSorted(rowIdx, i) = -fixedSorted(rowIdx, nnz - 1 - i);
1080 fixedSorted(rowIdx, nnz - i - 1) = -temp;
1081 }
1082 }
1083 // unsort and update row values with new values
1084 for (LO i = 0; i < static_cast<LO>(nnz); i++) values(rowStart + inds(rowIdx, i)) = fixedSorted(rowIdx, i);
1085
1086 } else { // row does not have feasible solution to match constraint
1087 // just set all entries to the same value giving a row sum of 1
1088 for (auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) values(entryIdx) = one / (static_cast<SC>(nnz));
1089 }
1090 }
1091 }
1092 }
1093};
1094
1095template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1097 using Device = typename Matrix::local_matrix_device_type::device_type;
1098 LO nPDEs = A->GetFixedBlockSize();
1099
1100 using local_mat_type = typename Matrix::local_matrix_device_type;
1101 constraintKernel<local_mat_type> myKernel(nPDEs, P->getLocalMatrixDevice());
1102 Kokkos::parallel_for("enforce constraint", Kokkos::RangePolicy<typename Device::execution_space>(0, P->getRowMap()->getLocalNumElements()),
1103 myKernel);
1104
1105} // SatsifyPConstraints()
1106
1107template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1109 using Device = typename Matrix::local_matrix_device_type::device_type;
1110 LO nPDEs = 1; // A->GetFixedBlockSize();
1111
1112 using local_mat_type = typename Matrix::local_matrix_device_type;
1113 optimalSatisfyConstraintsForScalarPDEsKernel<local_mat_type> myKernel(nPDEs, P->getLocalMaxNumRowEntries(), P->getLocalMatrixDevice());
1114 Kokkos::parallel_for("enforce constraint", Kokkos::RangePolicy<typename Device::execution_space>(0, P->getLocalNumRows()),
1115 myKernel);
1116
1117} // SatsifyPConstraints()
1118
1119template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1120RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> SaPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::JacobiMaxwell1(const RCP<Matrix>& SM,
1121 const RCP<Matrix>& Kn,
1122 const RCP<Matrix>& D0,
1123 const RCP<Matrix>& Pe_tent,
1124 const Scalar beta) const {
1125 // Compute Pe := (I - \beta D0 * diag(Kn)^-1 * D0^T SM) * Pe_tent
1126 using XMM = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
1127 auto one = Teuchos::ScalarTraits<Scalar>::one();
1128
1129 auto diagKnInv = Utilities::GetMatrixDiagonalInverse(*Kn);
1130
1131 auto D0_diagKnInv = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BuildCopy(D0, Teuchos::Copy);
1132 D0_diagKnInv->rightScale(*diagKnInv);
1133
1134 RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> dummy;
1135 auto D0_diagKnInv_D0T = XMM::Multiply(*D0_diagKnInv, false, *D0, true, dummy, GetOStream(Runtime0));
1136 D0_diagKnInv = Teuchos::null;
1137
1138 auto SM_Pe_tent = XMM::Multiply(*SM, false, *Pe_tent, false, dummy, GetOStream(Runtime0));
1139 auto D0_diagKnInv_D0T_SM_Pe_tent = XMM::Multiply(*D0_diagKnInv_D0T, false, *SM_Pe_tent, false, dummy, GetOStream(Runtime0));
1140
1141 RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Pe;
1142 XMM::TwoMatrixAdd(*Pe_tent, false, one, *D0_diagKnInv_D0T_SM_Pe_tent, false, -beta, Pe, GetOStream(Runtime0));
1143 Pe->fillComplete(Pe_tent->getDomainMap(), Pe_tent->getRangeMap());
1144
1145 return Pe;
1146}
1147
1148} // namespace MueLu
1149
1150#endif // MUELU_SAPFACTORY_DEF_HPP
1151
1152// TODO: restrictionMode_ should use the parameter list.
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Jacobi(typename Teuchos::ScalarTraits< Scalar >::magnitudeType omega, const Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Dinv, const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > C_in, Teuchos::FancyOStream &fos, const std::string &label, RCP< ParameterList > &params)
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
void SatisfyPConstraints(RCP< Matrix > A, RCP< Matrix > &P) const
void Build(Level &fineLevel, Level &coarseLevel) const
virtual ~SaPFactory()
Destructor.
void optimalSatisfyPConstraintsForScalarPDEsNonKokkos(RCP< Matrix > &P) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void optimalSatisfyPConstraintsForScalarPDEs(RCP< Matrix > &P) const
bool constrainRow(Scalar *orig, LocalOrdinal nEntries, Scalar leftBound, Scalar rghtBound, Scalar rsumTarget, Scalar *fixedUnsorted, Scalar *scalarData) const
SaPFactory()
Constructor. User can supply a factory for generating the tentative prolongator.
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void SatisfyPConstraintsNonKokkos(RCP< Matrix > A, RCP< Matrix > &P) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
RCP< Matrix > JacobiMaxwell1(const RCP< Matrix > &SM, const RCP< Matrix > &Kn, const RCP< Matrix > &D0, const RCP< Matrix > &Pe_tent, const Scalar beta) const
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static Scalar PowerMethod(const Matrix &A, bool scaleByDiag=true, LocalOrdinal niters=10, Magnitude tolerance=1e-2, Magnitude diagonalReplacementTol=Teuchos::ScalarTraits< Scalar >::eps() *100, bool verbose=false, unsigned int seed=123)
Power method.
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps() *100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool doLumped=false)
Extract Matrix Diagonal.
static Teuchos::RCP< Vector > GetLumpedMatrixDiagonal(Matrix const &A, const bool doReciprocal=false, Magnitude tol=Teuchos::ScalarTraits< Scalar >::magnitude(Teuchos::ScalarTraits< Scalar >::zero()), Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool replaceSingleEntryRowWithZero=false, const bool useAverageAbsDiagVal=false)
Extract Matrix Diagonal of lumped matrix.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Namespace for MueLu classes and methods.
@ Statistics2
Print even more statistics.
@ Warnings
Print all warning messages.
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
scalar_type dampingFactor
typename local_matrix_type::device_type Device
typename local_matrix_type::non_const_value_type Scalar
KokkosKernels::ArithTraits< SC > KAT
Kokkos::View< SC **, Device > ConstraintViolationSum
Kokkos::View< size_t **, Device > nPositive
constraintKernel(LO nPDEs_, local_matrix_type localP_)
typename local_matrix_type::non_const_ordinal_type LO
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
Kokkos::View< SC **, Device > Rsum
typename local_matrix_type::non_const_value_type Scalar
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)
typename local_matrix_type::non_const_ordinal_type LO