MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_ReitzingerPFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_REITZINGERPFACTORY_DEF_HPP
11#define MUELU_REITZINGERPFACTORY_DEF_HPP
12
13#include <Xpetra_MapFactory.hpp>
14#include <Xpetra_Map.hpp>
15#include <Xpetra_CrsMatrix.hpp>
16#include <Xpetra_Matrix.hpp>
17#include <Xpetra_MatrixMatrix.hpp>
18#include <Xpetra_MultiVector.hpp>
19#include <Xpetra_VectorFactory.hpp>
20#include <Xpetra_Import.hpp>
21#include <Xpetra_ImportFactory.hpp>
22#include <Xpetra_CrsMatrixWrap.hpp>
23// #include <Xpetra_IO.hpp>
24
26
27#include <Teuchos_ScalarTraits.hpp>
28
29#include "MueLu_MasterList.hpp"
30#include "MueLu_Monitor.hpp"
31#include "MueLu_Utilities.hpp"
32#include "MueLu_ImportUtils.hpp"
33
34#include "MueLu_Behavior.hpp"
35
36namespace MueLu {
37
// Returns the list of parameters this factory accepts (repartitioning and
// tentative-prolongator options) plus the generating factories it consumes:
// "D0" (fine discrete gradient), "NodeAggMatrix", and "Pnodal".
38template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
 40 RCP<ParameterList> validParamList = rcp(new ParameterList());
 41
// Pull the canonical default value / validator for each option from the MasterList.
 42#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
 43 SET_VALID_ENTRY("repartition: enable");
 44 SET_VALID_ENTRY("repartition: use subcommunicators");
 45 SET_VALID_ENTRY("tentative: calculate qr");
 46 SET_VALID_ENTRY("tentative: constant column sums");
 47#undef SET_VALID_ENTRY
 48
 49 validParamList->set<RCP<const FactoryBase> >("D0", Teuchos::null, "Generating factory of the matrix D0");
 50 validParamList->set<RCP<const FactoryBase> >("NodeAggMatrix", Teuchos::null, "Generating factory of the matrix NodeAggMatrix");
 51 validParamList->set<RCP<const FactoryBase> >("Pnodal", Teuchos::null, "Generating factory of the matrix P");
 52
 53 // Make sure we don't recursively validate options for the matrixmatrix kernels
 54 ParameterList norecurse;
 55 norecurse.disableRecursiveValidation();
 56 validParamList->set<ParameterList>("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
 57
 58 return validParamList;
 59}
60
// Declares the data this factory requests before Build is called:
// the fine-level discrete gradient D0, and the coarse-level nodal
// aggregate matrix and nodal prolongator.
61template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
 63 Input(fineLevel, "D0");
 64 Input(coarseLevel, "NodeAggMatrix");
 65 Input(coarseLevel, "Pnodal");
 66}
67
// Build simply forwards to BuildP, which does all the work.
68template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
 70 return BuildP(fineLevel, coarseLevel);
 71}
72
// Constructs the edge prolongator Pe ("P"/"Ptent" on the coarse level) and the
// coarse discrete gradient D0H ("D0" on the coarse level) from the fine-level
// discrete gradient D0 and the piecewise-constant nodal prolongator Pn, such
// that the commuting relation D0 * Pn = Pe * D0H holds (verified in debug mode
// via CheckCommutingProperty). Also handles subcommunicator updates when
// repartitioning is enabled.
73template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
 75 FactoryMonitor m(*this, "Build", coarseLevel);
 76
 77 using XMM = Xpetra::MatrixMatrix<SC, LO, GO, NO>;
 78 using local_matrix_type = typename Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_type;
 79 using rowptr_type = typename local_matrix_type::row_map_type::non_const_type;
 80 using colidx_type = typename local_matrix_type::index_type::non_const_type;
 81 using values_type = typename local_matrix_type::values_type::non_const_type;
 82
 83 using impl_scalar_type = typename Matrix::impl_scalar_type;
 84 using ATS = KokkosKernels::ArithTraits<impl_scalar_type>;
 85 using mag_type = typename KokkosKernels::ArithTraits<impl_scalar_type>::magnitudeType;
 86 using magATS = KokkosKernels::ArithTraits<mag_type>;
 87
 88 using execution_space = typename Node::execution_space;
 89 using memory_space = typename Node::memory_space;
 90
 91 const auto one_Scalar = Teuchos::ScalarTraits<Scalar>::one();
 92 const auto one_impl_scalar = ATS::one();
 93 const auto zero_LO = KokkosKernels::ArithTraits<LocalOrdinal>::zero();
 94 const auto one_LO = KokkosKernels::ArithTraits<LocalOrdinal>::one();
 95 const auto one_mag = magATS::one();
 96 const auto eps_mag = magATS::epsilon();
 97 const auto INVALID_GO = Teuchos::OrdinalTraits<GlobalOrdinal>::invalid();
 98
 99 // Using a nodal prolongator Pn and the discrete gradient matrix D0, this factory constructs
 100 // a coarse discrete gradient matrix D0H and an edge prolongator Pe such that the commuting
 101 // relationship
 102 //
 103 // D0 * Pn = Pe * D0H
 104 //
 105 // holds.
 106
 107 // The construction of the coarse discrete gradient works as follows.
 108 // We create edges between aggregates that contain at least one pair of connected nodes.
 109 // This boils down to computing the matrix
 110 //
 111 // Z := (D0*Pn)^T * (D0 * Pn).
 112 //
 113 // If Z_ij != 0 we create an edge e between the nodal aggregates i and j.
 114 // Z is clearly symmetric. We only create a single edge between i and j.
 115 // In the distributed case, we also need to decide which rank owns the coarse edge e.
 116 // If both endpoints i and j live on process proc0 then proc0 should obviously own the edge e.
 117 // If i lives on proc0 and j lives on proc1, we tie-break based on the rule
 118 //
 119 // min{proc0, proc1} if proc0+proc1 is odd,
 120 // max{proc0, proc1} if proc0+proc1 is even.
 121 //
 122 // The orientation of the edge (encoded in the values 1 and -1) is determined by the GIDs of the endpoints.
 123 // All edges point from smaller GID to larger GID, i.e. i < j.
 124
 125 // We also perform detection of boundary conditions and add additional edges to the coarse discrete gradient.
 126 // We detect all edges in D0 that only connect to a single node.
 127
 128 Teuchos::FancyOStream& out0 = GetBlackHole();
 129 const ParameterList& pL = GetParameterList();
 130
 131 bool update_communicators = pL.get<bool>("repartition: enable") && pL.get<bool>("repartition: use subcommunicators");
 132
 133 RCP<Matrix> D0 = Get<RCP<Matrix> >(fineLevel, "D0");
 134 RCP<Matrix> Pn = Get<RCP<Matrix> >(coarseLevel, "Pnodal");
 135
 136 // This needs to be an Operator because if NodeMatrix gets repartitioned away, we get an Operator on the level
 137 RCP<Operator> CoarseNodeMatrix = Get<RCP<Operator> >(coarseLevel, "NodeAggMatrix");
 138
 139 // Matrix matrix params
 140 RCP<ParameterList> mm_params = rcp(new ParameterList);
 141 if (pL.isSublist("matrixmatrix: kernel params"))
 142 mm_params->sublist("matrixmatrix: kernel params") = pL.sublist("matrixmatrix: kernel params");
 143
 144 if (Behavior::debug()) { // Check that Pn is piecewise constant
 145 // TODO: Should this be a debug-only check?
 146
// Multiplying Pn by a vector of ones gives the row sums; for a piecewise
// constant prolongator with unit entries every row sum must be exactly 1.
 147 auto vec_ones = VectorFactory::Build(Pn->getDomainMap(), false);
 148 vec_ones->putScalar(one_Scalar);
 149 auto vec_rowsums = VectorFactory::Build(Pn->getRangeMap(), false);
 150 Pn->apply(*vec_ones, *vec_rowsums, Teuchos::NO_TRANS);
 151
 152 auto lclPn = Pn->getLocalMatrixDevice();
 153 auto lclRowSums = vec_rowsums->getLocalViewDevice(Tpetra::Access::ReadOnly);
 154
 155 bool all_entries_ok = true;
 156 Kokkos::parallel_reduce(
 157 Kokkos::RangePolicy<execution_space>(0, lclPn.numRows()), KOKKOS_LAMBDA(const LocalOrdinal rlid, bool& entries_ok) {
 158 // rowsums are 1
 159 entries_ok = entries_ok && (ATS::magnitude(lclRowSums(rlid, 0) - one_impl_scalar) < eps_mag);
 160
 161 // all nonzero entries are 1
 162 auto row = lclPn.rowConst(rlid);
 163 for (LocalOrdinal k = 0; k < row.length; ++k) {
 164 entries_ok = entries_ok && (ATS::magnitude(row.value(k)-one_impl_scalar) < eps_mag);
 165
 166 } }, Kokkos::LAnd<bool>(all_entries_ok));
 167
 168 TEUCHOS_TEST_FOR_EXCEPTION(!all_entries_ok, std::runtime_error, "The prolongator needs to be piecewise constant and all entries need to be 1.");
 169 }
 170
// Outputs of the coarse-edge construction below: the product D0*Pn, the coarse
// discrete gradient D0H, the edge counts, and a marker vector that flags fine
// edges having only a single endpoint (Dirichlet edges).
 171 RCP<Matrix> D0_Pn;
 172 RCP<Matrix> D0H;
 173 LocalOrdinal numCoarseEdges = 0;
 174 LocalOrdinal numCoarseRegularEdges = 0;
 175 LocalOrdinal numCoarseDirichletEdges = 0;
 176 auto isDirichletFineEdge = Xpetra::VectorFactory<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(D0->getRowMap(), false);
 177 auto numFineEdges = isDirichletFineEdge->getMap()->getLocalNumElements();
 178 {
 179 // Construct D0*Pn and Z := (D0*Pn)^T * (D0*Pn)
 180 RCP<Matrix> dummy;
 181 D0_Pn = XMM::Multiply(*D0, false, *Pn, false, dummy, GetOStream(Runtime0), true, true);
 182 RCP<Matrix> Z = XMM::Multiply(*D0_Pn, true, *D0_Pn, false, dummy, GetOStream(Runtime0), true, true);
 183
 184 auto rowMap = Z->getRowMap();
 185 auto colMap = Z->getColMap();
 186 auto lclRowMap = rowMap->getLocalMap();
 187 auto lclColMap = colMap->getLocalMap();
 188 auto lclZ = Z->getLocalMatrixDevice();
 189 auto numLocalRows = lclZ.numRows();
 190
 191 if (Behavior::debug()) {
 192 TEUCHOS_ASSERT(Utilities::MapsAreNested(*rowMap, *colMap));
 193 }
 194
// Fetch the owning PID of each column of Z so the device lambdas can decide
// edge ownership; data is gathered on host and mirrored to device memory.
 195 auto importer = Z->getCrsGraph()->getImporter();
 196 // TODO: replace with Kokkos once PID data lives on device
 197 Teuchos::Array<int> Z_col_pids;
 198 Kokkos::View<int*, memory_space> Z_col_pids_d;
 199 if (!importer.is_null()) {
 201 utils.getPids(*importer, Z_col_pids, false);
 202 Kokkos::View<int*, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > Z_col_pids_h(Z_col_pids.data(), Z_col_pids.size());
 203 Z_col_pids_d = Kokkos::View<int*, memory_space>("Z_col_pids_d", Z_col_pids.size());
 204 Kokkos::deep_copy(Z_col_pids_d, Z_col_pids_h);
 205 }
 206
 207 int myProcId = rowMap->getComm()->getRank();
 208
 209 // Tie-break criterion for owner of coarse edges
 210 auto tie_break = KOKKOS_LAMBDA(int proc0, int proc1) {
 211 if ((proc0 + proc1) % 2 == 1) {
 212 return Kokkos::min(proc0, proc1);
 213 } else {
 214 return Kokkos::max(proc0, proc1);
 215 }
 216 };
 217
 218 // Utility function to determine whether we need to add a coarse edge for entry
 219 // (rlid, clid) of Z.
 220 auto add_edge = KOKKOS_LAMBDA(LocalOrdinal rlid, LocalOrdinal clid) {
 221 if (clid < numLocalRows) {
 222 // Both row and column index are local, this process owns the new edge.
 223 // Only create one edge between rlid and clid and ignore the transposed entry.
 224 return (rlid < clid);
 225 } else {
 226 // Column index is nonlocal. Need to decide if this process owns the new edge.
 227 int otherProcId = Z_col_pids_d(clid);
 228 int owner = tie_break(myProcId, otherProcId);
 229 return (owner == myProcId);
 230 }
 231 };
 232
 233 // Count up how many coarse regular edges we are creating.
 234 Kokkos::parallel_reduce(
 235 Kokkos::RangePolicy<execution_space>(0, numLocalRows), KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& ne) {
 236 auto row = lclZ.rowConst(rlid);
 237 // Loop over entries in row of Z
 238 for (LocalOrdinal k = 0; k < row.length; ++k) {
 239 auto clid = row.colidx(k);
 240 if (add_edge(rlid, clid))
 241 ++ne;
 242 }
 243 },
 244 numCoarseRegularEdges);
 245
 246 // Mark as singleParents any D0 edges with only one node (so these are
 247 // edges that connect an interior node with a Dirichlet node).
 248 // isDirichletFineEdge is 1 for edges with a single endpoint and 0 otherwise.
 249 using LOMatrix = Tpetra::CrsMatrix<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>;
 250 RCP<LOMatrix> D0_LocalOrdinal;
 251 {
 252 // We want to do a apply using D0 on vectors with Scalar=LocalOrdinal.
 253 // Something like this could work and would not require any memory allocations, but it requires
 254 // "convert" to be ETI'd for all possible scalar types.
 255
 256 // toTpetra(D0)->template convert<LocalOrdinal>()->apply(*toTpetra(oneVec), *toTpetra(isDirichletFineEdge), Teuchos::NO_TRANS);
 257
 258 using lo_local_matrix_type = typename LOMatrix::local_matrix_device_type;
 259
// Manually rebuild D0's values as LocalOrdinal (+1/-1), reusing its graph;
// any value other than +1/-1 is a hard error.
 260 auto lclGraph = D0->getCrsGraph()->getLocalGraphDevice();
 261 Kokkos::View<LocalOrdinal*, memory_space> values(Kokkos::ViewAllocateWithoutInitializing("values_LocalOrdinal"), D0->getLocalNumEntries());
 262 {
 263 auto values_scalar = D0->getLocalMatrixDevice().values;
 264 Kokkos::parallel_for(
 265 "MueLu::ReitzingerPFactory::convert", Kokkos::RangePolicy<execution_space>(0, values.extent(0)), KOKKOS_LAMBDA(const size_t i) {
 266 if (values_scalar(i) == one_impl_scalar)
 267 values(i) = one_LO;
 268 else if (values_scalar(i) == -one_impl_scalar)
 269 values(i) = -one_LO;
 270 else
 271 Kokkos::abort("D0 contains bad values");
 272 });
 273 }
 274 lo_local_matrix_type lclMatrix("D0_LocalOrdinal", D0->getLocalMatrixDevice().numCols(), values, lclGraph);
 275
 276 D0_LocalOrdinal = rcp(new LOMatrix(lclMatrix, toTpetra(D0->getRowMap()), toTpetra(D0->getColMap()), toTpetra(D0->getDomainMap()), toTpetra(D0->getRangeMap())));
 277 }
 278 {
// Apply the LocalOrdinal copy of D0 to a ones vector: a row whose entries sum
// to +/-1 has a single endpoint (Dirichlet edge); a 0 row sum means two endpoints.
 279 auto oneVec = Xpetra::VectorFactory<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(D0->getDomainMap(), false);
 280 oneVec->putScalar(KokkosKernels::ArithTraits<LocalOrdinal>::one());
 281 D0_LocalOrdinal->apply(*toTpetra(oneVec), *toTpetra(isDirichletFineEdge), Teuchos::NO_TRANS);
 282
 283 auto lcl_isDirichletFineEdge = isDirichletFineEdge->getLocalViewDevice(Tpetra::Access::ReadWrite);
 284 auto lcl_D0_Pn = D0_Pn->getLocalMatrixDevice();
 285 Kokkos::parallel_for(
 286 Kokkos::RangePolicy<execution_space>(0, lcl_isDirichletFineEdge.extent(0)), KOKKOS_LAMBDA(const LocalOrdinal i) {
 287 if (ATS::magnitude(ATS::magnitude(lcl_isDirichletFineEdge(i, 0)) - one_mag) > eps_mag) {
 288 // This is a regular edge with two end points.
 289 lcl_isDirichletFineEdge(i, 0) = zero_LO;
 290 } else {
 291 // This is an edge with one endpoint.
 292 lcl_isDirichletFineEdge(i, 0) = one_LO;
 293 }
 294 });
 295 }
 296
 297 // Count the number of fine Dirichlet edges that are connected to every coarse nodal aggregate via
 298 // the graph of D0*Pn.
 299 auto numberConnectedFineDirichletEdgesToCoarseNode = Xpetra::VectorFactory<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(D0_Pn->getDomainMap(), false);
 300 {
 301 auto abs_D0_Pn = LOMatrix(toTpetra(D0_Pn->getCrsGraph()));
 302 abs_D0_Pn.fillComplete(toTpetra(D0_Pn->getDomainMap()), toTpetra(D0_Pn->getRangeMap()));
 303 abs_D0_Pn.setAllToScalar(KokkosKernels::ArithTraits<LocalOrdinal>::one());
 304 abs_D0_Pn.apply(*toTpetra(isDirichletFineEdge), *toTpetra(numberConnectedFineDirichletEdgesToCoarseNode), Teuchos::TRANS);
 305 }
 306
 307 // Count local Dirichlet coarse edges
 308 {
 309 auto lcl_numberConnectedFineDirichletEdgesToCoarseNode = numberConnectedFineDirichletEdgesToCoarseNode->getLocalViewDevice(Tpetra::Access::ReadOnly);
 310
 311 Kokkos::parallel_reduce(
 312 Kokkos::RangePolicy<execution_space>(0, lcl_numberConnectedFineDirichletEdgesToCoarseNode.extent(0)),
 313 KOKKOS_LAMBDA(const LocalOrdinal i, LocalOrdinal& ne) {
 314 if (ATS::magnitude(lcl_numberConnectedFineDirichletEdgesToCoarseNode(i, 0)) > eps_mag) {
 315 ++ne;
 316 }
 317 },
 318 numCoarseDirichletEdges);
 319 }
 320
 321 if (IsPrint(Statistics0)) {
 322 LocalOrdinal numGlobalRegularEdges;
 323 LocalOrdinal numGlobalDirichletEdges;
 324 MueLu_sumAll(rowMap->getComm(), numCoarseRegularEdges, numGlobalRegularEdges);
 325 MueLu_sumAll(rowMap->getComm(), numCoarseDirichletEdges, numGlobalDirichletEdges);
 326 GetOStream(Statistics0) << "regular edges: " << numGlobalRegularEdges << ", Dirichlet edges: " << numGlobalDirichletEdges << std::endl;
 327 }
 328
// Allocate CRS storage for D0H: regular edges first (rows 0..numCoarseRegularEdges-1),
// then Dirichlet edges.
 329 numCoarseEdges = numCoarseRegularEdges + numCoarseDirichletEdges;
 330 rowptr_type rowptr(Kokkos::ViewAllocateWithoutInitializing("rowptr D0H"), numCoarseEdges + 1);
 331 // 2 entries per regular edge, 1 entry per Dirichlet edge
 332 LocalOrdinal nnz = 2 * numCoarseRegularEdges + numCoarseDirichletEdges;
 333 colidx_type colidx(Kokkos::ViewAllocateWithoutInitializing("colidx D0H"), nnz);
 334 values_type values(Kokkos::ViewAllocateWithoutInitializing("values D0H"), nnz);
 335
 336 // Fill regular edges
 337 Kokkos::parallel_scan(
 338 Kokkos::RangePolicy<execution_space>(0, numLocalRows),
 339 KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& ne, const bool update) {
 340 auto row = lclZ.rowConst(rlid);
 341 if (!update) {
 342 // First pass: figure out offsets for entries.
 343 for (LocalOrdinal k = 0; k < row.length; ++k) {
 344 auto clid = row.colidx(k);
 345 if (add_edge(rlid, clid))
 346 ++ne;
 347 }
 348 } else {
 349 // Second pass: enter entries
 350
 351 // initialize
 352 if (rlid == 0)
 353 rowptr(rlid) = 0;
 354
 355 auto rgid = lclRowMap.getGlobalElement(rlid);
 356 auto rclid = lclColMap.getLocalElement(rgid);
 357
 358 // loop over entries in row of Z
 359 for (LocalOrdinal k = 0; k < row.length; ++k) {
 360 auto clid = row.colidx(k);
 361 if (add_edge(rlid, clid)) {
 362 auto cgid = lclColMap.getGlobalElement(clid);
 363 // enter the two end-points of the edge, orient edge based on GIDs of nodal endpoints
 364 colidx(2 * ne) = rclid;
 365 colidx(2 * ne + 1) = clid;
 366 if (rgid < cgid) {
 367 values(2 * ne) = -one_impl_scalar;
 368 values(2 * ne + 1) = one_impl_scalar;
 369 } else {
 370 values(2 * ne) = one_impl_scalar;
 371 values(2 * ne + 1) = -one_impl_scalar;
 372 }
 373 ++ne;
 374 rowptr(ne) = 2 * ne;
 375 }
 376 }
 377 }
 378 });
 379
 380 // Fill Dirichlet edges
 381 // Create one coarse Dirichlet edge for every nodal aggregate that is connected to at least one fine Dirichlet edge.
 382 {
 383 auto lcl_numberConnectedFineDirichletEdgesToCoarseNode = numberConnectedFineDirichletEdgesToCoarseNode->getLocalViewDevice(Tpetra::Access::ReadOnly);
 384 Kokkos::parallel_scan(
 385 Kokkos::RangePolicy<execution_space>(0, lcl_numberConnectedFineDirichletEdgesToCoarseNode.extent(0)),
 386 KOKKOS_LAMBDA(const LocalOrdinal agg_lid, LocalOrdinal& ne, const bool update) {
 387 if (ATS::magnitude(lcl_numberConnectedFineDirichletEdgesToCoarseNode(agg_lid, 0)) > eps_mag) {
 388 if (!update) {
 389 // First pass: figure out offsets
 390 ++ne;
 391 } else {
 392 // Second pass: fill
 393 colidx(2 * numCoarseRegularEdges + ne) = agg_lid;
 394 values(2 * numCoarseRegularEdges + ne) = one_impl_scalar;
 395 ++ne;
 396 rowptr(numCoarseRegularEdges + ne) = 2 * numCoarseRegularEdges + ne;
 397 }
 398 }
 399 });
 400 }
 401
 402 auto D0H_rowmap = MapFactory::Build(rowMap->lib(), INVALID_GO, numCoarseEdges, 0, rowMap->getComm());
 403 auto lclD0H = local_matrix_type("D0H", numCoarseEdges, colMap->getLocalNumElements(), nnz, values, rowptr, colidx);
 404
 405 // Construct distributed matrix
 406 D0H = MatrixFactory::Build(lclD0H, D0H_rowmap, colMap, Z->getDomainMap(), D0H_rowmap);
 407 }
 408
 409 // Create the Pe matrix, but with the extra entries. From ML's notes:
 410 /* The general idea is that the matrix */
 411 /* T_h P_n T_H^* */
 412 /* is almost Pe. If we make sure that P_n contains 1's and -1's, the*/
 413 /* matrix triple product will yield a matrix with +/- 1 and +/- 2's.*/
 414 /* If we remove all the 1's and divide the 2's by 2. we arrive at Pe*/
 415
 416 RCP<Matrix> D0_Pn_D0HT;
 417 {
 418 SubFactoryMonitor m2(*this, "Generate Pe (pre-fix)", coarseLevel);
 419#if 0
 420 {
 421 // If you're concerned about processor / rank mismatches, this debugging code might help
 422 int rank = D0->getRowMap()->getComm()->getRank();
 423 int fine_level = fineLevel.GetLevelID();
 424 printf("[%d] Level %d Checkpoint #2 Pn = %d/%d/%d/%d D0c = %d/%d/%d/%d D0 = %d/%d/%d/%d\n",rank,fine_level,
 425 Pn->getRangeMap()->getComm()->getSize(),
 426 Pn->getRowMap()->getComm()->getSize(),
 427 Pn->getColMap()->getComm()->getSize(),
 428 Pn->getDomainMap()->getComm()->getSize(),
 429 D0H->getRangeMap()->getComm()->getSize(),
 430 D0H->getRowMap()->getComm()->getSize(),
 431 D0H->getColMap()->getComm()->getSize(),
 432 D0H->getDomainMap()->getComm()->getSize(),
 433 D0->getRangeMap()->getComm()->getSize(),
 434 D0->getRowMap()->getComm()->getSize(),
 435 D0->getColMap()->getComm()->getSize(),
 436 D0->getDomainMap()->getComm()->getSize());
 437 fflush(stdout);
 438 D0->getRowMap()->getComm()->barrier();
 439 }
 440#endif
 441 RCP<Matrix> dummy;
 442 RCP<Matrix> Pn_D0cT = XMM::Multiply(*Pn, false, *D0H, true, dummy, out0, true, true, "Pn*D0c'", mm_params);
 443
 444 // We don't want this guy getting accidentally used later
 445 if (!mm_params.is_null()) mm_params->remove("importer", false);
 446
 447 D0_Pn_D0HT = XMM::Multiply(*D0, false, *Pn_D0cT, false, dummy, out0, true, true, "D0*(Pn*D0c')", mm_params);
 448
 449 // TODO: Something like this *might* work. But this specifically, doesn't
 450 // Pe = XMM::Multiply(*D0_Pn_nonghosted,false,*D0H,true,dummy,out0,true,true,"(D0*Pn)*D0c'",mm_params);
 451 }
 452
// Assemble Pe from D0*Pn*D0H^T per the ML note above: drop entries that are
// 0 or +/-1 and halve the remaining (+/-2) entries. Each Dirichlet fine edge
// instead gets a single entry connecting it to its aggregate's coarse
// Dirichlet edge.
 453 RCP<Matrix> Pe;
 454 {
 455 auto lcl_D0_Pn = D0_Pn->getLocalMatrixDevice();
 456 auto lcl_D0_Pn_D0HT = D0_Pn_D0HT->getLocalMatrixDevice();
 457 auto lcl_isDirichletFineEdge = isDirichletFineEdge->getLocalViewDevice(Tpetra::Access::ReadOnly);
 458
 459 auto lcl_colmap_D0_Pn_D0HT = D0_Pn_D0HT->getColMap()->getLocalMap();
 460
 461 const auto half = one_impl_scalar / (one_impl_scalar + one_impl_scalar);
 462
 463 // overallocate by 1 to allow for easier counting
 464 rowptr_type Pe_rowptr("Pe_rowptr", numFineEdges + 2);
 465
 466 // count entries per row
 467 Kokkos::parallel_for(
 468 "Pe_count_entries", Kokkos::RangePolicy<execution_space>(0, numFineEdges), KOKKOS_LAMBDA(const LocalOrdinal fineEdge) {
 469 if (lcl_isDirichletFineEdge(fineEdge, 0) != one_LO) {
 470 // regular fine edge
 471 auto row = lcl_D0_Pn_D0HT.rowConst(fineEdge);
 472 for (int k = 0; k < row.length; ++k) {
 473 auto val = row.value(k);
 474 // filter out entries +-1 and 0
 475 if (!((ATS::magnitude(val - one_impl_scalar) < eps_mag) || (ATS::magnitude(val + one_impl_scalar) < eps_mag) || (ATS::magnitude(val) < eps_mag))) {
 476 // add entry (fineEdge, clid) -> val/2.
 477 ++Pe_rowptr(fineEdge + 2);
 478 }
 479 }
 480 } else {
 481 // Dirichlet interior fine edge
 482 ++Pe_rowptr(fineEdge + 2);
 483 }
 484 });
 485
 486 // prefix sum
 487 LocalOrdinal Pe_nnz;
 488 Kokkos::parallel_scan(
 489 "Pe_prefix_sum", Kokkos::RangePolicy<execution_space>(0, numFineEdges + 2), KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& nnz, const bool update) {
 490 nnz += Pe_rowptr(rlid);
 491 if (update) {
 492 Pe_rowptr(rlid) = nnz;
 493 }
 494 },
 495 Pe_nnz);
 496
 497 // allocate view for indices and values
 498 colidx_type Pe_colidx("Pe_colidx", Pe_nnz);
 499 values_type Pe_values("Pe_values", Pe_nnz);
 500
 501 // We build the mapping from coarse nodes lids wrt column map of D0_Pn to coarse edge gids.
 502 RCP<GOVector> map_coarseNodes_colMap_D0_Pn_to_coarseEdges;
 503 {
// Write the GIDs of the coarse Dirichlet edges (rows numCoarseRegularEdges..
// numCoarseEdges-1 of D0H) into a GlobalOrdinal vector.
 504 auto map_coarseEdges_rowMap_D0H_to_coarseEdges = Xpetra::VectorFactory<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(D0H->getRowMap());
 505 {
 506 auto lcl_map_coarseEdges_rowMap_D0H_to_coarseEdges = map_coarseEdges_rowMap_D0H_to_coarseEdges->getLocalViewDevice(Tpetra::Access::OverwriteAll);
 507 auto lclMap = D0H->getRowMap()->getLocalMap();
 508 Kokkos::parallel_for(
 509 Kokkos::RangePolicy<execution_space>(numCoarseRegularEdges, numCoarseEdges), KOKKOS_LAMBDA(const LocalOrdinal coarseEdge) {
 510 lcl_map_coarseEdges_rowMap_D0H_to_coarseEdges(coarseEdge, 0) = lclMap.getGlobalElement(coarseEdge);
 511 });
 512 }
 513 auto map_coarseNodes_domainMap_D0_Pn_to_coarseEdges = Xpetra::VectorFactory<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(D0H->getDomainMap());
 514 {
 515 // We want to do a transpose apply using D0H on vectors with Scalar=GlobalOrdinal.
 516 // Something like this could work and would not require any memory allocations, but it requires
 517 // "convert" to be ETI'd for all possible scalar types.
 518
 519 // toTpetra(D0H)->template convert<GlobalOrdinal>()->apply(*toTpetra(map_coarseEdges_rowMap_D0H_to_coarseEdges), *toTpetra(map_coarseNodes_domainMap_D0_Pn_to_coarseEdges), Teuchos::TRANS);
 520
 521 using GOMatrix = Tpetra::CrsMatrix<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, Node>;
 522 using go_local_matrix_type = typename GOMatrix::local_matrix_device_type;
 523
// Manually rebuild D0H with GlobalOrdinal values of 1, reusing its graph,
// so the transpose apply propagates coarse-edge GIDs to coarse nodes.
 524 auto lclGraph = D0H->getCrsGraph()->getLocalGraphDevice();
 525 typename go_local_matrix_type::values_type::non_const_type ones("ones_GlobalOrdinal", D0H->getLocalNumEntries());
 526 const auto one_GO = KokkosKernels::ArithTraits<typename go_local_matrix_type::values_type::value_type>::one();
 527 Kokkos::deep_copy(ones, one_GO);
 528
 529 go_local_matrix_type lclMatrix("D0H_GlobalOrdinal", D0H->getLocalMatrixDevice().numCols(), ones, lclGraph);
 530
 531 auto D0H_GlobalOrdinal = GOMatrix(lclMatrix, toTpetra(D0H->getRowMap()), toTpetra(D0H->getColMap()), toTpetra(D0H->getDomainMap()), toTpetra(D0H->getRangeMap()));
 532 D0H_GlobalOrdinal.apply(*toTpetra(map_coarseEdges_rowMap_D0H_to_coarseEdges), *toTpetra(map_coarseNodes_domainMap_D0_Pn_to_coarseEdges), Teuchos::TRANS);
 533 }
 534
// Move the mapping from the domain map of D0_Pn to its column map (ghosted)
// so device kernels can look up off-rank coarse nodes.
 535 auto importer = D0_Pn->getCrsGraph()->getImporter();
 536 if (!importer.is_null()) {
 537 map_coarseNodes_colMap_D0_Pn_to_coarseEdges = Xpetra::VectorFactory<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(D0_Pn->getColMap());
 538 map_coarseNodes_colMap_D0_Pn_to_coarseEdges->doImport(*map_coarseNodes_domainMap_D0_Pn_to_coarseEdges, *importer, Xpetra::INSERT);
 539 } else {
 540 map_coarseNodes_colMap_D0_Pn_to_coarseEdges = map_coarseNodes_domainMap_D0_Pn_to_coarseEdges;
 541 }
 542 }
 543 {
 544 auto lcl_map_coarseNodes_colMap_D0_Pn_to_coarseEdges = map_coarseNodes_colMap_D0_Pn_to_coarseEdges->getLocalViewDevice(Tpetra::Access::ReadOnly);
 545
 546 // fill
 547 Kokkos::parallel_for(
 548 "Pe_fill", Kokkos::RangePolicy<execution_space>(0, numFineEdges), KOKKOS_LAMBDA(const LocalOrdinal fineEdge_lid) {
 549 if (lcl_isDirichletFineEdge(fineEdge_lid, 0) != one_LO) {
 550 // regular fine edge
 551 auto row = lcl_D0_Pn_D0HT.rowConst(fineEdge_lid);
 552 for (int k = 0; k < row.length; ++k) {
 553 auto val = row.value(k);
 554 if (!((ATS::magnitude(val - one_impl_scalar) < eps_mag) || (ATS::magnitude(val + one_impl_scalar) < eps_mag) || (ATS::magnitude(val) < eps_mag))) {
 555 auto clid = row.colidx(k);
 556 // add entry (fineEdge_lid, clid) -> val/2.
 557 auto offset = Pe_rowptr(fineEdge_lid + 1);
 558 Pe_colidx(offset) = clid;
 559 Pe_values(offset) = val * half;
 560 ++Pe_rowptr(fineEdge_lid + 1);
 561 }
 562 }
 563 } else {
 564 // Dirichlet interior fine edge
 565 // Only one nonzero entry in row of D0_Pn: (fineEdge_lid, coarseNode_lid_D0_Pn) -> val_D0_Pn
 566 for (auto offset_D0_Pn = lcl_D0_Pn.graph.row_map(fineEdge_lid); offset_D0_Pn < lcl_D0_Pn.graph.row_map(fineEdge_lid + 1); ++offset_D0_Pn) {
 567 LocalOrdinal coarseNode_lid_D0_Pn = lcl_D0_Pn.graph.entries(offset_D0_Pn);
 568 impl_scalar_type val_D0_Pn = lcl_D0_Pn.values(offset_D0_Pn);
 569 if (ATS::magnitude(val_D0_Pn) > eps_mag) {
 570 GlobalOrdinal coarseEdge_gid = lcl_map_coarseNodes_colMap_D0_Pn_to_coarseEdges(coarseNode_lid_D0_Pn, 0);
 571
 572 auto coarseEdge_lid_D0_Pn_D0HT = lcl_colmap_D0_Pn_D0HT.getLocalElement(coarseEdge_gid);
 573
 574 // We rely on the fact that all coarse interior edges have been created with value 1.
 575 const auto val_D0H = one_impl_scalar;
 576
 577 // add entry (fineEdge_lid, coarseEdge) -> val_D0_Pn/val_D0H to edge prolongator
 578 auto offset_Pe = Pe_rowptr(fineEdge_lid + 1);
 579 Pe_colidx(offset_Pe) = coarseEdge_lid_D0_Pn_D0HT;
 580 Pe_values(offset_Pe) = val_D0_Pn / val_D0H;
 581 ++Pe_rowptr(fineEdge_lid + 1);
 582 break;
 583 }
 584 }
 585 }
 586 });
 587 }
 588 auto lclPe = local_matrix_type("Pe", numFineEdges, D0_Pn_D0HT->getColMap()->getLocalNumElements(), Pe_nnz, Pe_values, Kokkos::subview(Pe_rowptr, Kokkos::make_pair((decltype(numFineEdges))0, numFineEdges + 1)), Pe_colidx);
 589
 590 // Construct distributed matrix
 591 Pe = MatrixFactory::Build(lclPe, D0->getRowMap(), D0_Pn_D0HT->getColMap(), D0H->getRangeMap(), D0->getRangeMap());
 592 }
 593
 594 if (Behavior::debug()) {
 595 /* Check commuting property */
 596 CheckCommutingProperty(*Pe, *D0H, *D0, *Pn);
 597 }
 598
 599 /* If we're repartitioning here, we need to cut down the communicators */
 600 // NOTE: We need to do this *after* checking the commuting property, since
 601 // that's going to need to fineLevel's communicators, not the repartitioned ones
 602 if (update_communicators) {
 603 // NOTE: We can only do D0 here. We have to do Ke_coarse=(Re Ke_fine Pe) in RebalanceAcFactory
 604 RCP<const Teuchos::Comm<int> > newComm;
 605 if (!CoarseNodeMatrix.is_null()) newComm = CoarseNodeMatrix->getDomainMap()->getComm();
 606 RCP<const Map> newMap = MapFactory::copyMapWithNewComm(D0H->getRowMap(), newComm);
 607 D0H->removeEmptyProcessesInPlace(newMap);
 608
 609 // The "in place" still leaves a dummy matrix here. That needs to go
 610 if (newMap.is_null()) D0H = Teuchos::null;
 611
 612 Set(coarseLevel, "InPlaceMap", newMap);
 613 }
 614 /* Set output on the level */
 615 Set(coarseLevel, "P", Pe);
 616 Set(coarseLevel, "Ptent", Pe);
 617
 618 Set(coarseLevel, "D0", D0H);
 619
 620 /* This needs to be kept for the smoothers */
 621 coarseLevel.Set("D0", D0H, NoFactory::get());
 622 coarseLevel.AddKeepFlag("D0", NoFactory::get(), MueLu::Final);
 623 coarseLevel.RemoveKeepFlag("D0", NoFactory::get(), MueLu::UserData);
 624
 625#if 0
 626 {
 627 int numProcs = Pe->getRowMap()->getComm()->getSize();
 628 char fname[80];
 629
 630 sprintf(fname, "Pe_%d_%d.mat", numProcs, fineLevel.GetLevelID());
 631 Xpetra::IO<SC, LO, GO, NO>::Write(fname, *Pe);
 632 sprintf(fname, "Pn_%d_%d.mat", numProcs, fineLevel.GetLevelID());
 633 Xpetra::IO<SC, LO, GO, NO>::Write(fname, *Pn);
 634 if (!D0H.is_null()) {
 635 sprintf(fname, "D0c_%d_%d.mat", numProcs, fineLevel.GetLevelID());
 636 Xpetra::IO<SC, LO, GO, NO>::Write(fname, *D0H);
 637 }
 638 sprintf(fname, "D0f_%d_%d.mat", numProcs, fineLevel.GetLevelID());
 639 Xpetra::IO<SC, LO, GO, NO>::Write(fname, *D0);
 640 }
 641#endif
 642
 643} // end Build
644
// Debug/statistics helper: forms Pe*D0_c - D0_f*Pn and prints its Frobenius
// norm, which should be (near) zero when the commuting relation
// D0_f * Pn = Pe * D0_c holds. Only does work when Statistics0 output is enabled.
645template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
 647 CheckCommutingProperty(const Matrix& Pe, const Matrix& D0_c, const Matrix& D0_f, const Matrix& Pn) const {
 648 if (IsPrint(Statistics0)) {
 649 using XMM = MatrixMatrix;
 650 auto one = Teuchos::ScalarTraits<SC>::one();
 651
 652 RCP<Matrix> dummy;
 653 RCP<Matrix> left = XMM::Multiply(Pe, false, D0_c, false, dummy, GetOStream(Runtime0));
 654 RCP<Matrix> right = XMM::Multiply(D0_f, false, Pn, false, dummy, GetOStream(Runtime0));
 655
 656 RCP<Matrix> summation;
// left + (-1)*right == Pe*D0_c - D0_f*Pn
 657 XMM::TwoMatrixAdd(*left, false, one, *right, false, -one, summation, GetOStream(Runtime0));
 658 summation->fillComplete(left->getDomainMap(), left->getRangeMap());
 659
 660 auto norm = summation->getFrobeniusNorm();
 661 GetOStream(Statistics0) << "CheckCommutingProperty: || Pe D0_c - D0_f Pn || = " << norm << std::endl;
 662 }
 663
 664} // end CheckCommutingProperty
665
666} // namespace MueLu
667
668#define MUELU_REITZINGERPFACTORY_SHORT
669#endif // MUELU_REITZINGERPFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultGlobalOrdinal GlobalOrdinal
static bool debug()
Whether MueLu is in debug mode.
Timer to be used in factories. Similar to Monitor but with additional timers.
MueLu utility class for Import-related routines.
void getPids(const Xpetra::Import< LocalOrdinal, GlobalOrdinal, Node > &Importer, Teuchos::Array< int > &pids, bool use_minus_one_for_local)
Like getPidGidPairs, but just gets the PIDs, ordered by the column Map.
Class that holds all level-specific information.
void RemoveKeepFlag(const std::string &ename, const FactoryBase *factory, KeepType keep=MueLu::All)
int GetLevelID() const
Return level number.
void AddKeepFlag(const std::string &ename, const FactoryBase *factory=NoFactory::get(), KeepType keep=MueLu::Keep)
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static const NoFactory * get()
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void CheckCommutingProperty(const Matrix &Pe, const Matrix &D0_c, const Matrix &D0_f, const Matrix &Pn) const
Utility method.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static bool MapsAreNested(const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &rowMap, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &colMap)
Namespace for MueLu classes and methods.
@ Final
Keep data only for this run. Used to keep data useful for Hierarchy::Iterate(). Data will be deleted ...
@ UserData
User data are always kept. This flag is set automatically when Level::Set("data", data) is used....
@ Runtime0
One-liner description of what is happening.
@ Statistics0
Print statistics that do not involve significant additional computation.