MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_SemiCoarsenPFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
11#define MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
12
13#include <stdlib.h>
14
15#include <Kokkos_Core.hpp>
16
17#include <KokkosBatched_LU_Decl.hpp>
18#include <KokkosBatched_LU_Serial_Impl.hpp>
19#include <KokkosBatched_Trsm_Decl.hpp>
20#include <KokkosBatched_Trsm_Serial_Impl.hpp>
21#include <KokkosBatched_Util.hpp>
22#include <KokkosSparse_CrsMatrix.hpp>
23
24#include <Xpetra_CrsMatrixWrap.hpp>
25#include <Xpetra_ImportFactory.hpp>
26#include <Xpetra_Matrix.hpp>
27#include <Xpetra_MultiVectorFactory.hpp>
28#include <Xpetra_VectorFactory.hpp>
29
31#include "MueLu_MasterList.hpp"
32#include "MueLu_Monitor.hpp"
34
35namespace MueLu {
36
37template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
38
39RCP<const ParameterList>
42 RCP<ParameterList> validParamList = rcp(new ParameterList());
43
44 std::string name = "semicoarsen: coarsen rate";
45 validParamList->setEntry(name, MasterList::getEntry(name));
46 validParamList->set<RCP<const FactoryBase>>(
47 "A", Teuchos::null, "Generating factory of the matrix A");
48 validParamList->set<RCP<const FactoryBase>>(
49 "Nullspace", Teuchos::null, "Generating factory of the nullspace");
50 validParamList->set<RCP<const FactoryBase>>(
51 "Coordinates", Teuchos::null, "Generating factory for coordinates");
52
53 validParamList->set<RCP<const FactoryBase>>(
54 "LineDetection_VertLineIds", Teuchos::null,
55 "Generating factory for LineDetection vertical line ids");
56 validParamList->set<RCP<const FactoryBase>>(
57 "LineDetection_Layers", Teuchos::null,
58 "Generating factory for LineDetection layer ids");
59 validParamList->set<RCP<const FactoryBase>>(
60 "CoarseNumZLayers", Teuchos::null,
61 "Generating factory for number of coarse z-layers");
62
63 return validParamList;
64}
65
66template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
67 class Node>
71 DeclareInput(Level &fineLevel, Level & /* coarseLevel */) const {
72 Input(fineLevel, "A");
73 Input(fineLevel, "Nullspace");
74
75 Input(fineLevel, "LineDetection_VertLineIds");
76 Input(fineLevel, "LineDetection_Layers");
77 Input(fineLevel, "CoarseNumZLayers");
78
79 // check whether fine level coordinate information is available.
80 // If yes, request the fine level coordinates and generate coarse coordinates
81 // during the Build call
82 if (fineLevel.GetLevelID() == 0) {
83 if (fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
84 fineLevel.DeclareInput("Coordinates", NoFactory::get(), this);
85 bTransferCoordinates_ = true;
86 }
87 } else if (bTransferCoordinates_ == true) {
88 // on coarser levels we check the default factory providing "Coordinates"
89 // or the factory declared to provide "Coordinates"
90 // first, check which factory is providing coordinate information
91 RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
92 if (myCoordsFact == Teuchos::null) {
93 myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
94 }
95 if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
96 fineLevel.DeclareInput("Coordinates", myCoordsFact.get(), this);
97 }
98 }
99}
100
101template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
102 class Node>
104 Build(Level &fineLevel,
105 Level &coarseLevel)
106 const {
107 return BuildP(fineLevel, coarseLevel);
108}
109
110template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
111 class Node>
113 BuildP(Level &fineLevel,
114 Level &coarseLevel)
115 const {
116 FactoryMonitor m(*this, "Build", coarseLevel);
117
118 // obtain general variables
119 RCP<Matrix> A = Get<RCP<Matrix>>(fineLevel, "A");
120 RCP<MultiVector> fineNullspace =
121 Get<RCP<MultiVector>>(fineLevel, "Nullspace");
122
123 // get user-provided coarsening rate parameter (constant over all levels)
124 const ParameterList &pL = GetParameterList();
125 LO CoarsenRate = as<LO>(pL.get<int>("semicoarsen: coarsen rate"));
126 TEUCHOS_TEST_FOR_EXCEPTION(
127 CoarsenRate < 2, Exceptions::RuntimeError,
128 "semicoarsen: coarsen rate must be greater than 1");
129
130 // collect general input data
131 LO BlkSize = A->GetFixedBlockSize();
132 RCP<const Map> rowMap = A->getRowMap();
133 LO Ndofs = rowMap->getLocalNumElements();
134 LO Nnodes = Ndofs / BlkSize;
135
136 // collect line detection information generated by the LineDetectionFactory
137 // instance
138 LO FineNumZLayers = Get<LO>(fineLevel, "CoarseNumZLayers");
139 Teuchos::ArrayRCP<LO> TVertLineId =
140 Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_VertLineIds");
141 Teuchos::ArrayRCP<LO> TLayerId =
142 Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_Layers");
143
144 // compute number of coarse layers
145 TEUCHOS_TEST_FOR_EXCEPTION(FineNumZLayers < 2, Exceptions::RuntimeError,
146 "Cannot coarsen further");
147 using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
148 LO CoarseNumZLayers =
149 (LO)floor(((coordT)(FineNumZLayers + 1)) / ((coordT)CoarsenRate) - 1.0);
150 if (CoarseNumZLayers < 1)
151 CoarseNumZLayers = 1;
152
153 // generate transfer operator with semicoarsening
154 RCP<Matrix> P;
155 RCP<MultiVector> coarseNullspace;
156 BuildSemiCoarsenP(coarseLevel, Ndofs, Nnodes, BlkSize, FineNumZLayers,
157 CoarseNumZLayers, TLayerId, TVertLineId, A, fineNullspace, P,
158 coarseNullspace);
159
160 // Store number of coarse z-layers on the coarse level container
161 // This information is used by the LineDetectionAlgorithm
162 // TODO get rid of the NoFactory
163 coarseLevel.Set("NumZLayers", Teuchos::as<LO>(CoarseNumZLayers),
165
166 // store semicoarsening transfer on coarse level
167 Set(coarseLevel, "P", P);
168 Set(coarseLevel, "Nullspace", coarseNullspace);
169
170 // transfer coordinates
171 if (bTransferCoordinates_) {
172 SubFactoryMonitor m2(*this, "TransferCoordinates", coarseLevel);
173 typedef Xpetra::MultiVector<
174 typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO, NO>
175 xdMV;
176 RCP<xdMV> fineCoords = Teuchos::null;
177 if (fineLevel.GetLevelID() == 0 &&
178 fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
179 fineCoords = fineLevel.Get<RCP<xdMV>>("Coordinates", NoFactory::get());
180 } else {
181 RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
182 if (myCoordsFact == Teuchos::null) {
183 myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
184 }
185 if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
186 fineCoords =
187 fineLevel.Get<RCP<xdMV>>("Coordinates", myCoordsFact.get());
188 }
189 }
190
191 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords == Teuchos::null,
193 "No Coordinates found provided by the user.");
194
195 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3,
197 "Three coordinates arrays must be supplied if "
198 "line detection orientation not given.");
199 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> x =
200 fineCoords->getDataNonConst(0);
201 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> y =
202 fineCoords->getDataNonConst(1);
203 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> z =
204 fineCoords->getDataNonConst(2);
205
206 // determine the maximum and minimum z coordinate value on the current
207 // processor.
208 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_max =
209 -Teuchos::ScalarTraits<
210 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
211 Teuchos::ScalarTraits<
212 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
213 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_min =
214 Teuchos::ScalarTraits<
215 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
216 Teuchos::ScalarTraits<
217 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
218 for (auto it = z.begin(); it != z.end(); ++it) {
219 if (*it > zval_max)
220 zval_max = *it;
221 if (*it < zval_min)
222 zval_min = *it;
223 }
224
225 LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
226
227 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType>
228 myZLayerCoords = Teuchos::arcp<
229 typename Teuchos::ScalarTraits<Scalar>::coordinateType>(
230 myCoarseZLayers);
231 if (myCoarseZLayers == 1) {
232 myZLayerCoords[0] = zval_min;
233 } else {
234 typename Teuchos::ScalarTraits<Scalar>::coordinateType dz =
235 (zval_max - zval_min) / (myCoarseZLayers - 1);
236 for (LO k = 0; k < myCoarseZLayers; ++k) {
237 myZLayerCoords[k] = k * dz;
238 }
239 }
240
241 // Note, that the coarse level node coordinates have to be in vertical
242 // ordering according to the numbering of the vertical lines
243
244 // number of vertical lines on current node:
245 LO numVertLines = Nnodes / FineNumZLayers;
246 LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
247
248 RCP<const Map> coarseCoordMap = MapFactory::Build(
249 fineCoords->getMap()->lib(),
250 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
251 Teuchos::as<size_t>(numLocalCoarseNodes),
252 fineCoords->getMap()->getIndexBase(), fineCoords->getMap()->getComm());
253 RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<
254 typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO,
255 NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
256 coarseCoords->putScalar(-1.0);
257 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cx =
258 coarseCoords->getDataNonConst(0);
259 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cy =
260 coarseCoords->getDataNonConst(1);
261 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cz =
262 coarseCoords->getDataNonConst(2);
263
264 // loop over all vert line indices (stop as soon as possible)
265 LO cntCoarseNodes = 0;
266 for (LO vt = 0; vt < TVertLineId.size(); ++vt) {
267 // vertical line id in *vt
268 LO curVertLineId = TVertLineId[vt];
269
270 if (cx[curVertLineId * myCoarseZLayers] == -1.0 &&
271 cy[curVertLineId * myCoarseZLayers] == -1.0) {
272 // loop over all local myCoarseZLayers
273 for (LO n = 0; n < myCoarseZLayers; ++n) {
274 cx[curVertLineId * myCoarseZLayers + n] = x[vt];
275 cy[curVertLineId * myCoarseZLayers + n] = y[vt];
276 cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
277 }
278 cntCoarseNodes += myCoarseZLayers;
279 }
280
281 TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes,
283 "number of coarse nodes is inconsistent.");
284 if (cntCoarseNodes == numLocalCoarseNodes)
285 break;
286 }
287
288 // set coarse level coordinates
289 Set(coarseLevel, "Coordinates", coarseCoords);
290 } /* end bool bTransferCoordinates */
291}
292
293template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
294 class Node>
298 BuildSemiCoarsenP(Level &coarseLevel, const LO NFRows, const LO NFNodes,
299 const LO DofsPerNode, const LO NFLayers,
300 const LO NCLayers, const ArrayRCP<LO> LayerId,
301 const ArrayRCP<LO> VertLineId, const RCP<Matrix> &Amat,
302 const RCP<MultiVector> fineNullspace, RCP<Matrix> &P,
303 RCP<MultiVector> &coarseNullspace) const {
304 SubFactoryMonitor m2(*this, "BuildSemiCoarsenP", coarseLevel);
305#if KOKKOS_VERSION >= 40799
306 using impl_SC = typename KokkosKernels::ArithTraits<SC>::val_type;
307#else
308 using impl_SC = typename Kokkos::ArithTraits<SC>::val_type;
309#endif
310#if KOKKOS_VERSION >= 40799
311 using impl_ATS = KokkosKernels::ArithTraits<impl_SC>;
312#else
313 using impl_ATS = Kokkos::ArithTraits<impl_SC>;
314#endif
315 using LOView1D = Kokkos::View<LO *, DeviceType>;
316 using LOView2D = Kokkos::View<LO **, DeviceType>;
317
318 // Construct a map from fine level column to layer ids (including ghost nodes)
319 // Note: this is needed to sum all couplings within a layer
320 const auto FCol2LayerVector =
321 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getColMap());
322 const auto localTemp =
323 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getDomainMap());
324 RCP<const Import> importer = Amat->getCrsGraph()->getImporter();
325 if (importer == Teuchos::null)
326 importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
327 {
328 // Fill local temp with layer ids and fill ghost nodes
329 const auto localTempHost = localTemp->getLocalViewHost(Tpetra::Access::ReadWrite);
330 for (int row = 0; row < NFRows; row++)
331 localTempHost(row, 0) = LayerId[row / DofsPerNode];
332 const auto localTempView = localTemp->getLocalViewDevice(Tpetra::Access::ReadWrite);
333 Kokkos::deep_copy(localTempView, localTempHost);
334 FCol2LayerVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
335 }
336 const auto FCol2LayerView = FCol2LayerVector->getLocalViewDevice(Tpetra::Access::ReadOnly);
337 const auto FCol2Layer = Kokkos::subview(FCol2LayerView, Kokkos::ALL(), 0);
338
339 // Construct a map from fine level column to local dof per node id (including
340 // ghost nodes) Note: this is needed to sum all couplings within a layer
341 const auto FCol2DofVector =
342 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getColMap());
343 {
344 // Fill local temp with local dof per node ids and fill ghost nodes
345 const auto localTempHost = localTemp->getLocalViewHost(Tpetra::Access::ReadWrite);
346 for (int row = 0; row < NFRows; row++)
347 localTempHost(row, 0) = row % DofsPerNode;
348 const auto localTempView = localTemp->getLocalViewDevice(Tpetra::Access::ReadWrite);
349 Kokkos::deep_copy(localTempView, localTempHost);
350 FCol2DofVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
351 }
352 const auto FCol2DofView = FCol2DofVector->getLocalViewDevice(Tpetra::Access::ReadOnly);
353 const auto FCol2Dof = Kokkos::subview(FCol2DofView, Kokkos::ALL(), 0);
354
355 // Compute NVertLines
356 // TODO: Read this from line detection factory
357 int NVertLines = -1;
358 if (NFNodes != 0)
359 NVertLines = VertLineId[0];
360 for (int node = 1; node < NFNodes; ++node)
361 if (VertLineId[node] > NVertLines)
362 NVertLines = VertLineId[node];
363 NVertLines++;
364
365 // Construct a map from Line, Layer ids to fine level node
366 LOView2D LineLayer2Node("LineLayer2Node", NVertLines, NFLayers);
367 typename LOView2D::host_mirror_type LineLayer2NodeHost =
368 Kokkos::create_mirror_view(LineLayer2Node);
369 for (int node = 0; node < NFNodes; ++node)
370 LineLayer2NodeHost(VertLineId[node], LayerId[node]) = node;
371 Kokkos::deep_copy(LineLayer2Node, LineLayer2NodeHost);
372
373 // Construct a map from coarse layer id to fine layer id
374 LOView1D CLayer2FLayer("CLayer2FLayer", NCLayers);
375 typename LOView1D::host_mirror_type CLayer2FLayerHost =
376 Kokkos::create_mirror_view(CLayer2FLayer);
377 using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
378 const LO FirstStride =
379 (LO)ceil(((coordT)(NFLayers + 1)) / ((coordT)(NCLayers + 1)));
380 const coordT RestStride =
381 ((coordT)(NFLayers - FirstStride + 1)) / ((coordT)NCLayers);
382 const LO NCpts =
383 (LO)floor((((coordT)(NFLayers - FirstStride + 1)) / RestStride) + .00001);
384 TEUCHOS_TEST_FOR_EXCEPTION(NCLayers != NCpts, Exceptions::RuntimeError,
385 "sizes do not match.");
386 coordT stride = (coordT)FirstStride;
387 for (int clayer = 0; clayer < NCLayers; ++clayer) {
388 CLayer2FLayerHost(clayer) = (LO)floor(stride) - 1;
389 stride += RestStride;
390 }
391 Kokkos::deep_copy(CLayer2FLayer, CLayer2FLayerHost);
392
393 // Compute start layer and stencil sizes for layer interpolation at each
394 // coarse layer
395 int MaxStencilSize = 1;
396 LOView1D CLayer2StartLayer("CLayer2StartLayer", NCLayers);
397 LOView1D CLayer2StencilSize("CLayer2StencilSize", NCLayers);
398 typename LOView1D::host_mirror_type CLayer2StartLayerHost =
399 Kokkos::create_mirror_view(CLayer2StartLayer);
400 typename LOView1D::host_mirror_type CLayer2StencilSizeHost =
401 Kokkos::create_mirror_view(CLayer2StencilSize);
402 for (int clayer = 0; clayer < NCLayers; ++clayer) {
403 const int startLayer = (clayer > 0) ? CLayer2FLayerHost(clayer - 1) + 1 : 0;
404 const int stencilSize = (clayer < NCLayers - 1)
405 ? CLayer2FLayerHost(clayer + 1) - startLayer
406 : NFLayers - startLayer;
407
408 if (MaxStencilSize < stencilSize)
409 MaxStencilSize = stencilSize;
410 CLayer2StartLayerHost(clayer) = startLayer;
411 CLayer2StencilSizeHost(clayer) = stencilSize;
412 }
413 Kokkos::deep_copy(CLayer2StartLayer, CLayer2StartLayerHost);
414 Kokkos::deep_copy(CLayer2StencilSize, CLayer2StencilSizeHost);
415
416 // Allocate storage for the coarse layer interpolation matrices on all
417 // vertical lines Note: Contributions to each matrix are collapsed to vertical
418 // lines. Thus, each vertical line gives rise to a block tridiagonal matrix.
419 // Here we store the full matrix to be compatible with kokkos kernels batch LU
420 // and tringular solve.
421 int Nmax = MaxStencilSize * DofsPerNode;
422 Kokkos::View<impl_SC ***, DeviceType> BandMat(
423 "BandMat", NVertLines, Nmax, Nmax);
424 Kokkos::View<impl_SC ***, DeviceType> BandSol(
425 "BandSol", NVertLines, Nmax, DofsPerNode);
426
427 // Precompute number of nonzeros in prolongation matrix and allocate P views
428 // Note: Each coarse dof (NVertLines*NCLayers*DofsPerNode) contributes an
429 // interpolation stencil (StencilSize*DofsPerNode)
430 int NnzP = 0;
431 for (int clayer = 0; clayer < NCLayers; ++clayer)
432 NnzP += CLayer2StencilSizeHost(clayer);
433 NnzP *= NVertLines * DofsPerNode * DofsPerNode;
434 Kokkos::View<impl_SC *, DeviceType> Pvals("Pvals", NnzP);
435 Kokkos::View<LO *, DeviceType> Pcols("Pcols", NnzP);
436
437 // Precompute Pptr
438 // Note: Each coarse layer stencil dof contributes DofsPerNode to the
439 // corresponding row in P
440 Kokkos::View<size_t *, DeviceType> Pptr("Pptr", NFRows + 1);
441 typename Kokkos::View<size_t *, DeviceType>::host_mirror_type PptrHost =
442 Kokkos::create_mirror_view(Pptr);
443 Kokkos::deep_copy(PptrHost, 0);
444 for (int line = 0; line < NVertLines; ++line) {
445 for (int clayer = 0; clayer < NCLayers; ++clayer) {
446 const int stencilSize = CLayer2StencilSizeHost(clayer);
447 const int startLayer = CLayer2StartLayerHost(clayer);
448 for (int snode = 0; snode < stencilSize; ++snode) {
449 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
450 const int layer = startLayer + snode;
451 const int AmatBlkRow = LineLayer2NodeHost(line, layer);
452 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
453 PptrHost(AmatRow + 1) += DofsPerNode;
454 }
455 }
456 }
457 }
458 for (int i = 2; i < NFRows + 1; ++i)
459 PptrHost(i) += PptrHost(i - 1);
460 TEUCHOS_TEST_FOR_EXCEPTION(NnzP != (int)PptrHost(NFRows),
462 "Number of nonzeros in P does not match");
463 Kokkos::deep_copy(Pptr, PptrHost);
464
465 // Precompute Pptr offsets
466 // Note: These are used to determine the nonzero index in Pvals and Pcols
467 Kokkos::View<LO *, Kokkos::DefaultHostExecutionSpace> layerBuckets(
468 "layerBuckets", NFLayers);
469 Kokkos::deep_copy(layerBuckets, 0);
470 LOView2D CLayerSNode2PptrOffset("CLayerSNode2PptrOffset", NCLayers,
471 MaxStencilSize);
472 typename LOView2D::host_mirror_type CLayerSNode2PptrOffsetHost =
473 Kokkos::create_mirror_view(CLayerSNode2PptrOffset);
474 for (int clayer = 0; clayer < NCLayers; ++clayer) {
475 const int stencilSize = CLayer2StencilSizeHost(clayer);
476 const int startLayer = CLayer2StartLayerHost(clayer);
477 for (int snode = 0; snode < stencilSize; ++snode) {
478 const int layer = startLayer + snode;
479 CLayerSNode2PptrOffsetHost(clayer, snode) = layerBuckets(layer);
480 layerBuckets(layer)++;
481 }
482 }
483 Kokkos::deep_copy(CLayerSNode2PptrOffset, CLayerSNode2PptrOffsetHost);
484
485 { // Fill P - fill and solve each block tridiagonal system and fill P views
486 SubFactoryMonitor m3(*this, "Fill P", coarseLevel);
487
488 const auto localAmat = Amat->getLocalMatrixDevice();
489 const auto zero = impl_ATS::zero();
490 const auto one = impl_ATS::one();
491
492 using range_policy = Kokkos::RangePolicy<execution_space>;
493 Kokkos::parallel_for(
494 "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Fill P",
495 range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
496 for (int clayer = 0; clayer < NCLayers; ++clayer) {
497 // Initialize BandSol
498 auto bandSol =
499 Kokkos::subview(BandSol, line, Kokkos::ALL(), Kokkos::ALL());
500 for (int row = 0; row < Nmax; ++row)
501 for (int dof = 0; dof < DofsPerNode; ++dof)
502 bandSol(row, dof) = zero;
503
504 // Initialize BandMat (set unused row diagonal to 1.0)
505 const int stencilSize = CLayer2StencilSize(clayer);
506 const int N = stencilSize * DofsPerNode;
507 auto bandMat =
508 Kokkos::subview(BandMat, line, Kokkos::ALL(), Kokkos::ALL());
509 for (int row = 0; row < Nmax; ++row)
510 for (int col = 0; col < Nmax; ++col)
511 bandMat(row, col) =
512 (row == col && row >= N) ? one : zero;
513
514 // Loop over layers in stencil and fill banded matrix and rhs
515 const int flayer = CLayer2FLayer(clayer);
516 const int startLayer = CLayer2StartLayer(clayer);
517 for (int snode = 0; snode < stencilSize; ++snode) {
518 const int layer = startLayer + snode;
519 if (layer == flayer) { // If layer in stencil is a coarse layer
520 for (int dof = 0; dof < DofsPerNode; ++dof) {
521 const int row = snode * DofsPerNode + dof;
522 bandMat(row, row) = one;
523 bandSol(row, dof) = one;
524 }
525 } else { // Not a coarse layer
526 const int AmatBlkRow = LineLayer2Node(line, layer);
527 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
528 // Get Amat row info
529 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
530 const auto localAmatRow = localAmat.rowConst(AmatRow);
531 const int AmatRowLeng = localAmatRow.length;
532
533 const int row = snode * DofsPerNode + dofi;
534 for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
535 const int col = snode * DofsPerNode + dofj;
536
537 // Sum values along row which correspond to stencil
538 // layer/dof and fill bandMat
539 auto val = zero;
540 for (int i = 0; i < AmatRowLeng; ++i) {
541 const int colidx = localAmatRow.colidx(i);
542 if (FCol2Layer(colidx) == layer &&
543 FCol2Dof(colidx) == dofj)
544 val += localAmatRow.value(i);
545 }
546 bandMat(row, col) = val;
547
548 if (snode > 0) {
549 // Sum values along row which correspond to stencil
550 // layer/dof below and fill bandMat
551 val = zero;
552 for (int i = 0; i < AmatRowLeng; ++i) {
553 const int colidx = localAmatRow.colidx(i);
554 if (FCol2Layer(colidx) == layer - 1 &&
555 FCol2Dof(colidx) == dofj)
556 val += localAmatRow.value(i);
557 }
558 bandMat(row, col - DofsPerNode) = val;
559 }
560
561 if (snode < stencilSize - 1) {
562 // Sum values along row which correspond to stencil
563 // layer/dof above and fill bandMat
564 val = zero;
565 for (int i = 0; i < AmatRowLeng; ++i) {
566 const int colidx = localAmatRow.colidx(i);
567 if (FCol2Layer(colidx) == layer + 1 &&
568 FCol2Dof(colidx) == dofj)
569 val += localAmatRow.value(i);
570 }
571 bandMat(row, col + DofsPerNode) = val;
572 }
573 }
574 }
575 }
576 }
577
578 // Batch LU and triangular solves
579 namespace KB = KokkosBatched;
580 using lu_type = typename KB::SerialLU<KB::Algo::LU::Unblocked>;
581 lu_type::invoke(bandMat);
582 using trsv_l_type =
583 typename KB::SerialTrsm<KB::Side::Left, KB::Uplo::Lower,
584 KB::Trans::NoTranspose, KB::Diag::Unit,
585 KB::Algo::Trsm::Unblocked>;
586 trsv_l_type::invoke(one, bandMat, bandSol);
587 using trsv_u_type = typename KB::SerialTrsm<
588 KB::Side::Left, KB::Uplo::Upper, KB::Trans::NoTranspose,
589 KB::Diag::NonUnit, KB::Algo::Trsm::Unblocked>;
590 trsv_u_type::invoke(one, bandMat, bandSol);
591
592 // Fill prolongation views with solution
593 for (int snode = 0; snode < stencilSize; ++snode) {
594 for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
595 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
596 const int layer = startLayer + snode;
597 const int AmatBlkRow = LineLayer2Node(line, layer);
598 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
599
600 const int pptrOffset = CLayerSNode2PptrOffset(clayer, snode);
601 const int pptr =
602 Pptr(AmatRow) + pptrOffset * DofsPerNode + dofj;
603
604 const int col =
605 line * NCLayers + clayer; // coarse node (block row) index
606 Pcols(pptr) = col * DofsPerNode + dofj;
607 Pvals(pptr) = bandSol(snode * DofsPerNode + dofi, dofj);
608 }
609 }
610 }
611 }
612 });
613 } // Fill P
614
615 // Build P
616 RCP<const Map> rowMap = Amat->getRowMap();
617 Xpetra::global_size_t GNdofs = rowMap->getGlobalNumElements();
618 Xpetra::global_size_t itemp = GNdofs / NFLayers;
619 std::vector<size_t> stridingInfo_{(size_t)DofsPerNode};
620 RCP<const Map> coarseMap = StridedMapFactory::Build(
621 rowMap->lib(), NCLayers * itemp, NCLayers * NVertLines * DofsPerNode, 0,
622 stridingInfo_, rowMap->getComm(), -1, 0);
623 P = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
624 RCP<CrsMatrix> PCrs = toCrsMatrix(P);
625 PCrs->setAllValues(Pptr, Pcols, Pvals);
626 PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
627
628 // set StridingInformation of P
629 if (Amat->IsView("stridedMaps") == true)
630 P->CreateView("stridedMaps", Amat->getRowMap("stridedMaps"), coarseMap);
631 else
632 P->CreateView("stridedMaps", P->getRangeMap(), coarseMap);
633
634 // Construct coarse nullspace and inject fine nullspace
635 coarseNullspace =
636 MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
637 const int numVectors = fineNullspace->getNumVectors();
638 const auto fineNullspaceView = fineNullspace->getLocalViewDevice(Tpetra::Access::ReadOnly);
639 const auto coarseNullspaceView = coarseNullspace->getLocalViewDevice(Tpetra::Access::ReadWrite);
640 using range_policy = Kokkos::RangePolicy<execution_space>;
641 Kokkos::parallel_for(
642 "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Inject Nullspace",
643 range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
644 for (int clayer = 0; clayer < NCLayers; ++clayer) {
645 const int layer = CLayer2FLayer(clayer);
646 const int AmatBlkRow =
647 LineLayer2Node(line, layer); // fine node (block row) index
648 const int col =
649 line * NCLayers + clayer; // coarse node (block row) index
650 for (int k = 0; k < numVectors; ++k) {
651 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
652 const int fRow = AmatBlkRow * DofsPerNode + dofi;
653 const int cRow = col * DofsPerNode + dofi;
654 coarseNullspaceView(cRow, k) = fineNullspaceView(fRow, k);
655 }
656 }
657 }
658 });
659}
660
661} // namespace MueLu
662
663#define MUELU_SEMICOARSENPFACTORY_KOKKOS_SHORT
664#endif // MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
MueLu::DefaultNode Node
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static const Teuchos::ParameterEntry & getEntry(const std::string &name)
Returns default entry from the "master" list corresponding to the specified name.
static const NoFactory * get()
Prolongator factory performing semi-coarsening.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
Namespace for MueLu classes and methods.