MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_SemiCoarsenPFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
11#define MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
12
13#include <stdlib.h>
14
15#include <Kokkos_Core.hpp>
16
17#include <KokkosBatched_LU_Decl.hpp>
18#include <KokkosBatched_LU_Serial_Impl.hpp>
19#include <KokkosBatched_Trsm_Decl.hpp>
20#include <KokkosBatched_Trsm_Serial_Impl.hpp>
21#include <KokkosBatched_Util.hpp>
22#include <KokkosSparse_CrsMatrix.hpp>
23
24#include <Xpetra_CrsMatrixWrap.hpp>
25#include <Xpetra_ImportFactory.hpp>
26#include <Xpetra_Matrix.hpp>
27#include <Xpetra_MultiVectorFactory.hpp>
28#include <Xpetra_VectorFactory.hpp>
29
31#include "MueLu_MasterList.hpp"
32#include "MueLu_Monitor.hpp"
34
35namespace MueLu {
36
37template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
38
39RCP<const ParameterList>
42 RCP<ParameterList> validParamList = rcp(new ParameterList());
43
44 std::string name = "semicoarsen: coarsen rate";
45 validParamList->setEntry(name, MasterList::getEntry(name));
46 validParamList->set<RCP<const FactoryBase>>(
47 "A", Teuchos::null, "Generating factory of the matrix A");
48 validParamList->set<RCP<const FactoryBase>>(
49 "Nullspace", Teuchos::null, "Generating factory of the nullspace");
50 validParamList->set<RCP<const FactoryBase>>(
51 "Coordinates", Teuchos::null, "Generating factory for coordinates");
52
53 validParamList->set<RCP<const FactoryBase>>(
54 "LineDetection_VertLineIds", Teuchos::null,
55 "Generating factory for LineDetection vertical line ids");
56 validParamList->set<RCP<const FactoryBase>>(
57 "LineDetection_Layers", Teuchos::null,
58 "Generating factory for LineDetection layer ids");
59 validParamList->set<RCP<const FactoryBase>>(
60 "CoarseNumZLayers", Teuchos::null,
61 "Generating factory for number of coarse z-layers");
62
63 return validParamList;
64}
65
66template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
67 class Node>
71 DeclareInput(Level &fineLevel, Level & /* coarseLevel */) const {
72 Input(fineLevel, "A");
73 Input(fineLevel, "Nullspace");
74
75 Input(fineLevel, "LineDetection_VertLineIds");
76 Input(fineLevel, "LineDetection_Layers");
77 Input(fineLevel, "CoarseNumZLayers");
78
79 // check whether fine level coordinate information is available.
80 // If yes, request the fine level coordinates and generate coarse coordinates
81 // during the Build call
82 if (fineLevel.GetLevelID() == 0) {
83 if (fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
84 fineLevel.DeclareInput("Coordinates", NoFactory::get(), this);
85 bTransferCoordinates_ = true;
86 }
87 } else if (bTransferCoordinates_ == true) {
88 // on coarser levels we check the default factory providing "Coordinates"
89 // or the factory declared to provide "Coordinates"
90 // first, check which factory is providing coordinate information
91 RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
92 if (myCoordsFact == Teuchos::null) {
93 myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
94 }
95 if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
96 fineLevel.DeclareInput("Coordinates", myCoordsFact.get(), this);
97 }
98 }
99}
100
101template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
102 class Node>
104 Build(Level &fineLevel,
105 Level &coarseLevel)
106 const {
107 return BuildP(fineLevel, coarseLevel);
108}
109
110template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
111 class Node>
113 BuildP(Level &fineLevel,
114 Level &coarseLevel)
115 const {
116 FactoryMonitor m(*this, "Build", coarseLevel);
117
118 // obtain general variables
119 RCP<Matrix> A = Get<RCP<Matrix>>(fineLevel, "A");
120 RCP<MultiVector> fineNullspace =
121 Get<RCP<MultiVector>>(fineLevel, "Nullspace");
122
123 // get user-provided coarsening rate parameter (constant over all levels)
124 const ParameterList &pL = GetParameterList();
125 LO CoarsenRate = as<LO>(pL.get<int>("semicoarsen: coarsen rate"));
126 TEUCHOS_TEST_FOR_EXCEPTION(
127 CoarsenRate < 2, Exceptions::RuntimeError,
128 "semicoarsen: coarsen rate must be greater than 1");
129
130 // collect general input data
131 LO BlkSize = A->GetFixedBlockSize();
132 RCP<const Map> rowMap = A->getRowMap();
133 LO Ndofs = rowMap->getLocalNumElements();
134 LO Nnodes = Ndofs / BlkSize;
135
136 // collect line detection information generated by the LineDetectionFactory
137 // instance
138 LO FineNumZLayers = Get<LO>(fineLevel, "CoarseNumZLayers");
139 Teuchos::ArrayRCP<LO> TVertLineId =
140 Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_VertLineIds");
141 Teuchos::ArrayRCP<LO> TLayerId =
142 Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_Layers");
143
144 // compute number of coarse layers
145 TEUCHOS_TEST_FOR_EXCEPTION(FineNumZLayers < 2, Exceptions::RuntimeError,
146 "Cannot coarsen further");
147 using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
148 LO CoarseNumZLayers =
149 (LO)floor(((coordT)(FineNumZLayers + 1)) / ((coordT)CoarsenRate) - 1.0);
150 if (CoarseNumZLayers < 1)
151 CoarseNumZLayers = 1;
152
153 // generate transfer operator with semicoarsening
154 RCP<Matrix> P;
155 RCP<MultiVector> coarseNullspace;
156 BuildSemiCoarsenP(coarseLevel, Ndofs, Nnodes, BlkSize, FineNumZLayers,
157 CoarseNumZLayers, TLayerId, TVertLineId, A, fineNullspace, P,
158 coarseNullspace);
159
160 // Store number of coarse z-layers on the coarse level container
161 // This information is used by the LineDetectionAlgorithm
162 // TODO get rid of the NoFactory
163 coarseLevel.Set("NumZLayers", Teuchos::as<LO>(CoarseNumZLayers),
165
166 // store semicoarsening transfer on coarse level
167 Set(coarseLevel, "P", P);
168 Set(coarseLevel, "Nullspace", coarseNullspace);
169
170 // transfer coordinates
171 if (bTransferCoordinates_) {
172 SubFactoryMonitor m2(*this, "TransferCoordinates", coarseLevel);
173 typedef Xpetra::MultiVector<
174 typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO, NO>
175 xdMV;
176 RCP<xdMV> fineCoords = Teuchos::null;
177 if (fineLevel.GetLevelID() == 0 &&
178 fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
179 fineCoords = fineLevel.Get<RCP<xdMV>>("Coordinates", NoFactory::get());
180 } else {
181 RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
182 if (myCoordsFact == Teuchos::null) {
183 myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
184 }
185 if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
186 fineCoords =
187 fineLevel.Get<RCP<xdMV>>("Coordinates", myCoordsFact.get());
188 }
189 }
190
191 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords == Teuchos::null,
193 "No Coordinates found provided by the user.");
194
195 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3,
197 "Three coordinates arrays must be supplied if "
198 "line detection orientation not given.");
199 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> x =
200 fineCoords->getDataNonConst(0);
201 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> y =
202 fineCoords->getDataNonConst(1);
203 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> z =
204 fineCoords->getDataNonConst(2);
205
206 // determine the maximum and minimum z coordinate value on the current
207 // processor.
208 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_max =
209 -Teuchos::ScalarTraits<
210 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
211 Teuchos::ScalarTraits<
212 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
213 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_min =
214 Teuchos::ScalarTraits<
215 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
216 Teuchos::ScalarTraits<
217 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
218 for (auto it = z.begin(); it != z.end(); ++it) {
219 if (*it > zval_max)
220 zval_max = *it;
221 if (*it < zval_min)
222 zval_min = *it;
223 }
224
225 LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
226
227 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType>
228 myZLayerCoords = Teuchos::arcp<
229 typename Teuchos::ScalarTraits<Scalar>::coordinateType>(
230 myCoarseZLayers);
231 if (myCoarseZLayers == 1) {
232 myZLayerCoords[0] = zval_min;
233 } else {
234 typename Teuchos::ScalarTraits<Scalar>::coordinateType dz =
235 (zval_max - zval_min) / (myCoarseZLayers - 1);
236 for (LO k = 0; k < myCoarseZLayers; ++k) {
237 myZLayerCoords[k] = k * dz;
238 }
239 }
240
241 // Note, that the coarse level node coordinates have to be in vertical
242 // ordering according to the numbering of the vertical lines
243
244 // number of vertical lines on current node:
245 LO numVertLines = Nnodes / FineNumZLayers;
246 LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
247
248 RCP<const Map> coarseCoordMap = MapFactory::Build(
249 fineCoords->getMap()->lib(),
250 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
251 Teuchos::as<size_t>(numLocalCoarseNodes),
252 fineCoords->getMap()->getIndexBase(), fineCoords->getMap()->getComm());
253 RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<
254 typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO,
255 NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
256 coarseCoords->putScalar(-1.0);
257 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cx =
258 coarseCoords->getDataNonConst(0);
259 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cy =
260 coarseCoords->getDataNonConst(1);
261 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cz =
262 coarseCoords->getDataNonConst(2);
263
264 // loop over all vert line indices (stop as soon as possible)
265 LO cntCoarseNodes = 0;
266 for (LO vt = 0; vt < TVertLineId.size(); ++vt) {
267 // vertical line id in *vt
268 LO curVertLineId = TVertLineId[vt];
269
270 if (cx[curVertLineId * myCoarseZLayers] == -1.0 &&
271 cy[curVertLineId * myCoarseZLayers] == -1.0) {
272 // loop over all local myCoarseZLayers
273 for (LO n = 0; n < myCoarseZLayers; ++n) {
274 cx[curVertLineId * myCoarseZLayers + n] = x[vt];
275 cy[curVertLineId * myCoarseZLayers + n] = y[vt];
276 cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
277 }
278 cntCoarseNodes += myCoarseZLayers;
279 }
280
281 TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes,
283 "number of coarse nodes is inconsistent.");
284 if (cntCoarseNodes == numLocalCoarseNodes)
285 break;
286 }
287
288 // set coarse level coordinates
289 Set(coarseLevel, "Coordinates", coarseCoords);
290 } /* end bool bTransferCoordinates */
291}
292
293template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
294 class Node>
298 BuildSemiCoarsenP(Level &coarseLevel, const LO NFRows, const LO NFNodes,
299 const LO DofsPerNode, const LO NFLayers,
300 const LO NCLayers, const ArrayRCP<LO> LayerId,
301 const ArrayRCP<LO> VertLineId, const RCP<Matrix> &Amat,
302 const RCP<MultiVector> fineNullspace, RCP<Matrix> &P,
303 RCP<MultiVector> &coarseNullspace) const {
304 SubFactoryMonitor m2(*this, "BuildSemiCoarsenP", coarseLevel);
305 using impl_SC = typename KokkosKernels::ArithTraits<SC>::val_type;
306 using impl_ATS = KokkosKernels::ArithTraits<impl_SC>;
307 using LOView1D = Kokkos::View<LO *, DeviceType>;
308 using LOView2D = Kokkos::View<LO **, DeviceType>;
309
310 // Construct a map from fine level column to layer ids (including ghost nodes)
311 // Note: this is needed to sum all couplings within a layer
312 const auto FCol2LayerVector =
313 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getColMap());
314 const auto localTemp =
315 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getDomainMap());
316 RCP<const Import> importer = Amat->getCrsGraph()->getImporter();
317 if (importer == Teuchos::null)
318 importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
319 {
320 // Fill local temp with layer ids and fill ghost nodes
321 const auto localTempHost = localTemp->getLocalViewHost(Tpetra::Access::ReadWrite);
322 for (int row = 0; row < NFRows; row++)
323 localTempHost(row, 0) = LayerId[row / DofsPerNode];
324 const auto localTempView = localTemp->getLocalViewDevice(Tpetra::Access::ReadWrite);
325 Kokkos::deep_copy(localTempView, localTempHost);
326 FCol2LayerVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
327 }
328 const auto FCol2LayerView = FCol2LayerVector->getLocalViewDevice(Tpetra::Access::ReadOnly);
329 const auto FCol2Layer = Kokkos::subview(FCol2LayerView, Kokkos::ALL(), 0);
330
331 // Construct a map from fine level column to local dof per node id (including
332 // ghost nodes) Note: this is needed to sum all couplings within a layer
333 const auto FCol2DofVector =
334 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getColMap());
335 {
336 // Fill local temp with local dof per node ids and fill ghost nodes
337 const auto localTempHost = localTemp->getLocalViewHost(Tpetra::Access::ReadWrite);
338 for (int row = 0; row < NFRows; row++)
339 localTempHost(row, 0) = row % DofsPerNode;
340 const auto localTempView = localTemp->getLocalViewDevice(Tpetra::Access::ReadWrite);
341 Kokkos::deep_copy(localTempView, localTempHost);
342 FCol2DofVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
343 }
344 const auto FCol2DofView = FCol2DofVector->getLocalViewDevice(Tpetra::Access::ReadOnly);
345 const auto FCol2Dof = Kokkos::subview(FCol2DofView, Kokkos::ALL(), 0);
346
347 // Compute NVertLines
348 // TODO: Read this from line detection factory
349 int NVertLines = -1;
350 if (NFNodes != 0)
351 NVertLines = VertLineId[0];
352 for (int node = 1; node < NFNodes; ++node)
353 if (VertLineId[node] > NVertLines)
354 NVertLines = VertLineId[node];
355 NVertLines++;
356
357 // Construct a map from Line, Layer ids to fine level node
358 LOView2D LineLayer2Node("LineLayer2Node", NVertLines, NFLayers);
359 typename LOView2D::host_mirror_type LineLayer2NodeHost =
360 Kokkos::create_mirror_view(LineLayer2Node);
361 for (int node = 0; node < NFNodes; ++node)
362 LineLayer2NodeHost(VertLineId[node], LayerId[node]) = node;
363 Kokkos::deep_copy(LineLayer2Node, LineLayer2NodeHost);
364
365 // Construct a map from coarse layer id to fine layer id
366 LOView1D CLayer2FLayer("CLayer2FLayer", NCLayers);
367 typename LOView1D::host_mirror_type CLayer2FLayerHost =
368 Kokkos::create_mirror_view(CLayer2FLayer);
369 using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
370 const LO FirstStride =
371 (LO)ceil(((coordT)(NFLayers + 1)) / ((coordT)(NCLayers + 1)));
372 const coordT RestStride =
373 ((coordT)(NFLayers - FirstStride + 1)) / ((coordT)NCLayers);
374 const LO NCpts =
375 (LO)floor((((coordT)(NFLayers - FirstStride + 1)) / RestStride) + .00001);
376 TEUCHOS_TEST_FOR_EXCEPTION(NCLayers != NCpts, Exceptions::RuntimeError,
377 "sizes do not match.");
378 coordT stride = (coordT)FirstStride;
379 for (int clayer = 0; clayer < NCLayers; ++clayer) {
380 CLayer2FLayerHost(clayer) = (LO)floor(stride) - 1;
381 stride += RestStride;
382 }
383 Kokkos::deep_copy(CLayer2FLayer, CLayer2FLayerHost);
384
385 // Compute start layer and stencil sizes for layer interpolation at each
386 // coarse layer
387 int MaxStencilSize = 1;
388 LOView1D CLayer2StartLayer("CLayer2StartLayer", NCLayers);
389 LOView1D CLayer2StencilSize("CLayer2StencilSize", NCLayers);
390 typename LOView1D::host_mirror_type CLayer2StartLayerHost =
391 Kokkos::create_mirror_view(CLayer2StartLayer);
392 typename LOView1D::host_mirror_type CLayer2StencilSizeHost =
393 Kokkos::create_mirror_view(CLayer2StencilSize);
394 for (int clayer = 0; clayer < NCLayers; ++clayer) {
395 const int startLayer = (clayer > 0) ? CLayer2FLayerHost(clayer - 1) + 1 : 0;
396 const int stencilSize = (clayer < NCLayers - 1)
397 ? CLayer2FLayerHost(clayer + 1) - startLayer
398 : NFLayers - startLayer;
399
400 if (MaxStencilSize < stencilSize)
401 MaxStencilSize = stencilSize;
402 CLayer2StartLayerHost(clayer) = startLayer;
403 CLayer2StencilSizeHost(clayer) = stencilSize;
404 }
405 Kokkos::deep_copy(CLayer2StartLayer, CLayer2StartLayerHost);
406 Kokkos::deep_copy(CLayer2StencilSize, CLayer2StencilSizeHost);
407
408 // Allocate storage for the coarse layer interpolation matrices on all
409 // vertical lines Note: Contributions to each matrix are collapsed to vertical
410 // lines. Thus, each vertical line gives rise to a block tridiagonal matrix.
411 // Here we store the full matrix to be compatible with kokkos kernels batch LU
412 // and tringular solve.
413 int Nmax = MaxStencilSize * DofsPerNode;
414 Kokkos::View<impl_SC ***, DeviceType> BandMat(
415 "BandMat", NVertLines, Nmax, Nmax);
416 Kokkos::View<impl_SC ***, DeviceType> BandSol(
417 "BandSol", NVertLines, Nmax, DofsPerNode);
418
419 // Precompute number of nonzeros in prolongation matrix and allocate P views
420 // Note: Each coarse dof (NVertLines*NCLayers*DofsPerNode) contributes an
421 // interpolation stencil (StencilSize*DofsPerNode)
422 int NnzP = 0;
423 for (int clayer = 0; clayer < NCLayers; ++clayer)
424 NnzP += CLayer2StencilSizeHost(clayer);
425 NnzP *= NVertLines * DofsPerNode * DofsPerNode;
426 Kokkos::View<impl_SC *, DeviceType> Pvals("Pvals", NnzP);
427 Kokkos::View<LO *, DeviceType> Pcols("Pcols", NnzP);
428
429 // Precompute Pptr
430 // Note: Each coarse layer stencil dof contributes DofsPerNode to the
431 // corresponding row in P
432 Kokkos::View<size_t *, DeviceType> Pptr("Pptr", NFRows + 1);
433 typename Kokkos::View<size_t *, DeviceType>::host_mirror_type PptrHost =
434 Kokkos::create_mirror_view(Pptr);
435 Kokkos::deep_copy(PptrHost, 0);
436 for (int line = 0; line < NVertLines; ++line) {
437 for (int clayer = 0; clayer < NCLayers; ++clayer) {
438 const int stencilSize = CLayer2StencilSizeHost(clayer);
439 const int startLayer = CLayer2StartLayerHost(clayer);
440 for (int snode = 0; snode < stencilSize; ++snode) {
441 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
442 const int layer = startLayer + snode;
443 const int AmatBlkRow = LineLayer2NodeHost(line, layer);
444 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
445 PptrHost(AmatRow + 1) += DofsPerNode;
446 }
447 }
448 }
449 }
450 for (int i = 2; i < NFRows + 1; ++i)
451 PptrHost(i) += PptrHost(i - 1);
452 TEUCHOS_TEST_FOR_EXCEPTION(NnzP != (int)PptrHost(NFRows),
454 "Number of nonzeros in P does not match");
455 Kokkos::deep_copy(Pptr, PptrHost);
456
457 // Precompute Pptr offsets
458 // Note: These are used to determine the nonzero index in Pvals and Pcols
459 Kokkos::View<LO *, Kokkos::DefaultHostExecutionSpace> layerBuckets(
460 "layerBuckets", NFLayers);
461 Kokkos::deep_copy(layerBuckets, 0);
462 LOView2D CLayerSNode2PptrOffset("CLayerSNode2PptrOffset", NCLayers,
463 MaxStencilSize);
464 typename LOView2D::host_mirror_type CLayerSNode2PptrOffsetHost =
465 Kokkos::create_mirror_view(CLayerSNode2PptrOffset);
466 for (int clayer = 0; clayer < NCLayers; ++clayer) {
467 const int stencilSize = CLayer2StencilSizeHost(clayer);
468 const int startLayer = CLayer2StartLayerHost(clayer);
469 for (int snode = 0; snode < stencilSize; ++snode) {
470 const int layer = startLayer + snode;
471 CLayerSNode2PptrOffsetHost(clayer, snode) = layerBuckets(layer);
472 layerBuckets(layer)++;
473 }
474 }
475 Kokkos::deep_copy(CLayerSNode2PptrOffset, CLayerSNode2PptrOffsetHost);
476
477 { // Fill P - fill and solve each block tridiagonal system and fill P views
478 SubFactoryMonitor m3(*this, "Fill P", coarseLevel);
479
480 const auto localAmat = Amat->getLocalMatrixDevice();
481 const auto zero = impl_ATS::zero();
482 const auto one = impl_ATS::one();
483
484 using range_policy = Kokkos::RangePolicy<execution_space>;
485 Kokkos::parallel_for(
486 "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Fill P",
487 range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
488 for (int clayer = 0; clayer < NCLayers; ++clayer) {
489 // Initialize BandSol
490 auto bandSol =
491 Kokkos::subview(BandSol, line, Kokkos::ALL(), Kokkos::ALL());
492 for (int row = 0; row < Nmax; ++row)
493 for (int dof = 0; dof < DofsPerNode; ++dof)
494 bandSol(row, dof) = zero;
495
496 // Initialize BandMat (set unused row diagonal to 1.0)
497 const int stencilSize = CLayer2StencilSize(clayer);
498 const int N = stencilSize * DofsPerNode;
499 auto bandMat =
500 Kokkos::subview(BandMat, line, Kokkos::ALL(), Kokkos::ALL());
501 for (int row = 0; row < Nmax; ++row)
502 for (int col = 0; col < Nmax; ++col)
503 bandMat(row, col) =
504 (row == col && row >= N) ? one : zero;
505
506 // Loop over layers in stencil and fill banded matrix and rhs
507 const int flayer = CLayer2FLayer(clayer);
508 const int startLayer = CLayer2StartLayer(clayer);
509 for (int snode = 0; snode < stencilSize; ++snode) {
510 const int layer = startLayer + snode;
511 if (layer == flayer) { // If layer in stencil is a coarse layer
512 for (int dof = 0; dof < DofsPerNode; ++dof) {
513 const int row = snode * DofsPerNode + dof;
514 bandMat(row, row) = one;
515 bandSol(row, dof) = one;
516 }
517 } else { // Not a coarse layer
518 const int AmatBlkRow = LineLayer2Node(line, layer);
519 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
520 // Get Amat row info
521 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
522 const auto localAmatRow = localAmat.rowConst(AmatRow);
523 const int AmatRowLeng = localAmatRow.length;
524
525 const int row = snode * DofsPerNode + dofi;
526 for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
527 const int col = snode * DofsPerNode + dofj;
528
529 // Sum values along row which correspond to stencil
530 // layer/dof and fill bandMat
531 auto val = zero;
532 for (int i = 0; i < AmatRowLeng; ++i) {
533 const int colidx = localAmatRow.colidx(i);
534 if (FCol2Layer(colidx) == layer &&
535 FCol2Dof(colidx) == dofj)
536 val += localAmatRow.value(i);
537 }
538 bandMat(row, col) = val;
539
540 if (snode > 0) {
541 // Sum values along row which correspond to stencil
542 // layer/dof below and fill bandMat
543 val = zero;
544 for (int i = 0; i < AmatRowLeng; ++i) {
545 const int colidx = localAmatRow.colidx(i);
546 if (FCol2Layer(colidx) == layer - 1 &&
547 FCol2Dof(colidx) == dofj)
548 val += localAmatRow.value(i);
549 }
550 bandMat(row, col - DofsPerNode) = val;
551 }
552
553 if (snode < stencilSize - 1) {
554 // Sum values along row which correspond to stencil
555 // layer/dof above and fill bandMat
556 val = zero;
557 for (int i = 0; i < AmatRowLeng; ++i) {
558 const int colidx = localAmatRow.colidx(i);
559 if (FCol2Layer(colidx) == layer + 1 &&
560 FCol2Dof(colidx) == dofj)
561 val += localAmatRow.value(i);
562 }
563 bandMat(row, col + DofsPerNode) = val;
564 }
565 }
566 }
567 }
568 }
569
570 // Batch LU and triangular solves
571 namespace KB = KokkosBatched;
572 using lu_type = typename KB::SerialLU<KB::Algo::LU::Unblocked>;
573 lu_type::invoke(bandMat);
574 using trsv_l_type =
575 typename KB::SerialTrsm<KB::Side::Left, KB::Uplo::Lower,
576 KB::Trans::NoTranspose, KB::Diag::Unit,
577 KB::Algo::Trsm::Unblocked>;
578 trsv_l_type::invoke(one, bandMat, bandSol);
579 using trsv_u_type = typename KB::SerialTrsm<
580 KB::Side::Left, KB::Uplo::Upper, KB::Trans::NoTranspose,
581 KB::Diag::NonUnit, KB::Algo::Trsm::Unblocked>;
582 trsv_u_type::invoke(one, bandMat, bandSol);
583
584 // Fill prolongation views with solution
585 for (int snode = 0; snode < stencilSize; ++snode) {
586 for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
587 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
588 const int layer = startLayer + snode;
589 const int AmatBlkRow = LineLayer2Node(line, layer);
590 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
591
592 const int pptrOffset = CLayerSNode2PptrOffset(clayer, snode);
593 const int pptr =
594 Pptr(AmatRow) + pptrOffset * DofsPerNode + dofj;
595
596 const int col =
597 line * NCLayers + clayer; // coarse node (block row) index
598 Pcols(pptr) = col * DofsPerNode + dofj;
599 Pvals(pptr) = bandSol(snode * DofsPerNode + dofi, dofj);
600 }
601 }
602 }
603 }
604 });
605 } // Fill P
606
607 // Build P
608 RCP<const Map> rowMap = Amat->getRowMap();
609 Xpetra::global_size_t GNdofs = rowMap->getGlobalNumElements();
610 Xpetra::global_size_t itemp = GNdofs / NFLayers;
611 std::vector<size_t> stridingInfo_{(size_t)DofsPerNode};
612 RCP<const Map> coarseMap = StridedMapFactory::Build(
613 rowMap->lib(), NCLayers * itemp, NCLayers * NVertLines * DofsPerNode, 0,
614 stridingInfo_, rowMap->getComm(), -1, 0);
615 P = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
616 RCP<CrsMatrix> PCrs = toCrsMatrix(P);
617 PCrs->setAllValues(Pptr, Pcols, Pvals);
618 PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
619
620 // set StridingInformation of P
621 if (Amat->IsView("stridedMaps") == true)
622 P->CreateView("stridedMaps", Amat->getRowMap("stridedMaps"), coarseMap);
623 else
624 P->CreateView("stridedMaps", P->getRangeMap(), coarseMap);
625
626 // Construct coarse nullspace and inject fine nullspace
627 coarseNullspace =
628 MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
629 const int numVectors = fineNullspace->getNumVectors();
630 const auto fineNullspaceView = fineNullspace->getLocalViewDevice(Tpetra::Access::ReadOnly);
631 const auto coarseNullspaceView = coarseNullspace->getLocalViewDevice(Tpetra::Access::ReadWrite);
632 using range_policy = Kokkos::RangePolicy<execution_space>;
633 Kokkos::parallel_for(
634 "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Inject Nullspace",
635 range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
636 for (int clayer = 0; clayer < NCLayers; ++clayer) {
637 const int layer = CLayer2FLayer(clayer);
638 const int AmatBlkRow =
639 LineLayer2Node(line, layer); // fine node (block row) index
640 const int col =
641 line * NCLayers + clayer; // coarse node (block row) index
642 for (int k = 0; k < numVectors; ++k) {
643 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
644 const int fRow = AmatBlkRow * DofsPerNode + dofi;
645 const int cRow = col * DofsPerNode + dofi;
646 coarseNullspaceView(cRow, k) = fineNullspaceView(fRow, k);
647 }
648 }
649 }
650 });
651}
652
653} // namespace MueLu
654
655#define MUELU_SEMICOARSENPFACTORY_KOKKOS_SHORT
656#endif // MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
MueLu::DefaultNode Node
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static const Teuchos::ParameterEntry & getEntry(const std::string &name)
Returns default entry from the "master" list corresponding to the specified name.
static const NoFactory * get()
Prolongator factory performing semi-coarsening.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
Namespace for MueLu classes and methods.