MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_RegionRFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
11#define MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
12
13#include "Kokkos_UnorderedMap.hpp"
14
16
17#include <Xpetra_Matrix.hpp>
18#include <Xpetra_CrsGraphFactory.hpp>
19
20#include "MueLu_Types.hpp"
21
22namespace MueLu {
23
24template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
27 RCP<ParameterList> validParamList = rcp(new ParameterList());
28
29 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null,
30 "Generating factory of the matrix A");
31 validParamList->set<RCP<const FactoryBase> >("numDimensions", Teuchos::null,
32 "Number of spatial dimensions in the problem.");
33 validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null,
34 "Number of local nodes per spatial dimension on the fine grid.");
35 validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null,
36 "Fine level nullspace used to construct the coarse level nullspace.");
37 validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null,
38 "Fine level coordinates used to construct piece-wise linear prolongator and coarse level coordinates.");
39 validParamList->set<bool>("keep coarse coords", false, "Flag to keep coordinates for special coarse grid solve");
40
41 return validParamList;
42} // GetValidParameterList()
43
44template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
46 DeclareInput(Level& fineLevel, Level& /* coarseLevel */) const {
47 Input(fineLevel, "A");
48 Input(fineLevel, "numDimensions");
49 Input(fineLevel, "lNodesPerDim");
50 Input(fineLevel, "Nullspace");
51 Input(fineLevel, "Coordinates");
52
53} // DeclareInput()
54
55template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
57 Build(Level& fineLevel, Level& coarseLevel) const {
58 // Set debug outputs based on environment variable
59 RCP<Teuchos::FancyOStream> out;
60 if (const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
61 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
62 out->setShowAllFrontMatter(false).setShowProcRank(true);
63 } else {
64 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
65 }
66
67 *out << "Starting RegionRFactory_kokkos::Build." << std::endl;
68
69 // First get the inputs from the fineLevel
70 const int numDimensions = Get<int>(fineLevel, "numDimensions");
71 Array<LO> lFineNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
72 {
73 Array<LO> lNodesPerDim = Get<Array<LO> >(fineLevel, "lNodesPerDim");
74 for (int dim = 0; dim < numDimensions; ++dim) {
75 lFineNodesPerDim[dim] = lNodesPerDim[dim];
76 }
77 }
78 *out << "numDimensions " << numDimensions << " and lFineNodesPerDim: " << lFineNodesPerDim
79 << std::endl;
80
81 // Let us check that the inputs verify our assumptions
82 if (numDimensions < 1 || numDimensions > 3) {
83 throw std::runtime_error("numDimensions must be 1, 2 or 3!");
84 }
85 for (int dim = 0; dim < numDimensions; ++dim) {
86 if (lFineNodesPerDim[dim] % 3 != 1) {
87 throw std::runtime_error("The number of fine node in each direction need to be 3n+1");
88 }
89 }
90 Array<LO> lCoarseNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
91
92 const RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
93
94 RCP<realvaluedmultivector_type> fineCoordinates, coarseCoordinates;
95 fineCoordinates = Get<RCP<realvaluedmultivector_type> >(fineLevel, "Coordinates");
96 if (static_cast<int>(fineCoordinates->getNumVectors()) != numDimensions) {
97 throw std::runtime_error("The number of vectors in the coordinates is not equal to numDimensions!");
98 }
99
100 // Let us create R and pass it down to the
101 // appropriate specialization and see what we
102 // get back!
103 RCP<Matrix> R;
104
105 if (numDimensions == 1) {
106 throw std::runtime_error("RegionRFactory_kokkos no implemented for 1D case yet.");
107 } else if (numDimensions == 2) {
108 throw std::runtime_error("RegionRFactory_kokkos no implemented for 2D case yet.");
109 } else if (numDimensions == 3) {
110 Build3D(numDimensions, lFineNodesPerDim, A, fineCoordinates,
111 R, coarseCoordinates, lCoarseNodesPerDim);
112 }
113
114 const Teuchos::ParameterList& pL = GetParameterList();
115
116 // Reuse pattern if available (multiple solve)
117 RCP<ParameterList> Tparams;
118 if (pL.isSublist("matrixmatrix: kernel params"))
119 Tparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
120 else
121 Tparams = rcp(new ParameterList);
122
123 // R->describe(*out, Teuchos::VERB_EXTREME);
124 *out << "Compute P=R^t" << std::endl;
125 // By default, we don't need global constants for transpose
126 Tparams->set("compute global constants: temporaries", Tparams->get("compute global constants: temporaries", false));
127 Tparams->set("compute global constants", Tparams->get("compute global constants", false));
128 std::string label = "MueLu::RegionR-transR" + Teuchos::toString(coarseLevel.GetLevelID());
129 RCP<Matrix> P = Utilities::Transpose(*R, true, label, Tparams);
130
131 *out << "Compute coarse nullspace" << std::endl;
132 RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, "Nullspace");
133 RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(R->getRowMap(),
134 fineNullspace->getNumVectors());
135 R->apply(*fineNullspace, *coarseNullspace, Teuchos::NO_TRANS, Teuchos::ScalarTraits<SC>::one(),
136 Teuchos::ScalarTraits<SC>::zero());
137
138 *out << "Set data on coarse level" << std::endl;
139 Set(coarseLevel, "numDimensions", numDimensions);
140 Set(coarseLevel, "lNodesPerDim", lCoarseNodesPerDim);
141 Set(coarseLevel, "Nullspace", coarseNullspace);
142 Set(coarseLevel, "Coordinates", coarseCoordinates);
143 if (pL.get<bool>("keep coarse coords")) {
144 coarseLevel.Set<RCP<realvaluedmultivector_type> >("Coordinates2", coarseCoordinates, NoFactory::get());
145 }
146
147 R->SetFixedBlockSize(A->GetFixedBlockSize());
148 P->SetFixedBlockSize(A->GetFixedBlockSize());
149
150 Set(coarseLevel, "R", R);
151 Set(coarseLevel, "P", P);
152
153} // Build()
154
155template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
157 Build3D(const int numDimensions,
158 Teuchos::Array<LocalOrdinal>& lFineNodesPerDim,
159 const RCP<Matrix>& A,
160 const RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::coordinateType, LocalOrdinal, GlobalOrdinal, Node> >& fineCoordinates,
161 RCP<Matrix>& R,
162 RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::coordinateType, LocalOrdinal, GlobalOrdinal, Node> >& coarseCoordinates,
163 Teuchos::Array<LocalOrdinal>& lCoarseNodesPerDim) const {
164 using local_matrix_type = typename CrsMatrix::local_matrix_device_type;
165 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
166 using row_map_type = typename local_matrix_type::row_map_type::non_const_type;
167 using entries_type = typename local_matrix_type::index_type::non_const_type;
168 using values_type = typename local_matrix_type::values_type::non_const_type;
169#if KOKKOS_VERSION >= 40799
170 using impl_scalar_type = typename KokkosKernels::ArithTraits<Scalar>::val_type;
171#else
172 using impl_scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;
173#endif
174
175 // Set debug outputs based on environment variable
176 RCP<Teuchos::FancyOStream> out;
177 if (const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
178 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
179 out->setShowAllFrontMatter(false).setShowProcRank(true);
180 } else {
181 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
182 }
183
184 // Now compute number of coarse grid points
185 for (int dim = 0; dim < numDimensions; ++dim) {
186 lCoarseNodesPerDim[dim] = lFineNodesPerDim[dim] / 3 + 1;
187 }
188 *out << "lCoarseNodesPerDim " << lCoarseNodesPerDim << std::endl;
189
190 // Grab the block size here and multiply all existing offsets by it
191 const LO blkSize = A->GetFixedBlockSize();
192 *out << "blkSize " << blkSize << std::endl;
193
194 // Based on lCoarseNodesPerDim and lFineNodesPerDim
195 // we can compute numRows, numCols and NNZ for R
196 const LO numRows = blkSize * lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2];
197 const LO numCols = blkSize * lFineNodesPerDim[0] * lFineNodesPerDim[1] * lFineNodesPerDim[2];
198
199 // Create the coarse coordinates multivector
200 // so we can fill it on the fly while computing
201 // the restriction operator
202 RCP<Map> rowMap = MapFactory::Build(A->getRowMap()->lib(),
203 Teuchos::OrdinalTraits<GO>::invalid(),
204 numRows,
205 A->getRowMap()->getIndexBase(),
206 A->getRowMap()->getComm());
207
208 RCP<Map> coordRowMap = MapFactory::Build(A->getRowMap()->lib(),
209 Teuchos::OrdinalTraits<GO>::invalid(),
210 lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2],
211 A->getRowMap()->getIndexBase(),
212 A->getRowMap()->getComm());
213
214 coarseCoordinates = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(coordRowMap,
215 numDimensions);
216
217 // Get device views of coordinates
218 auto fineCoordsView = fineCoordinates->getLocalViewDevice(Tpetra::Access::ReadOnly);
219 auto coarseCoordsView = coarseCoordinates->getLocalViewDevice(Tpetra::Access::OverwriteAll);
220
221 Array<ArrayRCP<const real_type> > fineCoordData(numDimensions);
222 Array<ArrayRCP<real_type> > coarseCoordData(numDimensions);
223 for (int dim = 0; dim < numDimensions; ++dim) {
224 fineCoordData[dim] = fineCoordinates->getData(dim);
225 coarseCoordData[dim] = coarseCoordinates->getDataNonConst(dim);
226 }
227
228 // Let us set some parameter that will be useful
229 // while constructing R
230
231 // Length of interpolation stencils based on geometry
232 const LO cornerStencilLength = 27;
233 const LO edgeStencilLength = 45;
234 const LO faceStencilLength = 75;
235 const LO interiorStencilLength = 125;
236
237 // Number of corner, edge, face and interior nodes
238 const LO numCorners = 8;
239 const LO numEdges = 4 * (lCoarseNodesPerDim[0] - 2) + 4 * (lCoarseNodesPerDim[1] - 2) + 4 * (lCoarseNodesPerDim[2] - 2);
240 const LO numFaces = 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) + 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[2] - 2) + 2 * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
241 const LO numInteriors = (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
242
243 const LO nnz = (numCorners * cornerStencilLength + numEdges * edgeStencilLength + numFaces * faceStencilLength + numInteriors * interiorStencilLength) * blkSize;
244
245 // Having the number of rows and columns we can genrate
246 // the appropriate maps for our operator.
247
248 *out << "R statistics:" << std::endl
249 << " -numRows= " << numRows << std::endl
250 << " -numCols= " << numCols << std::endl
251 << " -nnz= " << nnz << std::endl;
252
253 row_map_type row_map(Kokkos::ViewAllocateWithoutInitializing("row_map"), numRows + 1);
254 typename row_map_type::host_mirror_type row_map_h = Kokkos::create_mirror_view(row_map);
255
256 entries_type entries(Kokkos::ViewAllocateWithoutInitializing("entries"), nnz);
257 typename entries_type::host_mirror_type entries_h = Kokkos::create_mirror_view(entries);
258
259 values_type values(Kokkos::ViewAllocateWithoutInitializing("values"), nnz);
260 typename values_type::host_mirror_type values_h = Kokkos::create_mirror_view(values);
261
262 // Compute the basic interpolation
263 // coefficients for 1D rate of 3
264 // coarsening.
265 Array<SC> coeffs({1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0});
266 row_map_h(0) = 0;
267
268 // Define some offsets that
269 // will be needed often later on
270 const LO edgeLineOffset = 2 * cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength;
271 const LO faceLineOffset = 2 * edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength;
272 const LO interiorLineOffset = 2 * faceStencilLength + (lCoarseNodesPerDim[0] - 2) * interiorStencilLength;
273
274 const LO facePlaneOffset = 2 * edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset;
275 const LO interiorPlaneOffset = 2 * faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset;
276
277 // Let us take care of the corners
278 // first since we always have
279 // corners to deal with!
280 {
281 // Corner 1
282 LO coordRowIdx = 0, rowIdx = 0, coordColumnOffset = 0, columnOffset = 0, entryOffset = 0;
283 for (LO l = 0; l < blkSize; ++l) {
284 for (LO k = 0; k < 3; ++k) {
285 for (LO j = 0; j < 3; ++j) {
286 for (LO i = 0; i < 3; ++i) {
287 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
288 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i + 2];
289 }
290 }
291 }
292 }
293 for (LO l = 0; l < blkSize; ++l) {
294 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
295 }
296 for (int dim = 0; dim < numDimensions; ++dim) {
297 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
298 }
299
300 // Corner 5
301 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
302 rowIdx = coordRowIdx * blkSize;
303 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
304 columnOffset = coordColumnOffset * blkSize;
305 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
306 for (LO l = 0; l < blkSize; ++l) {
307 for (LO k = 0; k < 3; ++k) {
308 for (LO j = 0; j < 3; ++j) {
309 for (LO i = 0; i < 3; ++i) {
310 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
311 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
312 }
313 }
314 }
315 }
316 for (LO l = 0; l < blkSize; ++l) {
317 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
318 }
319 for (int dim = 0; dim < numDimensions; ++dim) {
320 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
321 }
322
323 // Corner 2
324 coordRowIdx = (lCoarseNodesPerDim[0] - 1);
325 rowIdx = coordRowIdx * blkSize;
326 coordColumnOffset = (lFineNodesPerDim[0] - 1);
327 columnOffset = coordColumnOffset * blkSize;
328 entryOffset = (cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) * blkSize;
329 for (LO l = 0; l < blkSize; ++l) {
330 for (LO k = 0; k < 3; ++k) {
331 for (LO j = 0; j < 3; ++j) {
332 for (LO i = 0; i < 3; ++i) {
333 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
334 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
335 }
336 }
337 }
338 }
339 for (LO l = 0; l < blkSize; ++l) {
340 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
341 }
342 for (int dim = 0; dim < numDimensions; ++dim) {
343 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
344 }
345
346 // Corner 6
347 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
348 rowIdx = coordRowIdx * blkSize;
349 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
350 columnOffset = coordColumnOffset * blkSize;
351 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
352 for (LO l = 0; l < blkSize; ++l) {
353 for (LO k = 0; k < 3; ++k) {
354 for (LO j = 0; j < 3; ++j) {
355 for (LO i = 0; i < 3; ++i) {
356 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
357 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
358 }
359 }
360 }
361 }
362 for (LO l = 0; l < blkSize; ++l) {
363 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
364 }
365 for (int dim = 0; dim < numDimensions; ++dim) {
366 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
367 }
368
369 // Corner 3
370 coordRowIdx = (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0];
371 rowIdx = coordRowIdx * blkSize;
372 coordColumnOffset = (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0];
373 columnOffset = coordColumnOffset * blkSize;
374 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset) * blkSize;
375 for (LO l = 0; l < blkSize; ++l) {
376 for (LO k = 0; k < 3; ++k) {
377 for (LO j = 0; j < 3; ++j) {
378 for (LO i = 0; i < 3; ++i) {
379 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
380 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
381 }
382 }
383 }
384 }
385 for (LO l = 0; l < blkSize; ++l) {
386 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
387 }
388 for (int dim = 0; dim < numDimensions; ++dim) {
389 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
390 }
391
392 // Corner 7
393 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
394 rowIdx = coordRowIdx * blkSize;
395 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
396 columnOffset = coordColumnOffset * blkSize;
397 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
398 for (LO l = 0; l < blkSize; ++l) {
399 for (LO k = 0; k < 3; ++k) {
400 for (LO j = 0; j < 3; ++j) {
401 for (LO i = 0; i < 3; ++i) {
402 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
403 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
404 }
405 }
406 }
407 }
408 for (LO l = 0; l < blkSize; ++l) {
409 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
410 }
411 for (int dim = 0; dim < numDimensions; ++dim) {
412 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
413 }
414
415 // Corner 4
416 coordRowIdx = (lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
417 rowIdx = coordRowIdx * blkSize;
418 coordColumnOffset = (lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
419 columnOffset = coordColumnOffset * blkSize;
420 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset +
421 cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) *
422 blkSize;
423 for (LO l = 0; l < blkSize; ++l) {
424 for (LO k = 0; k < 3; ++k) {
425 for (LO j = 0; j < 3; ++j) {
426 for (LO i = 0; i < 3; ++i) {
427 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
428 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
429 }
430 }
431 }
432 }
433 for (LO l = 0; l < blkSize; ++l) {
434 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
435 }
436 for (int dim = 0; dim < numDimensions; ++dim) {
437 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
438 }
439
440 // Corner 8
441 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
442 rowIdx = coordRowIdx * blkSize;
443 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
444 columnOffset = coordColumnOffset * blkSize;
445 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
446 for (LO l = 0; l < blkSize; ++l) {
447 for (LO k = 0; k < 3; ++k) {
448 for (LO j = 0; j < 3; ++j) {
449 for (LO i = 0; i < 3; ++i) {
450 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
451 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
452 }
453 }
454 }
455 }
456 for (LO l = 0; l < blkSize; ++l) {
457 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
458 }
459 for (int dim = 0; dim < numDimensions; ++dim) {
460 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
461 }
462 } // Corners are done!
463
464 // Edges along 0 direction
465 if (lCoarseNodesPerDim[0] - 2 > 0) {
466 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
467 for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[0] - 2; ++edgeIdx) {
468 // Edge 0
469 coordRowIdx = (edgeIdx + 1);
470 rowIdx = coordRowIdx * blkSize;
471 coordColumnOffset = (edgeIdx + 1) * 3;
472 columnOffset = coordColumnOffset * blkSize;
473 entryOffset = (cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
474 for (LO l = 0; l < blkSize; ++l) {
475 for (LO k = 0; k < 3; ++k) {
476 for (LO j = 0; j < 3; ++j) {
477 for (LO i = 0; i < 5; ++i) {
478 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
479 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
480 }
481 }
482 }
483 }
484 for (LO l = 0; l < blkSize; ++l) {
485 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
486 }
487 for (int dim = 0; dim < numDimensions; ++dim) {
488 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
489 }
490
491 // Edge 1
492 coordRowIdx = ((lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
493 rowIdx = coordRowIdx * blkSize;
494 coordColumnOffset = ((lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
495 columnOffset = coordColumnOffset * blkSize;
496 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
497 for (LO l = 0; l < blkSize; ++l) {
498 for (LO k = 0; k < 3; ++k) {
499 for (LO j = 0; j < 3; ++j) {
500 for (LO i = 0; i < 5; ++i) {
501 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
502 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
503 }
504 }
505 }
506 }
507 for (LO l = 0; l < blkSize; ++l) {
508 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
509 }
510 for (int dim = 0; dim < numDimensions; ++dim) {
511 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
512 }
513
514 // Edge 2
515 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + edgeIdx + 1);
516 rowIdx = coordRowIdx * blkSize;
517 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
518 columnOffset = coordColumnOffset * blkSize;
519 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
520 for (LO l = 0; l < blkSize; ++l) {
521 for (LO k = 0; k < 3; ++k) {
522 for (LO j = 0; j < 3; ++j) {
523 for (LO i = 0; i < 5; ++i) {
524 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
525 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
526 }
527 }
528 }
529 }
530 for (LO l = 0; l < blkSize; ++l) {
531 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
532 }
533 for (int dim = 0; dim < numDimensions; ++dim) {
534 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
535 }
536
537 // Edge 3
538 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
539 rowIdx = coordRowIdx * blkSize;
540 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
541 columnOffset = coordColumnOffset * blkSize;
542 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
543 for (LO l = 0; l < blkSize; ++l) {
544 for (LO k = 0; k < 3; ++k) {
545 for (LO j = 0; j < 3; ++j) {
546 for (LO i = 0; i < 5; ++i) {
547 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
548 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
549 }
550 }
551 }
552 }
553 for (LO l = 0; l < blkSize; ++l) {
554 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
555 }
556 for (int dim = 0; dim < numDimensions; ++dim) {
557 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
558 }
559 }
560 }
561
562 // Edges along 1 direction
563 if (lCoarseNodesPerDim[1] - 2 > 0) {
564 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
565 for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[1] - 2; ++edgeIdx) {
566 // Edge 0
567 coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[0];
568 rowIdx = coordRowIdx * blkSize;
569 coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[0];
570 columnOffset = coordColumnOffset * blkSize;
571 entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
572 for (LO l = 0; l < blkSize; ++l) {
573 for (LO k = 0; k < 3; ++k) {
574 for (LO j = 0; j < 5; ++j) {
575 for (LO i = 0; i < 3; ++i) {
576 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
577 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
578 }
579 }
580 }
581 }
582 for (LO l = 0; l < blkSize; ++l) {
583 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
584 }
585 for (int dim = 0; dim < numDimensions; ++dim) {
586 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
587 }
588
589 // Edge 1
590 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
591 rowIdx = coordRowIdx * blkSize;
592 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
593 columnOffset = coordColumnOffset * blkSize;
594 entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
595 for (LO l = 0; l < blkSize; ++l) {
596 for (LO k = 0; k < 3; ++k) {
597 for (LO j = 0; j < 5; ++j) {
598 for (LO i = 0; i < 3; ++i) {
599 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
600 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
601 }
602 }
603 }
604 }
605 for (LO l = 0; l < blkSize; ++l) {
606 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
607 }
608 for (int dim = 0; dim < numDimensions; ++dim) {
609 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
610 }
611
612 // Edge 2
613 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0]);
614 rowIdx = coordRowIdx * blkSize;
615 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0]);
616 columnOffset = coordColumnOffset * blkSize;
617 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
618 for (LO l = 0; l < blkSize; ++l) {
619 for (LO k = 0; k < 3; ++k) {
620 for (LO j = 0; j < 5; ++j) {
621 for (LO i = 0; i < 3; ++i) {
622 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
623 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
624 }
625 }
626 }
627 }
628 for (LO l = 0; l < blkSize; ++l) {
629 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
630 }
631 for (int dim = 0; dim < numDimensions; ++dim) {
632 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
633 }
634
635 // Edge 3
636 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
637 rowIdx = coordRowIdx * blkSize;
638 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
639 columnOffset = coordColumnOffset * blkSize;
640 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
641 for (LO l = 0; l < blkSize; ++l) {
642 for (LO k = 0; k < 3; ++k) {
643 for (LO j = 0; j < 5; ++j) {
644 for (LO i = 0; i < 3; ++i) {
645 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
646 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
647 }
648 }
649 }
650 }
651 for (LO l = 0; l < blkSize; ++l) {
652 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
653 }
654 for (int dim = 0; dim < numDimensions; ++dim) {
655 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
656 }
657 }
658 }
659
660 // Edges along 2 direction
661 if (lCoarseNodesPerDim[2] - 2 > 0) {
662 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
663 for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[2] - 2; ++edgeIdx) {
664 // Edge 0
665 coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
666 rowIdx = coordRowIdx * blkSize;
667 coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0];
668 columnOffset = coordColumnOffset * blkSize;
669 entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset) * blkSize;
670 for (LO l = 0; l < blkSize; ++l) {
671 for (LO k = 0; k < 5; ++k) {
672 for (LO j = 0; j < 3; ++j) {
673 for (LO i = 0; i < 3; ++i) {
674 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
675 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
676 }
677 }
678 }
679 }
680 for (LO l = 0; l < blkSize; ++l) {
681 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
682 }
683 for (int dim = 0; dim < numDimensions; ++dim) {
684 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
685 }
686
687 // Edge 1
688 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
689 rowIdx = coordRowIdx * blkSize;
690 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
691 columnOffset = coordColumnOffset * blkSize;
692 entryOffset = (facePlaneOffset + faceLineOffset - edgeStencilLength + edgeIdx * interiorPlaneOffset) * blkSize;
693 for (LO l = 0; l < blkSize; ++l) {
694 for (LO k = 0; k < 5; ++k) {
695 for (LO j = 0; j < 3; ++j) {
696 for (LO i = 0; i < 3; ++i) {
697 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
698 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
699 }
700 }
701 }
702 }
703 for (LO l = 0; l < blkSize; ++l) {
704 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
705 }
706 for (int dim = 0; dim < numDimensions; ++dim) {
707 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
708 }
709
710 // Edge 2
711 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0]);
712 rowIdx = coordRowIdx * blkSize;
713 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0]);
714 columnOffset = coordColumnOffset * blkSize;
715 entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset + faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset) * blkSize;
716 for (LO l = 0; l < blkSize; ++l) {
717 for (LO k = 0; k < 5; ++k) {
718 for (LO j = 0; j < 3; ++j) {
719 for (LO i = 0; i < 3; ++i) {
720 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
721 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
722 }
723 }
724 }
725 }
726 for (LO l = 0; l < blkSize; ++l) {
727 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
728 }
729 for (int dim = 0; dim < numDimensions; ++dim) {
730 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
731 }
732
733 // Edge 3
734 coordRowIdx = ((edgeIdx + 2) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
735 rowIdx = coordRowIdx * blkSize;
736 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
737 columnOffset = coordColumnOffset * blkSize;
738 entryOffset = (facePlaneOffset + (edgeIdx + 1) * interiorPlaneOffset - edgeStencilLength) * blkSize;
739 for (LO l = 0; l < blkSize; ++l) {
740 for (LO k = 0; k < 5; ++k) {
741 for (LO j = 0; j < 3; ++j) {
742 for (LO i = 0; i < 3; ++i) {
743 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
744 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
745 }
746 }
747 }
748 }
749 for (LO l = 0; l < blkSize; ++l) {
750 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
751 }
752 for (int dim = 0; dim < numDimensions; ++dim) {
753 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
754 }
755 }
756 }
757
758 // TODO: KOKKOS parallel_for used from here. Not sure if it should be used for edges.
759 Kokkos::deep_copy(row_map, row_map_h);
760 Kokkos::deep_copy(entries, entries_h);
761 Kokkos::deep_copy(values, values_h);
762
763 // Create views on device for nodes per dim
764 LOTupleView lFineNodesPerDim_d("lFineNodesPerDim");
765 LOTupleView lCoarseNodesPerDim_d("lCoarseNodesPerDim");
766
767 typename Kokkos::View<LO[3], device_type>::host_mirror_type lCoarseNodesPerDim_h = Kokkos::create_mirror_view(lCoarseNodesPerDim_d);
768 typename Kokkos::View<LO[3], device_type>::host_mirror_type lFineNodesPerDim_h = Kokkos::create_mirror_view(lFineNodesPerDim_d);
769
770 for (int dim = 0; dim < numDimensions; ++dim) {
771 lCoarseNodesPerDim_h(dim) = lCoarseNodesPerDim[dim];
772 lFineNodesPerDim_h(dim) = lFineNodesPerDim[dim];
773 }
774
775 Kokkos::deep_copy(lCoarseNodesPerDim_d, lCoarseNodesPerDim_h);
776 Kokkos::deep_copy(lFineNodesPerDim_d, lFineNodesPerDim_h);
777
778 // Faces in 0-1 plane
779 if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[1] - 2 > 0)) {
780 Kokkos::parallel_for(
781 "Faces in 0-1 plane region R",
782 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[0] - 2)),
783 KOKKOS_LAMBDA(const LO faceIdx) {
784 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
785 LO gridIdx[3] = {0, 0, 0};
786 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
787 // Last step in the loop
788 // update the grid indices
789 // for next grid point
790 for (LO i = 0; i < faceIdx; i++) {
791 ++gridIdx[0];
792 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
793 gridIdx[0] = 0;
794 ++gridIdx[1];
795 }
796 }
797
798 // Face 0
799 coordRowIdx = ((gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
800 rowIdx = coordRowIdx * blkSize;
801 coordColumnOffset = 3 * ((gridIdx[1] + 1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1);
802 columnOffset = coordColumnOffset * blkSize;
803 entryOffset = (edgeLineOffset + edgeStencilLength + gridIdx[1] * faceLineOffset + gridIdx[0] * faceStencilLength) * blkSize;
804 for (LO l = 0; l < blkSize; ++l) {
805 for (LO k = 0; k < 3; ++k) {
806 for (LO j = 0; j < 5; ++j) {
807 for (LO i = 0; i < 5; ++i) {
808 entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + (k * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
809 values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k + 2] * coeffs_d[j] * coeffs_d[i];
810 }
811 }
812 }
813 }
814 for (LO l = 0; l < blkSize; ++l) {
815 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
816 }
817 for (int dim = 0; dim < numDimensions; ++dim) {
818 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
819 }
820
821 // Face 1
822 coordRowIdx += (lCoarseNodesPerDim_d(2) - 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0);
823 rowIdx = coordRowIdx * blkSize;
824 coordColumnOffset += (lFineNodesPerDim_d(2) - 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0);
825 columnOffset = coordColumnOffset * blkSize;
826 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim_d(2) - 2) * interiorPlaneOffset) * blkSize;
827 for (LO l = 0; l < blkSize; ++l) {
828 for (LO k = 0; k < 3; ++k) {
829 for (LO j = 0; j < 5; ++j) {
830 for (LO i = 0; i < 5; ++i) {
831 entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
832 values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
833 }
834 }
835 }
836 }
837 for (LO l = 0; l < blkSize; ++l) {
838 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
839 }
840 for (int dim = 0; dim < numDimensions; ++dim) {
841 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
842 }
843 }); // parallel_for faces in 0-1 plane
844 }
845
846 // Faces in 0-2 plane
847 if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
848 Kokkos::parallel_for(
849 "Faces in 0-2 plane region R",
850 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[0] - 2)),
851 KOKKOS_LAMBDA(const LO faceIdx) {
852 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
853 LO gridIdx[3] = {0, 0, 0};
854 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
855 // Last step in the loop
856 // update the grid indices
857 // for next grid point
858 for (LO i = 0; i < faceIdx; i++) {
859 ++gridIdx[0];
860 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
861 gridIdx[0] = 0;
862 ++gridIdx[2];
863 }
864 }
865
866 // Face 0
867 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[0] + 1));
868 rowIdx = coordRowIdx * blkSize;
869 coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1) * 3;
870 columnOffset = coordColumnOffset * blkSize;
871 entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + edgeStencilLength + gridIdx[0] * faceStencilLength) * blkSize;
872 for (LO l = 0; l < blkSize; ++l) {
873 for (LO k = 0; k < 5; ++k) {
874 for (LO j = 0; j < 3; ++j) {
875 for (LO i = 0; i < 5; ++i) {
876 entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + j * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
877 values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j + 2] * coeffs_d[i];
878 }
879 }
880 }
881 }
882 for (LO l = 0; l < blkSize; ++l) {
883 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
884 }
885 for (int dim = 0; dim < numDimensions; ++dim) {
886 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
887 }
888
889 // Face 1
890 coordRowIdx += (lCoarseNodesPerDim_d(1) - 1) * lCoarseNodesPerDim_d(0);
891 rowIdx = coordRowIdx * blkSize;
892 coordColumnOffset += (lFineNodesPerDim_d(1) - 1) * lFineNodesPerDim_d(0);
893 columnOffset = coordColumnOffset * blkSize;
894 entryOffset += (faceLineOffset + (lCoarseNodesPerDim_d(1) - 2) * interiorLineOffset) * blkSize;
895 for (LO l = 0; l < blkSize; ++l) {
896 for (LO k = 0; k < 5; ++k) {
897 for (LO j = 0; j < 3; ++j) {
898 for (LO i = 0; i < 5; ++i) {
899 entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
900 values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
901 }
902 }
903 }
904 }
905 for (LO l = 0; l < blkSize; ++l) {
906 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
907 }
908 for (int dim = 0; dim < numDimensions; ++dim) {
909 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
910 }
911 }); // parallel_for faces in 0-2 plane
912 }
913
914 // Faces in 1-2 plane
915 if ((lCoarseNodesPerDim[1] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
916 Kokkos::parallel_for(
917 "Faces in 1-2 plane region R",
918 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[1] - 2)),
919 KOKKOS_LAMBDA(const LO faceIdx) {
920 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
921 LO gridIdx[3] = {0, 0, 0};
922 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
923 // Last step in the loop
924 // update the grid indices
925 // for next grid point
926 for (LO i = 0; i < faceIdx; i++) {
927 ++gridIdx[1];
928 if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
929 gridIdx[1] = 0;
930 ++gridIdx[2];
931 }
932 }
933
934 // Face 0
935 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0));
936 rowIdx = coordRowIdx * blkSize;
937 coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * lFineNodesPerDim_d(0)) * 3;
938 columnOffset = coordColumnOffset * blkSize;
939 entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + faceLineOffset + gridIdx[1] * interiorLineOffset) * blkSize;
940 for (LO l = 0; l < blkSize; ++l) {
941 for (LO k = 0; k < 5; ++k) {
942 for (LO j = 0; j < 5; ++j) {
943 for (LO i = 0; i < 3; ++i) {
944 entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i) * blkSize + l;
945 values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i + 2];
946 }
947 }
948 }
949 }
950 for (LO l = 0; l < blkSize; ++l) {
951 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
952 }
953 for (int dim = 0; dim < numDimensions; ++dim) {
954 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
955 }
956
957 // Face 1
958 coordRowIdx += (lCoarseNodesPerDim_d(0) - 1);
959 rowIdx = coordRowIdx * blkSize;
960 coordColumnOffset += (lFineNodesPerDim_d(0) - 1);
961 columnOffset = coordColumnOffset * blkSize;
962 entryOffset += (faceStencilLength + (lCoarseNodesPerDim_d(0) - 2) * interiorStencilLength) * blkSize;
963 for (LO l = 0; l < blkSize; ++l) {
964 for (LO k = 0; k < 5; ++k) {
965 for (LO j = 0; j < 5; ++j) {
966 for (LO i = 0; i < 3; ++i) {
967 entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
968 values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
969 }
970 }
971 }
972 }
973 for (LO l = 0; l < blkSize; ++l) {
974 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
975 }
976 for (int dim = 0; dim < numDimensions; ++dim) {
977 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
978 }
979 }); // parallel_for faces in 1-2 plane
980 }
981
982 if (numInteriors > 0) {
983 // Allocate and compute arrays
984 // containing column offsets
985 // and values associated with
986 // interior points
987 LO countRowEntries = 0;
988 Kokkos::View<LO[125]> coordColumnOffsets_d("coordColOffset");
989 auto coordColumnOffsets_h = Kokkos::create_mirror_view(coordColumnOffsets_d);
990
991 for (LO k = -2; k < 3; ++k) {
992 for (LO j = -2; j < 3; ++j) {
993 for (LO i = -2; i < 3; ++i) {
994 coordColumnOffsets_h(countRowEntries) = k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i;
995 ++countRowEntries;
996 }
997 }
998 }
999 Kokkos::deep_copy(coordColumnOffsets_d, coordColumnOffsets_h);
1000
1001 LO countValues = 0;
1002 Kokkos::View<impl_scalar_type*> interiorValues_d("interiorValues", 125);
1003 auto interiorValues_h = Kokkos::create_mirror_view(interiorValues_d);
1004 for (LO k = 0; k < 5; ++k) {
1005 for (LO j = 0; j < 5; ++j) {
1006 for (LO i = 0; i < 5; ++i) {
1007 interiorValues_h(countValues) = coeffs[k] * coeffs[j] * coeffs[i];
1008 ++countValues;
1009 }
1010 }
1011 }
1012 Kokkos::deep_copy(interiorValues_d, interiorValues_h);
1013
1014 Kokkos::parallel_for(
1015 "interior idx region R", Kokkos::RangePolicy<typename device_type::execution_space>(0, numInteriors),
1016 KOKKOS_LAMBDA(const LO interiorIdx) {
1017 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
1018 LO gridIdx[3];
1019 gridIdx[0] = 0;
1020 gridIdx[1] = 0;
1021 gridIdx[2] = 0;
1022 // First step in the loop
1023 // update the grid indices
1024 // for the grid point
1025 for (LO i = 0; i < interiorIdx; i++) {
1026 ++gridIdx[0];
1027 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
1028 gridIdx[0] = 0;
1029 ++gridIdx[1];
1030 if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
1031 gridIdx[1] = 0;
1032 ++gridIdx[2];
1033 }
1034 }
1035 }
1036
1037 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(0) * lCoarseNodesPerDim_d(1) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
1038 rowIdx = coordRowIdx * blkSize;
1039 coordColumnOffset = ((gridIdx[2] + 1) * 3 * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * 3 * lFineNodesPerDim_d(0) + (gridIdx[0] + 1) * 3);
1040 columnOffset = coordColumnOffset * blkSize;
1041 entryOffset = (facePlaneOffset + faceLineOffset + faceStencilLength + gridIdx[2] * interiorPlaneOffset + gridIdx[1] * interiorLineOffset + gridIdx[0] * interiorStencilLength) * blkSize;
1042 for (LO l = 0; l < blkSize; ++l) {
1043 row_map(rowIdx + 1 + l) = entryOffset + interiorStencilLength * (l + 1);
1044 }
1045 // Fill the column indices
1046 // and values in the approproate
1047 // views.
1048 for (LO l = 0; l < blkSize; ++l) {
1049 for (LO entryIdx = 0; entryIdx < interiorStencilLength; ++entryIdx) {
1050 entries(entryOffset + entryIdx + interiorStencilLength * l) = columnOffset + coordColumnOffsets_d(entryIdx) * blkSize + l;
1051 values(entryOffset + entryIdx + interiorStencilLength * l) = interiorValues_d(entryIdx);
1052 }
1053 }
1054 for (int dim = 0; dim < numDimensions; ++dim) {
1055 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
1056 }
1057 }); // Kokkos::parallel_for interior idx
1058 //
1059 }
1060
1061 local_graph_type localGraph(entries, row_map);
1062 local_matrix_type localR("R", numCols, values, localGraph);
1063
1064 R = MatrixFactory::Build(localR, // the local data
1065 rowMap, // rowMap
1066 A->getRowMap(), // colMap
1067 A->getRowMap(), // domainMap == colMap
1068 rowMap, // rangeMap == rowMap
1069 Teuchos::null); // params for optimized construction
1070
1071} // Build3D()
1072
1073} // namespace MueLu
1074
1075#define MUELU_REGIONRFACTORY_KOKKOS_SHORT
1076#endif // MUELU_REGIONRFACTORY_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultGlobalOrdinal GlobalOrdinal
MueLu::DefaultNode Node
Class that holds all level-specific information.
int GetLevelID() const
Return level number.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static const NoFactory * get()
typename Kokkos::View< LO[3], device_type > LOTupleView
void Build3D(const int numDimensions, Array< LO > &lFineNodesPerDim, const RCP< Matrix > &A, const RCP< realvaluedmultivector_type > &fineCoordinates, RCP< Matrix > &R, RCP< realvaluedmultivector_type > &coarseCoordinates, Array< LO > &lCoarseNodesPerDim) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
RCP< const ParameterList > GetValidParameterList() const
Input.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Namespace for MueLu classes and methods.