MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_RegionRFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
11#define MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
12
13#include "Kokkos_UnorderedMap.hpp"
14
16
17#include <Xpetra_Matrix.hpp>
18#include <Xpetra_CrsGraphFactory.hpp>
19
20#include "MueLu_Types.hpp"
21
22namespace MueLu {
23
24template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
27 RCP<ParameterList> validParamList = rcp(new ParameterList());
28
29 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null,
30 "Generating factory of the matrix A");
31 validParamList->set<RCP<const FactoryBase> >("numDimensions", Teuchos::null,
32 "Number of spatial dimensions in the problem.");
33 validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null,
34 "Number of local nodes per spatial dimension on the fine grid.");
35 validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null,
36 "Fine level nullspace used to construct the coarse level nullspace.");
37 validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null,
38 "Fine level coordinates used to construct piece-wise linear prolongator and coarse level coordinates.");
39 validParamList->set<bool>("keep coarse coords", false, "Flag to keep coordinates for special coarse grid solve");
40
41 return validParamList;
42} // GetValidParameterList()
43
44template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
46 DeclareInput(Level& fineLevel, Level& /* coarseLevel */) const {
47 Input(fineLevel, "A");
48 Input(fineLevel, "numDimensions");
49 Input(fineLevel, "lNodesPerDim");
50 Input(fineLevel, "Nullspace");
51 Input(fineLevel, "Coordinates");
52
53} // DeclareInput()
54
55template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
57 Build(Level& fineLevel, Level& coarseLevel) const {
58 // Set debug outputs based on environment variable
59 RCP<Teuchos::FancyOStream> out;
60 if (const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
61 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
62 out->setShowAllFrontMatter(false).setShowProcRank(true);
63 } else {
64 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
65 }
66
67 *out << "Starting RegionRFactory_kokkos::Build." << std::endl;
68
69 // First get the inputs from the fineLevel
70 const int numDimensions = Get<int>(fineLevel, "numDimensions");
71 Array<LO> lFineNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
72 {
73 Array<LO> lNodesPerDim = Get<Array<LO> >(fineLevel, "lNodesPerDim");
74 for (int dim = 0; dim < numDimensions; ++dim) {
75 lFineNodesPerDim[dim] = lNodesPerDim[dim];
76 }
77 }
78 *out << "numDimensions " << numDimensions << " and lFineNodesPerDim: " << lFineNodesPerDim
79 << std::endl;
80
81 // Let us check that the inputs verify our assumptions
82 if (numDimensions < 1 || numDimensions > 3) {
83 throw std::runtime_error("numDimensions must be 1, 2 or 3!");
84 }
85 for (int dim = 0; dim < numDimensions; ++dim) {
86 if (lFineNodesPerDim[dim] % 3 != 1) {
87 throw std::runtime_error("The number of fine node in each direction need to be 3n+1");
88 }
89 }
90 Array<LO> lCoarseNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
91
92 const RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
93
94 RCP<realvaluedmultivector_type> fineCoordinates, coarseCoordinates;
95 fineCoordinates = Get<RCP<realvaluedmultivector_type> >(fineLevel, "Coordinates");
96 if (static_cast<int>(fineCoordinates->getNumVectors()) != numDimensions) {
97 throw std::runtime_error("The number of vectors in the coordinates is not equal to numDimensions!");
98 }
99
100 // Let us create R and pass it down to the
101 // appropriate specialization and see what we
102 // get back!
103 RCP<Matrix> R;
104
105 if (numDimensions == 1) {
106 throw std::runtime_error("RegionRFactory_kokkos no implemented for 1D case yet.");
107 } else if (numDimensions == 2) {
108 throw std::runtime_error("RegionRFactory_kokkos no implemented for 2D case yet.");
109 } else if (numDimensions == 3) {
110 Build3D(numDimensions, lFineNodesPerDim, A, fineCoordinates,
111 R, coarseCoordinates, lCoarseNodesPerDim);
112 }
113
114 const Teuchos::ParameterList& pL = GetParameterList();
115
116 // Reuse pattern if available (multiple solve)
117 RCP<ParameterList> Tparams;
118 if (pL.isSublist("matrixmatrix: kernel params"))
119 Tparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
120 else
121 Tparams = rcp(new ParameterList);
122
123 // R->describe(*out, Teuchos::VERB_EXTREME);
124 *out << "Compute P=R^t" << std::endl;
125 // By default, we don't need global constants for transpose
126 Tparams->set("compute global constants: temporaries", Tparams->get("compute global constants: temporaries", false));
127 Tparams->set("compute global constants", Tparams->get("compute global constants", false));
128 std::string label = "MueLu::RegionR-transR" + Teuchos::toString(coarseLevel.GetLevelID());
129 RCP<Matrix> P = Utilities::Transpose(*R, true, label, Tparams);
130
131 *out << "Compute coarse nullspace" << std::endl;
132 RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, "Nullspace");
133 RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(R->getRowMap(),
134 fineNullspace->getNumVectors());
135 R->apply(*fineNullspace, *coarseNullspace, Teuchos::NO_TRANS, Teuchos::ScalarTraits<SC>::one(),
136 Teuchos::ScalarTraits<SC>::zero());
137
138 *out << "Set data on coarse level" << std::endl;
139 Set(coarseLevel, "numDimensions", numDimensions);
140 Set(coarseLevel, "lNodesPerDim", lCoarseNodesPerDim);
141 Set(coarseLevel, "Nullspace", coarseNullspace);
142 Set(coarseLevel, "Coordinates", coarseCoordinates);
143 if (pL.get<bool>("keep coarse coords")) {
144 coarseLevel.Set<RCP<realvaluedmultivector_type> >("Coordinates2", coarseCoordinates, NoFactory::get());
145 }
146
147 R->SetFixedBlockSize(A->GetFixedBlockSize());
148 P->SetFixedBlockSize(A->GetFixedBlockSize());
149
150 Set(coarseLevel, "R", R);
151 Set(coarseLevel, "P", P);
152
153} // Build()
154
155template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
157 Build3D(const int numDimensions,
158 Teuchos::Array<LocalOrdinal>& lFineNodesPerDim,
159 const RCP<Matrix>& A,
160 const RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::coordinateType, LocalOrdinal, GlobalOrdinal, Node> >& fineCoordinates,
161 RCP<Matrix>& R,
162 RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::coordinateType, LocalOrdinal, GlobalOrdinal, Node> >& coarseCoordinates,
163 Teuchos::Array<LocalOrdinal>& lCoarseNodesPerDim) const {
164 using local_matrix_type = typename CrsMatrix::local_matrix_device_type;
165 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
166 using row_map_type = typename local_matrix_type::row_map_type::non_const_type;
167 using entries_type = typename local_matrix_type::index_type::non_const_type;
168 using values_type = typename local_matrix_type::values_type::non_const_type;
169 using impl_scalar_type = typename KokkosKernels::ArithTraits<Scalar>::val_type;
170
171 // Set debug outputs based on environment variable
172 RCP<Teuchos::FancyOStream> out;
173 if (const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
174 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
175 out->setShowAllFrontMatter(false).setShowProcRank(true);
176 } else {
177 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
178 }
179
180 // Now compute number of coarse grid points
181 for (int dim = 0; dim < numDimensions; ++dim) {
182 lCoarseNodesPerDim[dim] = lFineNodesPerDim[dim] / 3 + 1;
183 }
184 *out << "lCoarseNodesPerDim " << lCoarseNodesPerDim << std::endl;
185
186 // Grab the block size here and multiply all existing offsets by it
187 const LO blkSize = A->GetFixedBlockSize();
188 *out << "blkSize " << blkSize << std::endl;
189
190 // Based on lCoarseNodesPerDim and lFineNodesPerDim
191 // we can compute numRows, numCols and NNZ for R
192 const LO numRows = blkSize * lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2];
193 const LO numCols = blkSize * lFineNodesPerDim[0] * lFineNodesPerDim[1] * lFineNodesPerDim[2];
194
195 // Create the coarse coordinates multivector
196 // so we can fill it on the fly while computing
197 // the restriction operator
198 RCP<Map> rowMap = MapFactory::Build(A->getRowMap()->lib(),
199 Teuchos::OrdinalTraits<GO>::invalid(),
200 numRows,
201 A->getRowMap()->getIndexBase(),
202 A->getRowMap()->getComm());
203
204 RCP<Map> coordRowMap = MapFactory::Build(A->getRowMap()->lib(),
205 Teuchos::OrdinalTraits<GO>::invalid(),
206 lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2],
207 A->getRowMap()->getIndexBase(),
208 A->getRowMap()->getComm());
209
210 coarseCoordinates = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(coordRowMap,
211 numDimensions);
212
213 // Get device views of coordinates
214 auto fineCoordsView = fineCoordinates->getLocalViewDevice(Tpetra::Access::ReadOnly);
215 auto coarseCoordsView = coarseCoordinates->getLocalViewDevice(Tpetra::Access::OverwriteAll);
216
217 Array<ArrayRCP<const real_type> > fineCoordData(numDimensions);
218 Array<ArrayRCP<real_type> > coarseCoordData(numDimensions);
219 for (int dim = 0; dim < numDimensions; ++dim) {
220 fineCoordData[dim] = fineCoordinates->getData(dim);
221 coarseCoordData[dim] = coarseCoordinates->getDataNonConst(dim);
222 }
223
224 // Let us set some parameter that will be useful
225 // while constructing R
226
227 // Length of interpolation stencils based on geometry
228 const LO cornerStencilLength = 27;
229 const LO edgeStencilLength = 45;
230 const LO faceStencilLength = 75;
231 const LO interiorStencilLength = 125;
232
233 // Number of corner, edge, face and interior nodes
234 const LO numCorners = 8;
235 const LO numEdges = 4 * (lCoarseNodesPerDim[0] - 2) + 4 * (lCoarseNodesPerDim[1] - 2) + 4 * (lCoarseNodesPerDim[2] - 2);
236 const LO numFaces = 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) + 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[2] - 2) + 2 * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
237 const LO numInteriors = (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
238
239 const LO nnz = (numCorners * cornerStencilLength + numEdges * edgeStencilLength + numFaces * faceStencilLength + numInteriors * interiorStencilLength) * blkSize;
240
241 // Having the number of rows and columns we can genrate
242 // the appropriate maps for our operator.
243
244 *out << "R statistics:" << std::endl
245 << " -numRows= " << numRows << std::endl
246 << " -numCols= " << numCols << std::endl
247 << " -nnz= " << nnz << std::endl;
248
249 row_map_type row_map(Kokkos::ViewAllocateWithoutInitializing("row_map"), numRows + 1);
250 typename row_map_type::host_mirror_type row_map_h = Kokkos::create_mirror_view(row_map);
251
252 entries_type entries(Kokkos::ViewAllocateWithoutInitializing("entries"), nnz);
253 typename entries_type::host_mirror_type entries_h = Kokkos::create_mirror_view(entries);
254
255 values_type values(Kokkos::ViewAllocateWithoutInitializing("values"), nnz);
256 typename values_type::host_mirror_type values_h = Kokkos::create_mirror_view(values);
257
258 // Compute the basic interpolation
259 // coefficients for 1D rate of 3
260 // coarsening.
261 Array<SC> coeffs({1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0});
262 row_map_h(0) = 0;
263
264 // Define some offsets that
265 // will be needed often later on
266 const LO edgeLineOffset = 2 * cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength;
267 const LO faceLineOffset = 2 * edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength;
268 const LO interiorLineOffset = 2 * faceStencilLength + (lCoarseNodesPerDim[0] - 2) * interiorStencilLength;
269
270 const LO facePlaneOffset = 2 * edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset;
271 const LO interiorPlaneOffset = 2 * faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset;
272
273 // Let us take care of the corners
274 // first since we always have
275 // corners to deal with!
276 {
277 // Corner 1
278 LO coordRowIdx = 0, rowIdx = 0, coordColumnOffset = 0, columnOffset = 0, entryOffset = 0;
279 for (LO l = 0; l < blkSize; ++l) {
280 for (LO k = 0; k < 3; ++k) {
281 for (LO j = 0; j < 3; ++j) {
282 for (LO i = 0; i < 3; ++i) {
283 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
284 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i + 2];
285 }
286 }
287 }
288 }
289 for (LO l = 0; l < blkSize; ++l) {
290 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
291 }
292 for (int dim = 0; dim < numDimensions; ++dim) {
293 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
294 }
295
296 // Corner 5
297 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
298 rowIdx = coordRowIdx * blkSize;
299 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
300 columnOffset = coordColumnOffset * blkSize;
301 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
302 for (LO l = 0; l < blkSize; ++l) {
303 for (LO k = 0; k < 3; ++k) {
304 for (LO j = 0; j < 3; ++j) {
305 for (LO i = 0; i < 3; ++i) {
306 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
307 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
308 }
309 }
310 }
311 }
312 for (LO l = 0; l < blkSize; ++l) {
313 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
314 }
315 for (int dim = 0; dim < numDimensions; ++dim) {
316 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
317 }
318
319 // Corner 2
320 coordRowIdx = (lCoarseNodesPerDim[0] - 1);
321 rowIdx = coordRowIdx * blkSize;
322 coordColumnOffset = (lFineNodesPerDim[0] - 1);
323 columnOffset = coordColumnOffset * blkSize;
324 entryOffset = (cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) * blkSize;
325 for (LO l = 0; l < blkSize; ++l) {
326 for (LO k = 0; k < 3; ++k) {
327 for (LO j = 0; j < 3; ++j) {
328 for (LO i = 0; i < 3; ++i) {
329 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
330 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
331 }
332 }
333 }
334 }
335 for (LO l = 0; l < blkSize; ++l) {
336 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
337 }
338 for (int dim = 0; dim < numDimensions; ++dim) {
339 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
340 }
341
342 // Corner 6
343 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
344 rowIdx = coordRowIdx * blkSize;
345 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
346 columnOffset = coordColumnOffset * blkSize;
347 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
348 for (LO l = 0; l < blkSize; ++l) {
349 for (LO k = 0; k < 3; ++k) {
350 for (LO j = 0; j < 3; ++j) {
351 for (LO i = 0; i < 3; ++i) {
352 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
353 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
354 }
355 }
356 }
357 }
358 for (LO l = 0; l < blkSize; ++l) {
359 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
360 }
361 for (int dim = 0; dim < numDimensions; ++dim) {
362 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
363 }
364
365 // Corner 3
366 coordRowIdx = (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0];
367 rowIdx = coordRowIdx * blkSize;
368 coordColumnOffset = (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0];
369 columnOffset = coordColumnOffset * blkSize;
370 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset) * blkSize;
371 for (LO l = 0; l < blkSize; ++l) {
372 for (LO k = 0; k < 3; ++k) {
373 for (LO j = 0; j < 3; ++j) {
374 for (LO i = 0; i < 3; ++i) {
375 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
376 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
377 }
378 }
379 }
380 }
381 for (LO l = 0; l < blkSize; ++l) {
382 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
383 }
384 for (int dim = 0; dim < numDimensions; ++dim) {
385 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
386 }
387
388 // Corner 7
389 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
390 rowIdx = coordRowIdx * blkSize;
391 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
392 columnOffset = coordColumnOffset * blkSize;
393 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
394 for (LO l = 0; l < blkSize; ++l) {
395 for (LO k = 0; k < 3; ++k) {
396 for (LO j = 0; j < 3; ++j) {
397 for (LO i = 0; i < 3; ++i) {
398 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
399 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
400 }
401 }
402 }
403 }
404 for (LO l = 0; l < blkSize; ++l) {
405 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
406 }
407 for (int dim = 0; dim < numDimensions; ++dim) {
408 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
409 }
410
411 // Corner 4
412 coordRowIdx = (lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
413 rowIdx = coordRowIdx * blkSize;
414 coordColumnOffset = (lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
415 columnOffset = coordColumnOffset * blkSize;
416 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset +
417 cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) *
418 blkSize;
419 for (LO l = 0; l < blkSize; ++l) {
420 for (LO k = 0; k < 3; ++k) {
421 for (LO j = 0; j < 3; ++j) {
422 for (LO i = 0; i < 3; ++i) {
423 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
424 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
425 }
426 }
427 }
428 }
429 for (LO l = 0; l < blkSize; ++l) {
430 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
431 }
432 for (int dim = 0; dim < numDimensions; ++dim) {
433 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
434 }
435
436 // Corner 8
437 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
438 rowIdx = coordRowIdx * blkSize;
439 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
440 columnOffset = coordColumnOffset * blkSize;
441 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
442 for (LO l = 0; l < blkSize; ++l) {
443 for (LO k = 0; k < 3; ++k) {
444 for (LO j = 0; j < 3; ++j) {
445 for (LO i = 0; i < 3; ++i) {
446 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
447 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
448 }
449 }
450 }
451 }
452 for (LO l = 0; l < blkSize; ++l) {
453 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
454 }
455 for (int dim = 0; dim < numDimensions; ++dim) {
456 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
457 }
458 } // Corners are done!
459
460 // Edges along 0 direction
461 if (lCoarseNodesPerDim[0] - 2 > 0) {
462 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
463 for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[0] - 2; ++edgeIdx) {
464 // Edge 0
465 coordRowIdx = (edgeIdx + 1);
466 rowIdx = coordRowIdx * blkSize;
467 coordColumnOffset = (edgeIdx + 1) * 3;
468 columnOffset = coordColumnOffset * blkSize;
469 entryOffset = (cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
470 for (LO l = 0; l < blkSize; ++l) {
471 for (LO k = 0; k < 3; ++k) {
472 for (LO j = 0; j < 3; ++j) {
473 for (LO i = 0; i < 5; ++i) {
474 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
475 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
476 }
477 }
478 }
479 }
480 for (LO l = 0; l < blkSize; ++l) {
481 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
482 }
483 for (int dim = 0; dim < numDimensions; ++dim) {
484 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
485 }
486
487 // Edge 1
488 coordRowIdx = ((lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
489 rowIdx = coordRowIdx * blkSize;
490 coordColumnOffset = ((lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
491 columnOffset = coordColumnOffset * blkSize;
492 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
493 for (LO l = 0; l < blkSize; ++l) {
494 for (LO k = 0; k < 3; ++k) {
495 for (LO j = 0; j < 3; ++j) {
496 for (LO i = 0; i < 5; ++i) {
497 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
498 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
499 }
500 }
501 }
502 }
503 for (LO l = 0; l < blkSize; ++l) {
504 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
505 }
506 for (int dim = 0; dim < numDimensions; ++dim) {
507 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
508 }
509
510 // Edge 2
511 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + edgeIdx + 1);
512 rowIdx = coordRowIdx * blkSize;
513 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
514 columnOffset = coordColumnOffset * blkSize;
515 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
516 for (LO l = 0; l < blkSize; ++l) {
517 for (LO k = 0; k < 3; ++k) {
518 for (LO j = 0; j < 3; ++j) {
519 for (LO i = 0; i < 5; ++i) {
520 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
521 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
522 }
523 }
524 }
525 }
526 for (LO l = 0; l < blkSize; ++l) {
527 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
528 }
529 for (int dim = 0; dim < numDimensions; ++dim) {
530 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
531 }
532
533 // Edge 3
534 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
535 rowIdx = coordRowIdx * blkSize;
536 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
537 columnOffset = coordColumnOffset * blkSize;
538 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
539 for (LO l = 0; l < blkSize; ++l) {
540 for (LO k = 0; k < 3; ++k) {
541 for (LO j = 0; j < 3; ++j) {
542 for (LO i = 0; i < 5; ++i) {
543 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
544 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
545 }
546 }
547 }
548 }
549 for (LO l = 0; l < blkSize; ++l) {
550 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
551 }
552 for (int dim = 0; dim < numDimensions; ++dim) {
553 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
554 }
555 }
556 }
557
558 // Edges along 1 direction
559 if (lCoarseNodesPerDim[1] - 2 > 0) {
560 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
561 for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[1] - 2; ++edgeIdx) {
562 // Edge 0
563 coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[0];
564 rowIdx = coordRowIdx * blkSize;
565 coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[0];
566 columnOffset = coordColumnOffset * blkSize;
567 entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
568 for (LO l = 0; l < blkSize; ++l) {
569 for (LO k = 0; k < 3; ++k) {
570 for (LO j = 0; j < 5; ++j) {
571 for (LO i = 0; i < 3; ++i) {
572 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
573 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
574 }
575 }
576 }
577 }
578 for (LO l = 0; l < blkSize; ++l) {
579 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
580 }
581 for (int dim = 0; dim < numDimensions; ++dim) {
582 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
583 }
584
585 // Edge 1
586 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
587 rowIdx = coordRowIdx * blkSize;
588 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
589 columnOffset = coordColumnOffset * blkSize;
590 entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
591 for (LO l = 0; l < blkSize; ++l) {
592 for (LO k = 0; k < 3; ++k) {
593 for (LO j = 0; j < 5; ++j) {
594 for (LO i = 0; i < 3; ++i) {
595 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
596 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
597 }
598 }
599 }
600 }
601 for (LO l = 0; l < blkSize; ++l) {
602 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
603 }
604 for (int dim = 0; dim < numDimensions; ++dim) {
605 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
606 }
607
608 // Edge 2
609 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0]);
610 rowIdx = coordRowIdx * blkSize;
611 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0]);
612 columnOffset = coordColumnOffset * blkSize;
613 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
614 for (LO l = 0; l < blkSize; ++l) {
615 for (LO k = 0; k < 3; ++k) {
616 for (LO j = 0; j < 5; ++j) {
617 for (LO i = 0; i < 3; ++i) {
618 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
619 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
620 }
621 }
622 }
623 }
624 for (LO l = 0; l < blkSize; ++l) {
625 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
626 }
627 for (int dim = 0; dim < numDimensions; ++dim) {
628 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
629 }
630
631 // Edge 3
632 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
633 rowIdx = coordRowIdx * blkSize;
634 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
635 columnOffset = coordColumnOffset * blkSize;
636 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
637 for (LO l = 0; l < blkSize; ++l) {
638 for (LO k = 0; k < 3; ++k) {
639 for (LO j = 0; j < 5; ++j) {
640 for (LO i = 0; i < 3; ++i) {
641 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
642 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
643 }
644 }
645 }
646 }
647 for (LO l = 0; l < blkSize; ++l) {
648 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
649 }
650 for (int dim = 0; dim < numDimensions; ++dim) {
651 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
652 }
653 }
654 }
655
656 // Edges along 2 direction
657 if (lCoarseNodesPerDim[2] - 2 > 0) {
658 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
659 for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[2] - 2; ++edgeIdx) {
660 // Edge 0
661 coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
662 rowIdx = coordRowIdx * blkSize;
663 coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0];
664 columnOffset = coordColumnOffset * blkSize;
665 entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset) * blkSize;
666 for (LO l = 0; l < blkSize; ++l) {
667 for (LO k = 0; k < 5; ++k) {
668 for (LO j = 0; j < 3; ++j) {
669 for (LO i = 0; i < 3; ++i) {
670 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
671 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
672 }
673 }
674 }
675 }
676 for (LO l = 0; l < blkSize; ++l) {
677 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
678 }
679 for (int dim = 0; dim < numDimensions; ++dim) {
680 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
681 }
682
683 // Edge 1
684 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
685 rowIdx = coordRowIdx * blkSize;
686 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
687 columnOffset = coordColumnOffset * blkSize;
688 entryOffset = (facePlaneOffset + faceLineOffset - edgeStencilLength + edgeIdx * interiorPlaneOffset) * blkSize;
689 for (LO l = 0; l < blkSize; ++l) {
690 for (LO k = 0; k < 5; ++k) {
691 for (LO j = 0; j < 3; ++j) {
692 for (LO i = 0; i < 3; ++i) {
693 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
694 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
695 }
696 }
697 }
698 }
699 for (LO l = 0; l < blkSize; ++l) {
700 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
701 }
702 for (int dim = 0; dim < numDimensions; ++dim) {
703 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
704 }
705
706 // Edge 2
707 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0]);
708 rowIdx = coordRowIdx * blkSize;
709 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0]);
710 columnOffset = coordColumnOffset * blkSize;
711 entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset + faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset) * blkSize;
712 for (LO l = 0; l < blkSize; ++l) {
713 for (LO k = 0; k < 5; ++k) {
714 for (LO j = 0; j < 3; ++j) {
715 for (LO i = 0; i < 3; ++i) {
716 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
717 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
718 }
719 }
720 }
721 }
722 for (LO l = 0; l < blkSize; ++l) {
723 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
724 }
725 for (int dim = 0; dim < numDimensions; ++dim) {
726 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
727 }
728
729 // Edge 3
730 coordRowIdx = ((edgeIdx + 2) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
731 rowIdx = coordRowIdx * blkSize;
732 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
733 columnOffset = coordColumnOffset * blkSize;
734 entryOffset = (facePlaneOffset + (edgeIdx + 1) * interiorPlaneOffset - edgeStencilLength) * blkSize;
735 for (LO l = 0; l < blkSize; ++l) {
736 for (LO k = 0; k < 5; ++k) {
737 for (LO j = 0; j < 3; ++j) {
738 for (LO i = 0; i < 3; ++i) {
739 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
740 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
741 }
742 }
743 }
744 }
745 for (LO l = 0; l < blkSize; ++l) {
746 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
747 }
748 for (int dim = 0; dim < numDimensions; ++dim) {
749 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
750 }
751 }
752 }
753
754 // TODO: KOKKOS parallel_for used from here. Not sure if it should be used for edges.
755 Kokkos::deep_copy(row_map, row_map_h);
756 Kokkos::deep_copy(entries, entries_h);
757 Kokkos::deep_copy(values, values_h);
758
759 // Create views on device for nodes per dim
760 LOTupleView lFineNodesPerDim_d("lFineNodesPerDim");
761 LOTupleView lCoarseNodesPerDim_d("lCoarseNodesPerDim");
762
763 typename Kokkos::View<LO[3], device_type>::host_mirror_type lCoarseNodesPerDim_h = Kokkos::create_mirror_view(lCoarseNodesPerDim_d);
764 typename Kokkos::View<LO[3], device_type>::host_mirror_type lFineNodesPerDim_h = Kokkos::create_mirror_view(lFineNodesPerDim_d);
765
766 for (int dim = 0; dim < numDimensions; ++dim) {
767 lCoarseNodesPerDim_h(dim) = lCoarseNodesPerDim[dim];
768 lFineNodesPerDim_h(dim) = lFineNodesPerDim[dim];
769 }
770
771 Kokkos::deep_copy(lCoarseNodesPerDim_d, lCoarseNodesPerDim_h);
772 Kokkos::deep_copy(lFineNodesPerDim_d, lFineNodesPerDim_h);
773
774 // Faces in 0-1 plane
775 if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[1] - 2 > 0)) {
776 Kokkos::parallel_for(
777 "Faces in 0-1 plane region R",
778 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[0] - 2)),
779 KOKKOS_LAMBDA(const LO faceIdx) {
780 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
781 LO gridIdx[3] = {0, 0, 0};
782 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
783 // Last step in the loop
784 // update the grid indices
785 // for next grid point
786 for (LO i = 0; i < faceIdx; i++) {
787 ++gridIdx[0];
788 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
789 gridIdx[0] = 0;
790 ++gridIdx[1];
791 }
792 }
793
794 // Face 0
795 coordRowIdx = ((gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
796 rowIdx = coordRowIdx * blkSize;
797 coordColumnOffset = 3 * ((gridIdx[1] + 1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1);
798 columnOffset = coordColumnOffset * blkSize;
799 entryOffset = (edgeLineOffset + edgeStencilLength + gridIdx[1] * faceLineOffset + gridIdx[0] * faceStencilLength) * blkSize;
800 for (LO l = 0; l < blkSize; ++l) {
801 for (LO k = 0; k < 3; ++k) {
802 for (LO j = 0; j < 5; ++j) {
803 for (LO i = 0; i < 5; ++i) {
804 entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + (k * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
805 values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k + 2] * coeffs_d[j] * coeffs_d[i];
806 }
807 }
808 }
809 }
810 for (LO l = 0; l < blkSize; ++l) {
811 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
812 }
813 for (int dim = 0; dim < numDimensions; ++dim) {
814 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
815 }
816
817 // Face 1
818 coordRowIdx += (lCoarseNodesPerDim_d(2) - 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0);
819 rowIdx = coordRowIdx * blkSize;
820 coordColumnOffset += (lFineNodesPerDim_d(2) - 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0);
821 columnOffset = coordColumnOffset * blkSize;
822 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim_d(2) - 2) * interiorPlaneOffset) * blkSize;
823 for (LO l = 0; l < blkSize; ++l) {
824 for (LO k = 0; k < 3; ++k) {
825 for (LO j = 0; j < 5; ++j) {
826 for (LO i = 0; i < 5; ++i) {
827 entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
828 values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
829 }
830 }
831 }
832 }
833 for (LO l = 0; l < blkSize; ++l) {
834 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
835 }
836 for (int dim = 0; dim < numDimensions; ++dim) {
837 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
838 }
839 }); // parallel_for faces in 0-1 plane
840 }
841
842 // Faces in 0-2 plane
843 if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
844 Kokkos::parallel_for(
845 "Faces in 0-2 plane region R",
846 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[0] - 2)),
847 KOKKOS_LAMBDA(const LO faceIdx) {
848 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
849 LO gridIdx[3] = {0, 0, 0};
850 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
851 // Last step in the loop
852 // update the grid indices
853 // for next grid point
854 for (LO i = 0; i < faceIdx; i++) {
855 ++gridIdx[0];
856 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
857 gridIdx[0] = 0;
858 ++gridIdx[2];
859 }
860 }
861
862 // Face 0
863 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[0] + 1));
864 rowIdx = coordRowIdx * blkSize;
865 coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1) * 3;
866 columnOffset = coordColumnOffset * blkSize;
867 entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + edgeStencilLength + gridIdx[0] * faceStencilLength) * blkSize;
868 for (LO l = 0; l < blkSize; ++l) {
869 for (LO k = 0; k < 5; ++k) {
870 for (LO j = 0; j < 3; ++j) {
871 for (LO i = 0; i < 5; ++i) {
872 entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + j * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
873 values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j + 2] * coeffs_d[i];
874 }
875 }
876 }
877 }
878 for (LO l = 0; l < blkSize; ++l) {
879 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
880 }
881 for (int dim = 0; dim < numDimensions; ++dim) {
882 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
883 }
884
885 // Face 1
886 coordRowIdx += (lCoarseNodesPerDim_d(1) - 1) * lCoarseNodesPerDim_d(0);
887 rowIdx = coordRowIdx * blkSize;
888 coordColumnOffset += (lFineNodesPerDim_d(1) - 1) * lFineNodesPerDim_d(0);
889 columnOffset = coordColumnOffset * blkSize;
890 entryOffset += (faceLineOffset + (lCoarseNodesPerDim_d(1) - 2) * interiorLineOffset) * blkSize;
891 for (LO l = 0; l < blkSize; ++l) {
892 for (LO k = 0; k < 5; ++k) {
893 for (LO j = 0; j < 3; ++j) {
894 for (LO i = 0; i < 5; ++i) {
895 entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
896 values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
897 }
898 }
899 }
900 }
901 for (LO l = 0; l < blkSize; ++l) {
902 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
903 }
904 for (int dim = 0; dim < numDimensions; ++dim) {
905 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
906 }
907 }); // parallel_for faces in 0-2 plane
908 }
909
910 // Faces in 1-2 plane
911 if ((lCoarseNodesPerDim[1] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
912 Kokkos::parallel_for(
913 "Faces in 1-2 plane region R",
914 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[1] - 2)),
915 KOKKOS_LAMBDA(const LO faceIdx) {
916 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
917 LO gridIdx[3] = {0, 0, 0};
918 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
919 // Last step in the loop
920 // update the grid indices
921 // for next grid point
922 for (LO i = 0; i < faceIdx; i++) {
923 ++gridIdx[1];
924 if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
925 gridIdx[1] = 0;
926 ++gridIdx[2];
927 }
928 }
929
930 // Face 0
931 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0));
932 rowIdx = coordRowIdx * blkSize;
933 coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * lFineNodesPerDim_d(0)) * 3;
934 columnOffset = coordColumnOffset * blkSize;
935 entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + faceLineOffset + gridIdx[1] * interiorLineOffset) * blkSize;
936 for (LO l = 0; l < blkSize; ++l) {
937 for (LO k = 0; k < 5; ++k) {
938 for (LO j = 0; j < 5; ++j) {
939 for (LO i = 0; i < 3; ++i) {
940 entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i) * blkSize + l;
941 values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i + 2];
942 }
943 }
944 }
945 }
946 for (LO l = 0; l < blkSize; ++l) {
947 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
948 }
949 for (int dim = 0; dim < numDimensions; ++dim) {
950 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
951 }
952
953 // Face 1
954 coordRowIdx += (lCoarseNodesPerDim_d(0) - 1);
955 rowIdx = coordRowIdx * blkSize;
956 coordColumnOffset += (lFineNodesPerDim_d(0) - 1);
957 columnOffset = coordColumnOffset * blkSize;
958 entryOffset += (faceStencilLength + (lCoarseNodesPerDim_d(0) - 2) * interiorStencilLength) * blkSize;
959 for (LO l = 0; l < blkSize; ++l) {
960 for (LO k = 0; k < 5; ++k) {
961 for (LO j = 0; j < 5; ++j) {
962 for (LO i = 0; i < 3; ++i) {
963 entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
964 values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
965 }
966 }
967 }
968 }
969 for (LO l = 0; l < blkSize; ++l) {
970 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
971 }
972 for (int dim = 0; dim < numDimensions; ++dim) {
973 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
974 }
975 }); // parallel_for faces in 1-2 plane
976 }
977
978 if (numInteriors > 0) {
979 // Allocate and compute arrays
980 // containing column offsets
981 // and values associated with
982 // interior points
983 LO countRowEntries = 0;
984 Kokkos::View<LO[125]> coordColumnOffsets_d("coordColOffset");
985 auto coordColumnOffsets_h = Kokkos::create_mirror_view(coordColumnOffsets_d);
986
987 for (LO k = -2; k < 3; ++k) {
988 for (LO j = -2; j < 3; ++j) {
989 for (LO i = -2; i < 3; ++i) {
990 coordColumnOffsets_h(countRowEntries) = k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i;
991 ++countRowEntries;
992 }
993 }
994 }
995 Kokkos::deep_copy(coordColumnOffsets_d, coordColumnOffsets_h);
996
997 LO countValues = 0;
998 Kokkos::View<impl_scalar_type*> interiorValues_d("interiorValues", 125);
999 auto interiorValues_h = Kokkos::create_mirror_view(interiorValues_d);
1000 for (LO k = 0; k < 5; ++k) {
1001 for (LO j = 0; j < 5; ++j) {
1002 for (LO i = 0; i < 5; ++i) {
1003 interiorValues_h(countValues) = coeffs[k] * coeffs[j] * coeffs[i];
1004 ++countValues;
1005 }
1006 }
1007 }
1008 Kokkos::deep_copy(interiorValues_d, interiorValues_h);
1009
1010 Kokkos::parallel_for(
1011 "interior idx region R", Kokkos::RangePolicy<typename device_type::execution_space>(0, numInteriors),
1012 KOKKOS_LAMBDA(const LO interiorIdx) {
1013 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
1014 LO gridIdx[3];
1015 gridIdx[0] = 0;
1016 gridIdx[1] = 0;
1017 gridIdx[2] = 0;
1018 // First step in the loop
1019 // update the grid indices
1020 // for the grid point
1021 for (LO i = 0; i < interiorIdx; i++) {
1022 ++gridIdx[0];
1023 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
1024 gridIdx[0] = 0;
1025 ++gridIdx[1];
1026 if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
1027 gridIdx[1] = 0;
1028 ++gridIdx[2];
1029 }
1030 }
1031 }
1032
1033 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(0) * lCoarseNodesPerDim_d(1) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
1034 rowIdx = coordRowIdx * blkSize;
1035 coordColumnOffset = ((gridIdx[2] + 1) * 3 * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * 3 * lFineNodesPerDim_d(0) + (gridIdx[0] + 1) * 3);
1036 columnOffset = coordColumnOffset * blkSize;
1037 entryOffset = (facePlaneOffset + faceLineOffset + faceStencilLength + gridIdx[2] * interiorPlaneOffset + gridIdx[1] * interiorLineOffset + gridIdx[0] * interiorStencilLength) * blkSize;
1038 for (LO l = 0; l < blkSize; ++l) {
1039 row_map(rowIdx + 1 + l) = entryOffset + interiorStencilLength * (l + 1);
1040 }
1041 // Fill the column indices
1042 // and values in the approproate
1043 // views.
1044 for (LO l = 0; l < blkSize; ++l) {
1045 for (LO entryIdx = 0; entryIdx < interiorStencilLength; ++entryIdx) {
1046 entries(entryOffset + entryIdx + interiorStencilLength * l) = columnOffset + coordColumnOffsets_d(entryIdx) * blkSize + l;
1047 values(entryOffset + entryIdx + interiorStencilLength * l) = interiorValues_d(entryIdx);
1048 }
1049 }
1050 for (int dim = 0; dim < numDimensions; ++dim) {
1051 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
1052 }
1053 }); // Kokkos::parallel_for interior idx
1054 //
1055 }
1056
1057 local_graph_type localGraph(entries, row_map);
1058 local_matrix_type localR("R", numCols, values, localGraph);
1059
1060 R = MatrixFactory::Build(localR, // the local data
1061 rowMap, // rowMap
1062 A->getRowMap(), // colMap
1063 A->getRowMap(), // domainMap == colMap
1064 rowMap, // rangeMap == rowMap
1065 Teuchos::null); // params for optimized construction
1066
1067} // Build3D()
1068
1069} // namespace MueLu
1070
1071#define MUELU_REGIONRFACTORY_KOKKOS_SHORT
1072#endif // MUELU_REGIONRFACTORY_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultGlobalOrdinal GlobalOrdinal
MueLu::DefaultNode Node
Class that holds all level-specific information.
int GetLevelID() const
Return level number.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static const NoFactory * get()
typename Kokkos::View< LO[3], device_type > LOTupleView
void Build3D(const int numDimensions, Array< LO > &lFineNodesPerDim, const RCP< Matrix > &A, const RCP< realvaluedmultivector_type > &fineCoordinates, RCP< Matrix > &R, RCP< realvaluedmultivector_type > &coarseCoordinates, Array< LO > &lCoarseNodesPerDim) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
RCP< const ParameterList > GetValidParameterList() const
Input.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Namespace for MueLu classes and methods.