157 Build3D(
const int numDimensions,
158 Teuchos::Array<LocalOrdinal>& lFineNodesPerDim,
159 const RCP<Matrix>& A,
163 Teuchos::Array<LocalOrdinal>& lCoarseNodesPerDim)
const {
164 using local_matrix_type =
typename CrsMatrix::local_matrix_device_type;
165 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
166 using row_map_type =
typename local_matrix_type::row_map_type::non_const_type;
167 using entries_type =
typename local_matrix_type::index_type::non_const_type;
168 using values_type =
typename local_matrix_type::values_type::non_const_type;
169 using impl_scalar_type =
typename KokkosKernels::ArithTraits<Scalar>::val_type;
172 RCP<Teuchos::FancyOStream> out;
173 if (
const char* dbg = std::getenv(
"MUELU_REGIONRFACTORY_DEBUG")) {
174 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
175 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
177 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
181 for (
int dim = 0; dim < numDimensions; ++dim) {
182 lCoarseNodesPerDim[dim] = lFineNodesPerDim[dim] / 3 + 1;
184 *out <<
"lCoarseNodesPerDim " << lCoarseNodesPerDim << std::endl;
187 const LO blkSize = A->GetFixedBlockSize();
188 *out <<
"blkSize " << blkSize << std::endl;
192 const LO numRows = blkSize * lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2];
193 const LO numCols = blkSize * lFineNodesPerDim[0] * lFineNodesPerDim[1] * lFineNodesPerDim[2];
198 RCP<Map> rowMap = MapFactory::Build(A->getRowMap()->lib(),
199 Teuchos::OrdinalTraits<GO>::invalid(),
201 A->getRowMap()->getIndexBase(),
202 A->getRowMap()->getComm());
204 RCP<Map> coordRowMap = MapFactory::Build(A->getRowMap()->lib(),
205 Teuchos::OrdinalTraits<GO>::invalid(),
206 lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2],
207 A->getRowMap()->getIndexBase(),
208 A->getRowMap()->getComm());
210 coarseCoordinates = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(coordRowMap,
214 auto fineCoordsView = fineCoordinates->getLocalViewDevice(Tpetra::Access::ReadOnly);
215 auto coarseCoordsView = coarseCoordinates->getLocalViewDevice(Tpetra::Access::OverwriteAll);
217 Array<ArrayRCP<const real_type> > fineCoordData(numDimensions);
218 Array<ArrayRCP<real_type> > coarseCoordData(numDimensions);
219 for (
int dim = 0; dim < numDimensions; ++dim) {
220 fineCoordData[dim] = fineCoordinates->getData(dim);
221 coarseCoordData[dim] = coarseCoordinates->getDataNonConst(dim);
228 const LO cornerStencilLength = 27;
229 const LO edgeStencilLength = 45;
230 const LO faceStencilLength = 75;
231 const LO interiorStencilLength = 125;
234 const LO numCorners = 8;
235 const LO numEdges = 4 * (lCoarseNodesPerDim[0] - 2) + 4 * (lCoarseNodesPerDim[1] - 2) + 4 * (lCoarseNodesPerDim[2] - 2);
236 const LO numFaces = 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) + 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[2] - 2) + 2 * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
237 const LO numInteriors = (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
239 const LO nnz = (numCorners * cornerStencilLength + numEdges * edgeStencilLength + numFaces * faceStencilLength + numInteriors * interiorStencilLength) * blkSize;
244 *out <<
"R statistics:" << std::endl
245 <<
" -numRows= " << numRows << std::endl
246 <<
" -numCols= " << numCols << std::endl
247 <<
" -nnz= " << nnz << std::endl;
249 row_map_type row_map(Kokkos::ViewAllocateWithoutInitializing(
"row_map"), numRows + 1);
250 typename row_map_type::host_mirror_type row_map_h = Kokkos::create_mirror_view(row_map);
252 entries_type entries(Kokkos::ViewAllocateWithoutInitializing(
"entries"), nnz);
253 typename entries_type::host_mirror_type entries_h = Kokkos::create_mirror_view(entries);
255 values_type values(Kokkos::ViewAllocateWithoutInitializing(
"values"), nnz);
256 typename values_type::host_mirror_type values_h = Kokkos::create_mirror_view(values);
261 Array<SC> coeffs({1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0});
266 const LO edgeLineOffset = 2 * cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength;
267 const LO faceLineOffset = 2 * edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength;
268 const LO interiorLineOffset = 2 * faceStencilLength + (lCoarseNodesPerDim[0] - 2) * interiorStencilLength;
270 const LO facePlaneOffset = 2 * edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset;
271 const LO interiorPlaneOffset = 2 * faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset;
278 LO coordRowIdx = 0, rowIdx = 0, coordColumnOffset = 0, columnOffset = 0, entryOffset = 0;
279 for (LO l = 0; l < blkSize; ++l) {
280 for (LO k = 0; k < 3; ++k) {
281 for (LO j = 0; j < 3; ++j) {
282 for (LO i = 0; i < 3; ++i) {
283 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
284 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i + 2];
289 for (LO l = 0; l < blkSize; ++l) {
290 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
292 for (
int dim = 0; dim < numDimensions; ++dim) {
293 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
297 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
298 rowIdx = coordRowIdx * blkSize;
299 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
300 columnOffset = coordColumnOffset * blkSize;
301 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
302 for (LO l = 0; l < blkSize; ++l) {
303 for (LO k = 0; k < 3; ++k) {
304 for (LO j = 0; j < 3; ++j) {
305 for (LO i = 0; i < 3; ++i) {
306 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
307 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
312 for (LO l = 0; l < blkSize; ++l) {
313 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
315 for (
int dim = 0; dim < numDimensions; ++dim) {
316 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
320 coordRowIdx = (lCoarseNodesPerDim[0] - 1);
321 rowIdx = coordRowIdx * blkSize;
322 coordColumnOffset = (lFineNodesPerDim[0] - 1);
323 columnOffset = coordColumnOffset * blkSize;
324 entryOffset = (cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) * blkSize;
325 for (LO l = 0; l < blkSize; ++l) {
326 for (LO k = 0; k < 3; ++k) {
327 for (LO j = 0; j < 3; ++j) {
328 for (LO i = 0; i < 3; ++i) {
329 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
330 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
335 for (LO l = 0; l < blkSize; ++l) {
336 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
338 for (
int dim = 0; dim < numDimensions; ++dim) {
339 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
343 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
344 rowIdx = coordRowIdx * blkSize;
345 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
346 columnOffset = coordColumnOffset * blkSize;
347 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
348 for (LO l = 0; l < blkSize; ++l) {
349 for (LO k = 0; k < 3; ++k) {
350 for (LO j = 0; j < 3; ++j) {
351 for (LO i = 0; i < 3; ++i) {
352 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
353 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
358 for (LO l = 0; l < blkSize; ++l) {
359 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
361 for (
int dim = 0; dim < numDimensions; ++dim) {
362 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
366 coordRowIdx = (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0];
367 rowIdx = coordRowIdx * blkSize;
368 coordColumnOffset = (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0];
369 columnOffset = coordColumnOffset * blkSize;
370 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset) * blkSize;
371 for (LO l = 0; l < blkSize; ++l) {
372 for (LO k = 0; k < 3; ++k) {
373 for (LO j = 0; j < 3; ++j) {
374 for (LO i = 0; i < 3; ++i) {
375 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
376 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
381 for (LO l = 0; l < blkSize; ++l) {
382 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
384 for (
int dim = 0; dim < numDimensions; ++dim) {
385 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
389 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
390 rowIdx = coordRowIdx * blkSize;
391 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
392 columnOffset = coordColumnOffset * blkSize;
393 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
394 for (LO l = 0; l < blkSize; ++l) {
395 for (LO k = 0; k < 3; ++k) {
396 for (LO j = 0; j < 3; ++j) {
397 for (LO i = 0; i < 3; ++i) {
398 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
399 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
404 for (LO l = 0; l < blkSize; ++l) {
405 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
407 for (
int dim = 0; dim < numDimensions; ++dim) {
408 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
412 coordRowIdx = (lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
413 rowIdx = coordRowIdx * blkSize;
414 coordColumnOffset = (lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
415 columnOffset = coordColumnOffset * blkSize;
416 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset +
417 cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) *
419 for (LO l = 0; l < blkSize; ++l) {
420 for (LO k = 0; k < 3; ++k) {
421 for (LO j = 0; j < 3; ++j) {
422 for (LO i = 0; i < 3; ++i) {
423 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
424 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
429 for (LO l = 0; l < blkSize; ++l) {
430 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
432 for (
int dim = 0; dim < numDimensions; ++dim) {
433 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
437 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
438 rowIdx = coordRowIdx * blkSize;
439 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
440 columnOffset = coordColumnOffset * blkSize;
441 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
442 for (LO l = 0; l < blkSize; ++l) {
443 for (LO k = 0; k < 3; ++k) {
444 for (LO j = 0; j < 3; ++j) {
445 for (LO i = 0; i < 3; ++i) {
446 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
447 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
452 for (LO l = 0; l < blkSize; ++l) {
453 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
455 for (
int dim = 0; dim < numDimensions; ++dim) {
456 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
461 if (lCoarseNodesPerDim[0] - 2 > 0) {
462 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
463 for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[0] - 2; ++edgeIdx) {
465 coordRowIdx = (edgeIdx + 1);
466 rowIdx = coordRowIdx * blkSize;
467 coordColumnOffset = (edgeIdx + 1) * 3;
468 columnOffset = coordColumnOffset * blkSize;
469 entryOffset = (cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
470 for (LO l = 0; l < blkSize; ++l) {
471 for (LO k = 0; k < 3; ++k) {
472 for (LO j = 0; j < 3; ++j) {
473 for (LO i = 0; i < 5; ++i) {
474 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
475 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
480 for (LO l = 0; l < blkSize; ++l) {
481 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
483 for (
int dim = 0; dim < numDimensions; ++dim) {
484 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
488 coordRowIdx = ((lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
489 rowIdx = coordRowIdx * blkSize;
490 coordColumnOffset = ((lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
491 columnOffset = coordColumnOffset * blkSize;
492 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
493 for (LO l = 0; l < blkSize; ++l) {
494 for (LO k = 0; k < 3; ++k) {
495 for (LO j = 0; j < 3; ++j) {
496 for (LO i = 0; i < 5; ++i) {
497 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
498 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
503 for (LO l = 0; l < blkSize; ++l) {
504 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
506 for (
int dim = 0; dim < numDimensions; ++dim) {
507 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
511 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + edgeIdx + 1);
512 rowIdx = coordRowIdx * blkSize;
513 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
514 columnOffset = coordColumnOffset * blkSize;
515 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
516 for (LO l = 0; l < blkSize; ++l) {
517 for (LO k = 0; k < 3; ++k) {
518 for (LO j = 0; j < 3; ++j) {
519 for (LO i = 0; i < 5; ++i) {
520 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
521 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
526 for (LO l = 0; l < blkSize; ++l) {
527 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
529 for (
int dim = 0; dim < numDimensions; ++dim) {
530 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
534 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
535 rowIdx = coordRowIdx * blkSize;
536 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
537 columnOffset = coordColumnOffset * blkSize;
538 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
539 for (LO l = 0; l < blkSize; ++l) {
540 for (LO k = 0; k < 3; ++k) {
541 for (LO j = 0; j < 3; ++j) {
542 for (LO i = 0; i < 5; ++i) {
543 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
544 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
549 for (LO l = 0; l < blkSize; ++l) {
550 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
552 for (
int dim = 0; dim < numDimensions; ++dim) {
553 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
559 if (lCoarseNodesPerDim[1] - 2 > 0) {
560 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
561 for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[1] - 2; ++edgeIdx) {
563 coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[0];
564 rowIdx = coordRowIdx * blkSize;
565 coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[0];
566 columnOffset = coordColumnOffset * blkSize;
567 entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
568 for (LO l = 0; l < blkSize; ++l) {
569 for (LO k = 0; k < 3; ++k) {
570 for (LO j = 0; j < 5; ++j) {
571 for (LO i = 0; i < 3; ++i) {
572 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
573 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
578 for (LO l = 0; l < blkSize; ++l) {
579 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
581 for (
int dim = 0; dim < numDimensions; ++dim) {
582 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
586 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
587 rowIdx = coordRowIdx * blkSize;
588 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
589 columnOffset = coordColumnOffset * blkSize;
590 entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
591 for (LO l = 0; l < blkSize; ++l) {
592 for (LO k = 0; k < 3; ++k) {
593 for (LO j = 0; j < 5; ++j) {
594 for (LO i = 0; i < 3; ++i) {
595 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
596 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
601 for (LO l = 0; l < blkSize; ++l) {
602 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
604 for (
int dim = 0; dim < numDimensions; ++dim) {
605 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
609 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0]);
610 rowIdx = coordRowIdx * blkSize;
611 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0]);
612 columnOffset = coordColumnOffset * blkSize;
613 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
614 for (LO l = 0; l < blkSize; ++l) {
615 for (LO k = 0; k < 3; ++k) {
616 for (LO j = 0; j < 5; ++j) {
617 for (LO i = 0; i < 3; ++i) {
618 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
619 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
624 for (LO l = 0; l < blkSize; ++l) {
625 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
627 for (
int dim = 0; dim < numDimensions; ++dim) {
628 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
632 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
633 rowIdx = coordRowIdx * blkSize;
634 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
635 columnOffset = coordColumnOffset * blkSize;
636 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
637 for (LO l = 0; l < blkSize; ++l) {
638 for (LO k = 0; k < 3; ++k) {
639 for (LO j = 0; j < 5; ++j) {
640 for (LO i = 0; i < 3; ++i) {
641 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
642 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
647 for (LO l = 0; l < blkSize; ++l) {
648 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
650 for (
int dim = 0; dim < numDimensions; ++dim) {
651 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
657 if (lCoarseNodesPerDim[2] - 2 > 0) {
658 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
659 for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[2] - 2; ++edgeIdx) {
661 coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
662 rowIdx = coordRowIdx * blkSize;
663 coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0];
664 columnOffset = coordColumnOffset * blkSize;
665 entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset) * blkSize;
666 for (LO l = 0; l < blkSize; ++l) {
667 for (LO k = 0; k < 5; ++k) {
668 for (LO j = 0; j < 3; ++j) {
669 for (LO i = 0; i < 3; ++i) {
670 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
671 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
676 for (LO l = 0; l < blkSize; ++l) {
677 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
679 for (
int dim = 0; dim < numDimensions; ++dim) {
680 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
684 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
685 rowIdx = coordRowIdx * blkSize;
686 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
687 columnOffset = coordColumnOffset * blkSize;
688 entryOffset = (facePlaneOffset + faceLineOffset - edgeStencilLength + edgeIdx * interiorPlaneOffset) * blkSize;
689 for (LO l = 0; l < blkSize; ++l) {
690 for (LO k = 0; k < 5; ++k) {
691 for (LO j = 0; j < 3; ++j) {
692 for (LO i = 0; i < 3; ++i) {
693 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
694 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
699 for (LO l = 0; l < blkSize; ++l) {
700 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
702 for (
int dim = 0; dim < numDimensions; ++dim) {
703 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
707 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0]);
708 rowIdx = coordRowIdx * blkSize;
709 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0]);
710 columnOffset = coordColumnOffset * blkSize;
711 entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset + faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset) * blkSize;
712 for (LO l = 0; l < blkSize; ++l) {
713 for (LO k = 0; k < 5; ++k) {
714 for (LO j = 0; j < 3; ++j) {
715 for (LO i = 0; i < 3; ++i) {
716 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
717 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
722 for (LO l = 0; l < blkSize; ++l) {
723 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
725 for (
int dim = 0; dim < numDimensions; ++dim) {
726 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
730 coordRowIdx = ((edgeIdx + 2) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
731 rowIdx = coordRowIdx * blkSize;
732 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
733 columnOffset = coordColumnOffset * blkSize;
734 entryOffset = (facePlaneOffset + (edgeIdx + 1) * interiorPlaneOffset - edgeStencilLength) * blkSize;
735 for (LO l = 0; l < blkSize; ++l) {
736 for (LO k = 0; k < 5; ++k) {
737 for (LO j = 0; j < 3; ++j) {
738 for (LO i = 0; i < 3; ++i) {
739 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
740 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
745 for (LO l = 0; l < blkSize; ++l) {
746 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
748 for (
int dim = 0; dim < numDimensions; ++dim) {
749 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
755 Kokkos::deep_copy(row_map, row_map_h);
756 Kokkos::deep_copy(entries, entries_h);
757 Kokkos::deep_copy(values, values_h);
760 LOTupleView lFineNodesPerDim_d(
"lFineNodesPerDim");
761 LOTupleView lCoarseNodesPerDim_d(
"lCoarseNodesPerDim");
763 typename Kokkos::View<LO[3], device_type>::host_mirror_type lCoarseNodesPerDim_h = Kokkos::create_mirror_view(lCoarseNodesPerDim_d);
764 typename Kokkos::View<LO[3], device_type>::host_mirror_type lFineNodesPerDim_h = Kokkos::create_mirror_view(lFineNodesPerDim_d);
766 for (
int dim = 0; dim < numDimensions; ++dim) {
767 lCoarseNodesPerDim_h(dim) = lCoarseNodesPerDim[dim];
768 lFineNodesPerDim_h(dim) = lFineNodesPerDim[dim];
771 Kokkos::deep_copy(lCoarseNodesPerDim_d, lCoarseNodesPerDim_h);
772 Kokkos::deep_copy(lFineNodesPerDim_d, lFineNodesPerDim_h);
775 if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[1] - 2 > 0)) {
776 Kokkos::parallel_for(
777 "Faces in 0-1 plane region R",
778 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[0] - 2)),
779 KOKKOS_LAMBDA(
const LO faceIdx) {
780 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
781 LO gridIdx[3] = {0, 0, 0};
782 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
786 for (LO i = 0; i < faceIdx; i++) {
788 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
795 coordRowIdx = ((gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
796 rowIdx = coordRowIdx * blkSize;
797 coordColumnOffset = 3 * ((gridIdx[1] + 1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1);
798 columnOffset = coordColumnOffset * blkSize;
799 entryOffset = (edgeLineOffset + edgeStencilLength + gridIdx[1] * faceLineOffset + gridIdx[0] * faceStencilLength) * blkSize;
800 for (LO l = 0; l < blkSize; ++l) {
801 for (LO k = 0; k < 3; ++k) {
802 for (LO j = 0; j < 5; ++j) {
803 for (LO i = 0; i < 5; ++i) {
804 entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + (k * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
805 values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k + 2] * coeffs_d[j] * coeffs_d[i];
810 for (LO l = 0; l < blkSize; ++l) {
811 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
813 for (
int dim = 0; dim < numDimensions; ++dim) {
814 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
818 coordRowIdx += (lCoarseNodesPerDim_d(2) - 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0);
819 rowIdx = coordRowIdx * blkSize;
820 coordColumnOffset += (lFineNodesPerDim_d(2) - 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0);
821 columnOffset = coordColumnOffset * blkSize;
822 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim_d(2) - 2) * interiorPlaneOffset) * blkSize;
823 for (LO l = 0; l < blkSize; ++l) {
824 for (LO k = 0; k < 3; ++k) {
825 for (LO j = 0; j < 5; ++j) {
826 for (LO i = 0; i < 5; ++i) {
827 entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
828 values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
833 for (LO l = 0; l < blkSize; ++l) {
834 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
836 for (
int dim = 0; dim < numDimensions; ++dim) {
837 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
843 if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
844 Kokkos::parallel_for(
845 "Faces in 0-2 plane region R",
846 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[0] - 2)),
847 KOKKOS_LAMBDA(
const LO faceIdx) {
848 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
849 LO gridIdx[3] = {0, 0, 0};
850 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
854 for (LO i = 0; i < faceIdx; i++) {
856 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
863 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[0] + 1));
864 rowIdx = coordRowIdx * blkSize;
865 coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1) * 3;
866 columnOffset = coordColumnOffset * blkSize;
867 entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + edgeStencilLength + gridIdx[0] * faceStencilLength) * blkSize;
868 for (LO l = 0; l < blkSize; ++l) {
869 for (LO k = 0; k < 5; ++k) {
870 for (LO j = 0; j < 3; ++j) {
871 for (LO i = 0; i < 5; ++i) {
872 entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + j * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
873 values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j + 2] * coeffs_d[i];
878 for (LO l = 0; l < blkSize; ++l) {
879 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
881 for (
int dim = 0; dim < numDimensions; ++dim) {
882 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
886 coordRowIdx += (lCoarseNodesPerDim_d(1) - 1) * lCoarseNodesPerDim_d(0);
887 rowIdx = coordRowIdx * blkSize;
888 coordColumnOffset += (lFineNodesPerDim_d(1) - 1) * lFineNodesPerDim_d(0);
889 columnOffset = coordColumnOffset * blkSize;
890 entryOffset += (faceLineOffset + (lCoarseNodesPerDim_d(1) - 2) * interiorLineOffset) * blkSize;
891 for (LO l = 0; l < blkSize; ++l) {
892 for (LO k = 0; k < 5; ++k) {
893 for (LO j = 0; j < 3; ++j) {
894 for (LO i = 0; i < 5; ++i) {
895 entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
896 values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
901 for (LO l = 0; l < blkSize; ++l) {
902 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
904 for (
int dim = 0; dim < numDimensions; ++dim) {
905 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
911 if ((lCoarseNodesPerDim[1] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
912 Kokkos::parallel_for(
913 "Faces in 1-2 plane region R",
914 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[1] - 2)),
915 KOKKOS_LAMBDA(
const LO faceIdx) {
916 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
917 LO gridIdx[3] = {0, 0, 0};
918 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
922 for (LO i = 0; i < faceIdx; i++) {
924 if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
931 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0));
932 rowIdx = coordRowIdx * blkSize;
933 coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * lFineNodesPerDim_d(0)) * 3;
934 columnOffset = coordColumnOffset * blkSize;
935 entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + faceLineOffset + gridIdx[1] * interiorLineOffset) * blkSize;
936 for (LO l = 0; l < blkSize; ++l) {
937 for (LO k = 0; k < 5; ++k) {
938 for (LO j = 0; j < 5; ++j) {
939 for (LO i = 0; i < 3; ++i) {
940 entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i) * blkSize + l;
941 values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i + 2];
946 for (LO l = 0; l < blkSize; ++l) {
947 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
949 for (
int dim = 0; dim < numDimensions; ++dim) {
950 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
954 coordRowIdx += (lCoarseNodesPerDim_d(0) - 1);
955 rowIdx = coordRowIdx * blkSize;
956 coordColumnOffset += (lFineNodesPerDim_d(0) - 1);
957 columnOffset = coordColumnOffset * blkSize;
958 entryOffset += (faceStencilLength + (lCoarseNodesPerDim_d(0) - 2) * interiorStencilLength) * blkSize;
959 for (LO l = 0; l < blkSize; ++l) {
960 for (LO k = 0; k < 5; ++k) {
961 for (LO j = 0; j < 5; ++j) {
962 for (LO i = 0; i < 3; ++i) {
963 entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
964 values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
969 for (LO l = 0; l < blkSize; ++l) {
970 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
972 for (
int dim = 0; dim < numDimensions; ++dim) {
973 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
978 if (numInteriors > 0) {
983 LO countRowEntries = 0;
984 Kokkos::View<LO[125]> coordColumnOffsets_d(
"coordColOffset");
985 auto coordColumnOffsets_h = Kokkos::create_mirror_view(coordColumnOffsets_d);
987 for (LO k = -2; k < 3; ++k) {
988 for (LO j = -2; j < 3; ++j) {
989 for (LO i = -2; i < 3; ++i) {
990 coordColumnOffsets_h(countRowEntries) = k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i;
995 Kokkos::deep_copy(coordColumnOffsets_d, coordColumnOffsets_h);
998 Kokkos::View<impl_scalar_type*> interiorValues_d(
"interiorValues", 125);
999 auto interiorValues_h = Kokkos::create_mirror_view(interiorValues_d);
1000 for (LO k = 0; k < 5; ++k) {
1001 for (LO j = 0; j < 5; ++j) {
1002 for (LO i = 0; i < 5; ++i) {
1003 interiorValues_h(countValues) = coeffs[k] * coeffs[j] * coeffs[i];
1008 Kokkos::deep_copy(interiorValues_d, interiorValues_h);
1010 Kokkos::parallel_for(
1011 "interior idx region R", Kokkos::RangePolicy<typename device_type::execution_space>(0, numInteriors),
1012 KOKKOS_LAMBDA(
const LO interiorIdx) {
1013 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
1021 for (LO i = 0; i < interiorIdx; i++) {
1023 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
1026 if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
1033 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(0) * lCoarseNodesPerDim_d(1) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
1034 rowIdx = coordRowIdx * blkSize;
1035 coordColumnOffset = ((gridIdx[2] + 1) * 3 * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * 3 * lFineNodesPerDim_d(0) + (gridIdx[0] + 1) * 3);
1036 columnOffset = coordColumnOffset * blkSize;
1037 entryOffset = (facePlaneOffset + faceLineOffset + faceStencilLength + gridIdx[2] * interiorPlaneOffset + gridIdx[1] * interiorLineOffset + gridIdx[0] * interiorStencilLength) * blkSize;
1038 for (LO l = 0; l < blkSize; ++l) {
1039 row_map(rowIdx + 1 + l) = entryOffset + interiorStencilLength * (l + 1);
1044 for (LO l = 0; l < blkSize; ++l) {
1045 for (LO entryIdx = 0; entryIdx < interiorStencilLength; ++entryIdx) {
1046 entries(entryOffset + entryIdx + interiorStencilLength * l) = columnOffset + coordColumnOffsets_d(entryIdx) * blkSize + l;
1047 values(entryOffset + entryIdx + interiorStencilLength * l) = interiorValues_d(entryIdx);
1050 for (
int dim = 0; dim < numDimensions; ++dim) {
1051 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
1057 local_graph_type localGraph(entries, row_map);
1058 local_matrix_type localR(
"R", numCols, values, localGraph);
1060 R = MatrixFactory::Build(localR,