MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_BlackBoxPFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_BLACKBOXPFACTORY_DEF_HPP
11#define MUELU_BLACKBOXPFACTORY_DEF_HPP
12
13#include <stdlib.h>
14#include <iomanip>
15
16// #include <Teuchos_LAPACK.hpp>
17#include <Teuchos_SerialDenseMatrix.hpp>
18#include <Teuchos_SerialDenseVector.hpp>
19#include <Teuchos_SerialDenseSolver.hpp>
20
22#include <Xpetra_CrsMatrixWrap.hpp>
23#include <Xpetra_ImportFactory.hpp>
24#include <Xpetra_Matrix.hpp>
25#include <Xpetra_MapFactory.hpp>
26#include <Xpetra_MultiVectorFactory.hpp>
27#include <Xpetra_VectorFactory.hpp>
28
29#include <Xpetra_IO.hpp>
30
32
33#include "MueLu_Monitor.hpp"
34
35namespace MueLu {
36
37template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
39 RCP<ParameterList> validParamList = rcp(new ParameterList());
40
41 // Coarsen can come in two forms, either a single char that will be interpreted as an integer
42 // which is used as the coarsening rate in every spatial dimentions,
43 // or it can be a longer string that will then be interpreted as an array of integers.
44 // By default coarsen is set as "{2}", hence a coarsening rate of 2 in every spatial dimension
45 // is the default setting!
46 validParamList->set<std::string>("Coarsen", "{3}", "Coarsening rate in all spatial dimensions");
47 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
48 validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
49 validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coorindates");
50 validParamList->set<RCP<const FactoryBase> >("gNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
51 validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
52 validParamList->set<std::string>("stencil type", "full", "You can use two type of stencils: full and reduced, that correspond to 27 and 7 points stencils respectively in 3D.");
53 validParamList->set<std::string>("block strategy", "coupled", "The strategy used to handle systems of PDEs can be: coupled or uncoupled.");
54
55 return validParamList;
56}
57
58template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
60 Level& /* coarseLevel */)
61 const {
62 Input(fineLevel, "A");
63 Input(fineLevel, "Nullspace");
64 Input(fineLevel, "Coordinates");
65 // Request the global number of nodes per dimensions
66 if (fineLevel.GetLevelID() == 0) {
67 if (fineLevel.IsAvailable("gNodesPerDim", NoFactory::get())) {
68 fineLevel.DeclareInput("gNodesPerDim", NoFactory::get(), this);
69 } else {
70 TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("gNodesPerDim", NoFactory::get()),
72 "gNodesPerDim was not provided by the user on level0!");
73 }
74 } else {
75 Input(fineLevel, "gNodesPerDim");
76 }
77
78 // Request the local number of nodes per dimensions
79 if (fineLevel.GetLevelID() == 0) {
80 if (fineLevel.IsAvailable("lNodesPerDim", NoFactory::get())) {
81 fineLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this);
82 } else {
83 TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("lNodesPerDim", NoFactory::get()),
85 "lNodesPerDim was not provided by the user on level0!");
86 }
87 } else {
88 Input(fineLevel, "lNodesPerDim");
89 }
90}
91
92template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
94 Level& coarseLevel) const {
95 return BuildP(fineLevel, coarseLevel);
96}
97
98template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
100 Level& coarseLevel) const {
101 FactoryMonitor m(*this, "Build", coarseLevel);
102
103 // Get parameter list
104 const ParameterList& pL = GetParameterList();
105
106 // obtain general variables
107 RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
108 RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, "Nullspace");
109 RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LO, GO, NO> > coordinates =
110 Get<RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LO, GO, NO> > >(fineLevel, "Coordinates");
111 LO numDimensions = coordinates->getNumVectors();
112 LO BlkSize = A->GetFixedBlockSize();
113
114 // Get fine level geometric data: g(l)FineNodesPerDir and g(l)NumFineNodes
115 Array<GO> gFineNodesPerDir(3);
116 Array<LO> lFineNodesPerDir(3);
117 // Get the number of points in each direction
118 if (fineLevel.GetLevelID() == 0) {
119 gFineNodesPerDir = fineLevel.Get<Array<GO> >("gNodesPerDim", NoFactory::get());
120 lFineNodesPerDir = fineLevel.Get<Array<LO> >("lNodesPerDim", NoFactory::get());
121 } else {
122 // Loading global number of nodes per diretions
123 gFineNodesPerDir = Get<Array<GO> >(fineLevel, "gNodesPerDim");
124
125 // Loading local number of nodes per diretions
126 lFineNodesPerDir = Get<Array<LO> >(fineLevel, "lNodesPerDim");
127 }
128 for (LO i = 0; i < 3; ++i) {
129 if (gFineNodesPerDir[i] == 0) {
130 GetOStream(Runtime0) << "gNodesPerDim in direction " << i << " is set to 1 from 0"
131 << std::endl;
132 gFineNodesPerDir[i] = 1;
133 }
134 if (lFineNodesPerDir[i] == 0) {
135 GetOStream(Runtime0) << "lNodesPerDim in direction " << i << " is set to 1 from 0"
136 << std::endl;
137 lFineNodesPerDir[i] = 1;
138 }
139 }
140 GO gNumFineNodes = gFineNodesPerDir[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
141 LO lNumFineNodes = lFineNodesPerDir[2] * lFineNodesPerDir[1] * lFineNodesPerDir[0];
142
143 // Get the coarsening rate
144 std::string coarsenRate = pL.get<std::string>("Coarsen");
145 Array<LO> coarseRate(3);
146 {
147 Teuchos::Array<LO> crates;
148 try {
149 crates = Teuchos::fromStringToArray<LO>(coarsenRate);
150 } catch (const Teuchos::InvalidArrayStringRepresentation& e) {
151 GetOStream(Errors, -1) << " *** Coarsen must be a string convertible into an array! *** "
152 << std::endl;
153 throw e;
154 }
155 TEUCHOS_TEST_FOR_EXCEPTION((crates.size() > 1) && (crates.size() < numDimensions),
157 "Coarsen must have at least as many components as the number of"
158 " spatial dimensions in the problem.");
159 for (LO i = 0; i < 3; ++i) {
160 if (i < numDimensions) {
161 if (crates.size() == 1) {
162 coarseRate[i] = crates[0];
163 } else if (i < crates.size()) {
164 coarseRate[i] = crates[i];
165 } else {
166 GetOStream(Errors, -1) << " *** Coarsen must be at least as long as the number of"
167 " spatial dimensions! *** "
168 << std::endl;
170 " *** Coarsen must be at least as long as the number of"
171 " spatial dimensions! *** \n");
172 }
173 } else {
174 coarseRate[i] = 1;
175 }
176 }
177 } // End of scope for crates
178
179 // Get the stencil type used for discretization
180 const std::string stencilType = pL.get<std::string>("stencil type");
181 if (stencilType != "full" && stencilType != "reduced") {
182 GetOStream(Errors, -1) << " *** stencil type must be set to: full or reduced, any other value "
183 "is trated as an error! *** "
184 << std::endl;
185 throw Exceptions::RuntimeError(" *** stencil type is neither full, nor reduced! *** \n");
186 }
187
188 // Get the strategy for PDE systems
189 const std::string blockStrategy = pL.get<std::string>("block strategy");
190 if (blockStrategy != "coupled" && blockStrategy != "uncoupled") {
191 GetOStream(Errors, -1) << " *** block strategy must be set to: coupled or uncoupled, any other "
192 "value is trated as an error! *** "
193 << std::endl;
194 throw Exceptions::RuntimeError(" *** block strategy is neither coupled, nor uncoupled! *** \n");
195 }
196
197 GO gNumCoarseNodes = 0;
198 LO lNumCoarseNodes = 0;
199 Array<GO> gIndices(3), gCoarseNodesPerDir(3), ghostGIDs, coarseNodesGIDs, colGIDs;
200 Array<LO> myOffset(3), lCoarseNodesPerDir(3), glCoarseNodesPerDir(3), endRate(3);
201 Array<bool> ghostInterface(6);
202 Array<int> boundaryFlags(3);
203 ArrayRCP<Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseNodes(numDimensions);
204 Array<ArrayView<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > fineNodes(numDimensions);
205 for (LO dim = 0; dim < numDimensions; ++dim) {
206 fineNodes[dim] = coordinates->getData(dim)();
207 }
208
209 // This struct stores PIDs, LIDs and GIDs on the fine mesh and GIDs on the coarse mesh.
210 RCP<NodesIDs> ghostedCoarseNodes = rcp(new NodesIDs{});
211
212 GetGeometricData(coordinates, coarseRate, gFineNodesPerDir, lFineNodesPerDir, BlkSize, // inputs
213 gIndices, myOffset, ghostInterface, endRate, gCoarseNodesPerDir, // outputs
214 lCoarseNodesPerDir, glCoarseNodesPerDir, ghostGIDs, coarseNodesGIDs, colGIDs,
215 gNumCoarseNodes, lNumCoarseNodes, coarseNodes, boundaryFlags,
216 ghostedCoarseNodes);
217
218 // Create the MultiVector of coarse coordinates
219 Xpetra::UnderlyingLib lib = coordinates->getMap()->lib();
220 RCP<const Map> coarseCoordsMap = MapFactory::Build(lib,
221 gNumCoarseNodes,
222 coarseNodesGIDs.view(0, lNumCoarseNodes),
223 coordinates->getMap()->getIndexBase(),
224 coordinates->getMap()->getComm());
225 Array<ArrayView<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseCoords(numDimensions);
226 for (LO dim = 0; dim < numDimensions; ++dim) {
227 coarseCoords[dim] = coarseNodes[dim]();
228 }
229 RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LO, GO, NO> > coarseCoordinates =
230 Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LO, GO, NO>::Build(coarseCoordsMap, coarseCoords(),
231 numDimensions);
232
233 // Now create a new matrix: Aghost that contains all the data
234 // locally needed to compute the local part of the prolongator.
235 // Here assuming that all the coarse nodes o and fine nodes +
236 // are local then all the data associated with the coarse
237 // nodes O and the fine nodes * needs to be imported.
238 //
239 // *--*--*--*--*--*--*--*
240 // | | | | | | | |
241 // o--+--+--o--+--+--O--*
242 // | | | | | | | |
243 // +--+--+--+--+--+--*--*
244 // | | | | | | | |
245 // +--+--+--+--+--+--*--*
246 // | | | | | | | |
247 // o--+--+--o--+--+--O--*
248 // | | | | | | | |
249 // +--+--+--+--+--+--*--*
250 // | | | | | | | |
251 // *--*--*--*--*--*--*--*
252 // | | | | | | | |
253 // O--*--*--O--*--*--O--*
254 //
255 // Creating that local matrix is easy enough using proper range
256 // and domain maps to import data from A. Note that with this
257 // approach we reorder the local entries using the domain map and
258 // can subsequently compute the prolongator without reordering.
259 // As usual we need to be careful about any coarsening rate
260 // change at the boundary!
261
262 // The ingredients needed are an importer, a range map and a domain map
263 Array<GO> ghostRowGIDs, ghostColGIDs, nodeSteps(3);
264 nodeSteps[0] = 1;
265 nodeSteps[1] = gFineNodesPerDir[0];
266 nodeSteps[2] = gFineNodesPerDir[0] * gFineNodesPerDir[1];
267 Array<LO> glFineNodesPerDir(3);
268 GO startingGID = A->getRowMap()->getMinGlobalIndex();
269 for (LO dim = 0; dim < 3; ++dim) {
270 LO numCoarseNodes = 0;
271 if (dim < numDimensions) {
272 startingGID -= myOffset[dim] * nodeSteps[dim];
273 numCoarseNodes = lCoarseNodesPerDir[dim];
274 if (ghostInterface[2 * dim]) {
275 ++numCoarseNodes;
276 }
277 if (ghostInterface[2 * dim + 1]) {
278 ++numCoarseNodes;
279 }
280 if (gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
281 glFineNodesPerDir[dim] = (numCoarseNodes - 2) * coarseRate[dim] + endRate[dim] + 1;
282 } else {
283 glFineNodesPerDir[dim] = (numCoarseNodes - 1) * coarseRate[dim] + 1;
284 }
285 } else {
286 glFineNodesPerDir[dim] = 1;
287 }
288 }
289 ghostRowGIDs.resize(glFineNodesPerDir[0] * glFineNodesPerDir[1] * glFineNodesPerDir[2] * BlkSize);
290 for (LO k = 0; k < glFineNodesPerDir[2]; ++k) {
291 for (LO j = 0; j < glFineNodesPerDir[1]; ++j) {
292 for (LO i = 0; i < glFineNodesPerDir[0]; ++i) {
293 for (LO l = 0; l < BlkSize; ++l) {
294 ghostRowGIDs[(k * glFineNodesPerDir[1] * glFineNodesPerDir[0] + j * glFineNodesPerDir[0] + i) * BlkSize + l] = startingGID + (k * gFineNodesPerDir[1] * gFineNodesPerDir[0] + j * gFineNodesPerDir[0] + i) * BlkSize + l;
295 }
296 }
297 }
298 }
299
300 // Looking at the above loops it is easy to find startingGID for the ghostColGIDs
301 Array<GO> startingGlobalIndices(numDimensions), dimStride(numDimensions);
302 Array<GO> startingColIndices(numDimensions), finishingColIndices(numDimensions);
303 GO colMinGID = 0;
304 Array<LO> colRange(numDimensions);
305 dimStride[0] = 1;
306 for (int dim = 1; dim < numDimensions; ++dim) {
307 dimStride[dim] = dimStride[dim - 1] * gFineNodesPerDir[dim - 1];
308 }
309 {
310 GO tmp = startingGID;
311 for (int dim = numDimensions; dim > 0; --dim) {
312 startingGlobalIndices[dim - 1] = tmp / dimStride[dim - 1];
313 tmp = tmp % dimStride[dim - 1];
314
315 if (startingGlobalIndices[dim - 1] > 0) {
316 startingColIndices[dim - 1] = startingGlobalIndices[dim - 1] - 1;
317 }
318 if (startingGlobalIndices[dim - 1] + glFineNodesPerDir[dim - 1] < gFineNodesPerDir[dim - 1]) {
319 finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1] + glFineNodesPerDir[dim - 1];
320 } else {
321 finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1] + glFineNodesPerDir[dim - 1] - 1;
322 }
323 colRange[dim - 1] = finishingColIndices[dim - 1] - startingColIndices[dim - 1] + 1;
324 colMinGID += startingColIndices[dim - 1] * dimStride[dim - 1];
325 }
326 }
327 ghostColGIDs.resize(colRange[0] * colRange[1] * colRange[2] * BlkSize);
328 for (LO k = 0; k < colRange[2]; ++k) {
329 for (LO j = 0; j < colRange[1]; ++j) {
330 for (LO i = 0; i < colRange[0]; ++i) {
331 for (LO l = 0; l < BlkSize; ++l) {
332 ghostColGIDs[(k * colRange[1] * colRange[0] + j * colRange[0] + i) * BlkSize + l] = colMinGID + (k * gFineNodesPerDir[1] * gFineNodesPerDir[0] + j * gFineNodesPerDir[0] + i) * BlkSize + l;
333 }
334 }
335 }
336 }
337
338 RCP<const Map> ghostedRowMap = Xpetra::MapFactory<LO, GO, NO>::Build(A->getRowMap()->lib(),
339 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
340 ghostRowGIDs(),
341 A->getRowMap()->getIndexBase(),
342 A->getRowMap()->getComm());
343 RCP<const Map> ghostedColMap = Xpetra::MapFactory<LO, GO, NO>::Build(A->getRowMap()->lib(),
344 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
345 ghostColGIDs(),
346 A->getRowMap()->getIndexBase(),
347 A->getRowMap()->getComm());
348 RCP<const Import> ghostImporter = Xpetra::ImportFactory<LO, GO, NO>::Build(A->getRowMap(),
349 ghostedRowMap);
350 RCP<const Matrix> Aghost = Xpetra::MatrixFactory<SC, LO, GO, NO>::Build(A, *ghostImporter,
351 ghostedRowMap,
352 ghostedRowMap);
353
354 // Create the maps and data structures for the projection matrix
355 RCP<const Map> rowMapP = A->getDomainMap();
356
357 RCP<const Map> domainMapP;
358
359 RCP<const Map> colMapP;
360
361 // At this point we need to create the column map which is a delicate operation.
362 // The entries in that map need to be ordered as follows:
363 // 1) first owned entries ordered by LID
364 // 2) second order the remaining entries by PID
365 // 3) entries with the same remote PID are ordered by GID.
366 // One piece of good news: lNumCoarseNodes is the number of ownedNodes and lNumGhostNodes
367 // is the number of remote nodes. The sorting can be limited to remote nodes
368 // as the owned ones are alreaded ordered by LID!
369
370 LO lNumGhostedNodes = ghostedCoarseNodes->GIDs.size();
371 {
372 std::vector<NodeID> colMapOrdering(lNumGhostedNodes);
373 for (LO ind = 0; ind < lNumGhostedNodes; ++ind) {
374 colMapOrdering[ind].GID = ghostedCoarseNodes->GIDs[ind];
375 if (ghostedCoarseNodes->PIDs[ind] == rowMapP->getComm()->getRank()) {
376 colMapOrdering[ind].PID = -1;
377 } else {
378 colMapOrdering[ind].PID = ghostedCoarseNodes->PIDs[ind];
379 }
380 colMapOrdering[ind].LID = ghostedCoarseNodes->LIDs[ind];
381 colMapOrdering[ind].lexiInd = ind;
382 }
383 std::sort(colMapOrdering.begin(), colMapOrdering.end(),
384 [](NodeID a, NodeID b) -> bool {
385 return ((a.PID < b.PID) || ((a.PID == b.PID) && (a.GID < b.GID)));
386 });
387
388 colGIDs.resize(BlkSize * lNumGhostedNodes);
389 for (LO ind = 0; ind < lNumGhostedNodes; ++ind) {
390 // Save the permutation calculated to go from Lexicographic indexing to column map indexing
391 ghostedCoarseNodes->colInds[colMapOrdering[ind].lexiInd] = ind;
392 for (LO dof = 0; dof < BlkSize; ++dof) {
393 colGIDs[BlkSize * ind + dof] = BlkSize * colMapOrdering[ind].GID + dof;
394 }
395 }
396 domainMapP = Xpetra::MapFactory<LO, GO, NO>::Build(rowMapP->lib(),
397 BlkSize * gNumCoarseNodes,
398 colGIDs.view(0, BlkSize * lNumCoarseNodes),
399 rowMapP->getIndexBase(),
400 rowMapP->getComm());
401 colMapP = Xpetra::MapFactory<LO, GO, NO>::Build(rowMapP->lib(),
402 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
403 colGIDs.view(0, colGIDs.size()),
404 rowMapP->getIndexBase(),
405 rowMapP->getComm());
406 } // End of scope for colMapOrdering and colGIDs
407
408 std::vector<size_t> strideInfo(1);
409 strideInfo[0] = BlkSize;
410 RCP<const Map> stridedDomainMapP = Xpetra::StridedMapFactory<LO, GO, NO>::Build(domainMapP,
411 strideInfo);
412
413 GO gnnzP = 0;
414 LO lnnzP = 0;
415 // coarse points have one nnz per row
416 gnnzP += gNumCoarseNodes;
417 lnnzP += lNumCoarseNodes;
418 // add all other points multiplying by 2^numDimensions
419 gnnzP += (gNumFineNodes - gNumCoarseNodes) * std::pow(2, numDimensions);
420 lnnzP += (lNumFineNodes - lNumCoarseNodes) * std::pow(2, numDimensions);
421 // finally multiply by the number of dofs per node
422 gnnzP = gnnzP * BlkSize;
423 lnnzP = lnnzP * BlkSize;
424
425 // Create the matrix itself using the above maps
426 RCP<Matrix> P;
427 P = rcp(new CrsMatrixWrap(rowMapP, colMapP, 0));
428 RCP<CrsMatrix> PCrs = toCrsMatrix(P);
429
430 ArrayRCP<size_t> iaP;
431 ArrayRCP<LO> jaP;
432 ArrayRCP<SC> valP;
433
434 PCrs->allocateAllValues(lnnzP, iaP, jaP, valP);
435
436 ArrayView<size_t> ia = iaP();
437 ArrayView<LO> ja = jaP();
438 ArrayView<SC> val = valP();
439 ia[0] = 0;
440
441 LO numCoarseElements = 1;
442 Array<LO> lCoarseElementsPerDir(3);
443 for (LO dim = 0; dim < numDimensions; ++dim) {
444 lCoarseElementsPerDir[dim] = lCoarseNodesPerDir[dim];
445 if (ghostInterface[2 * dim]) {
446 ++lCoarseElementsPerDir[dim];
447 }
448 if (!ghostInterface[2 * dim + 1]) {
449 --lCoarseElementsPerDir[dim];
450 }
451 numCoarseElements = numCoarseElements * lCoarseElementsPerDir[dim];
452 }
453
454 for (LO dim = numDimensions; dim < 3; ++dim) {
455 lCoarseElementsPerDir[dim] = 1;
456 }
457
458 // Loop over the coarse elements
459 Array<int> elementFlags(3);
460 Array<LO> elemInds(3), elementNodesPerDir(3), glElementRefTuple(3);
461 Array<LO> glElementRefTupleCG(3), glElementCoarseNodeCG(8);
462 const int numCoarseNodesInElement = std::pow(2, numDimensions);
463 const int nnzPerCoarseNode = (blockStrategy == "coupled") ? BlkSize : 1;
464 const int numRowsPerPoint = BlkSize;
465 for (elemInds[2] = 0; elemInds[2] < lCoarseElementsPerDir[2]; ++elemInds[2]) {
466 for (elemInds[1] = 0; elemInds[1] < lCoarseElementsPerDir[1]; ++elemInds[1]) {
467 for (elemInds[0] = 0; elemInds[0] < lCoarseElementsPerDir[0]; ++elemInds[0]) {
468 elementFlags[0] = 0;
469 elementFlags[1] = 0;
470 elementFlags[2] = 0;
471 for (int dim = 0; dim < 3; ++dim) {
472 // Detect boundary conditions on the element and set corresponding flags.
473 if (elemInds[dim] == 0 && elemInds[dim] == lCoarseElementsPerDir[dim] - 1) {
474 elementFlags[dim] = boundaryFlags[dim];
475 } else if (elemInds[dim] == 0 && (boundaryFlags[dim] == 1 || boundaryFlags[dim] == 3)) {
476 elementFlags[dim] += 1;
477 } else if ((elemInds[dim] == lCoarseElementsPerDir[dim] - 1) && (boundaryFlags[dim] == 2 || boundaryFlags[dim] == 3)) {
478 elementFlags[dim] += 2;
479 } else {
480 elementFlags[dim] = 0;
481 }
482
483 // Compute the number of nodes in the current element.
484 if (dim < numDimensions) {
485 if ((elemInds[dim] == lCoarseElementsPerDir[dim]) && (gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim])) {
486 elementNodesPerDir[dim] = endRate[dim] + 1;
487 } else {
488 elementNodesPerDir[dim] = coarseRate[dim] + 1;
489 }
490 } else {
491 elementNodesPerDir[dim] = 1;
492 }
493
494 // Get the lowest tuple of the element using the ghosted local coordinate system
495 glElementRefTuple[dim] = elemInds[dim] * coarseRate[dim];
496 glElementRefTupleCG[dim] = elemInds[dim];
497 }
498
499 // Now get the column map indices corresponding to the dofs associated with the current
500 // element's coarse nodes.
501 for (typename Array<LO>::size_type elem = 0; elem < glElementCoarseNodeCG.size(); ++elem) {
502 glElementCoarseNodeCG[elem] = glElementRefTupleCG[2] * glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0] + glElementRefTupleCG[1] * glCoarseNodesPerDir[0] + glElementRefTupleCG[0];
503 }
504 glElementCoarseNodeCG[4] += glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0];
505 glElementCoarseNodeCG[5] += glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0];
506 glElementCoarseNodeCG[6] += glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0];
507 glElementCoarseNodeCG[7] += glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0];
508
509 glElementCoarseNodeCG[2] += glCoarseNodesPerDir[0];
510 glElementCoarseNodeCG[3] += glCoarseNodesPerDir[0];
511 glElementCoarseNodeCG[6] += glCoarseNodesPerDir[0];
512 glElementCoarseNodeCG[7] += glCoarseNodesPerDir[0];
513
514 glElementCoarseNodeCG[1] += 1;
515 glElementCoarseNodeCG[3] += 1;
516 glElementCoarseNodeCG[5] += 1;
517 glElementCoarseNodeCG[7] += 1;
518
519 LO numNodesInElement = elementNodesPerDir[0] * elementNodesPerDir[1] * elementNodesPerDir[2];
520 // LO elementOffset = elemInds[2]*coarseRate[2]*glFineNodesPerDir[1]*glFineNodesPerDir[0]
521 // + elemInds[1]*coarseRate[1]*glFineNodesPerDir[0] + elemInds[0]*coarseRate[0];
522
523 // Compute the element prolongator
524 Teuchos::SerialDenseMatrix<LO, SC> Pi, Pf, Pe;
525 Array<LO> dofType(numNodesInElement * BlkSize), lDofInd(numNodesInElement * BlkSize);
526 ComputeLocalEntries(Aghost, coarseRate, endRate, BlkSize, elemInds, lCoarseElementsPerDir,
527 numDimensions, glFineNodesPerDir, gFineNodesPerDir, gIndices,
528 lCoarseNodesPerDir, ghostInterface, elementFlags, stencilType,
529 blockStrategy, elementNodesPerDir, numNodesInElement, colGIDs,
530 Pi, Pf, Pe, dofType, lDofInd);
531
532 // Find ghosted LID associated with nodes in the element and eventually which of these
533 // nodes are ghosts, this information is used to fill the local prolongator.
534 Array<LO> lNodeLIDs(numNodesInElement);
535 {
536 Array<LO> lNodeTuple(3), nodeInd(3);
537 for (nodeInd[2] = 0; nodeInd[2] < elementNodesPerDir[2]; ++nodeInd[2]) {
538 for (nodeInd[1] = 0; nodeInd[1] < elementNodesPerDir[1]; ++nodeInd[1]) {
539 for (nodeInd[0] = 0; nodeInd[0] < elementNodesPerDir[0]; ++nodeInd[0]) {
540 int stencilLength = 0;
541 if ((nodeInd[0] == 0 || nodeInd[0] == elementNodesPerDir[0] - 1) &&
542 (nodeInd[1] == 0 || nodeInd[1] == elementNodesPerDir[1] - 1) &&
543 (nodeInd[2] == 0 || nodeInd[2] == elementNodesPerDir[2] - 1)) {
544 stencilLength = 1;
545 } else {
546 stencilLength = std::pow(2, numDimensions);
547 }
548 LO nodeElementInd = nodeInd[2] * elementNodesPerDir[1] * elementNodesPerDir[1] + nodeInd[1] * elementNodesPerDir[0] + nodeInd[0];
549 for (int dim = 0; dim < 3; ++dim) {
550 lNodeTuple[dim] = glElementRefTuple[dim] - myOffset[dim] + nodeInd[dim];
551 }
552 if (lNodeTuple[0] < 0 || lNodeTuple[1] < 0 || lNodeTuple[2] < 0 ||
553 lNodeTuple[0] > lFineNodesPerDir[0] - 1 ||
554 lNodeTuple[1] > lFineNodesPerDir[1] - 1 ||
555 lNodeTuple[2] > lFineNodesPerDir[2] - 1) {
556 // This flags the ghosts nodes used for prolongator calculation but for which
557 // we do not own the row, hence we won't fill these values on this rank.
558 lNodeLIDs[nodeElementInd] = -1;
559 } else if ((nodeInd[0] == 0 && elemInds[0] != 0) ||
560 (nodeInd[1] == 0 && elemInds[1] != 0) ||
561 (nodeInd[2] == 0 && elemInds[2] != 0)) {
562 // This flags nodes that are owned but common to two coarse elements and that
563 // were already filled by another element, we don't want to fill them twice so
564 // we skip them
565 lNodeLIDs[nodeElementInd] = -2;
566 } else {
567 // The remaining nodes are locally owned and we need to fill the coresponding
568 // rows of the prolongator
569
570 // First we need to find which row needs to be filled
571 lNodeLIDs[nodeElementInd] = lNodeTuple[2] * lFineNodesPerDir[1] * lFineNodesPerDir[0] + lNodeTuple[1] * lFineNodesPerDir[0] + lNodeTuple[0];
572
573 // We also compute the row offset using arithmetic to ensure that we can loop
574 // easily over the nodes in a macro-element as well as facilitate on-node
575 // parallelization. The node serial code could be rewritten with two loops over
576 // the local part of the mesh to avoid the costly integer divisions...
577 Array<LO> refCoarsePointTuple(3);
578 for (int dim = 2; dim > -1; --dim) {
579 if (dim == 0) {
580 refCoarsePointTuple[dim] = (lNodeTuple[dim] + myOffset[dim]) / coarseRate[dim];
581 if (myOffset[dim] == 0) {
582 ++refCoarsePointTuple[dim];
583 }
584 } else {
585 // Note: no need for magnitudeType here, just use double because these things are LO's
586 refCoarsePointTuple[dim] =
587 std::ceil(static_cast<double>(lNodeTuple[dim] + myOffset[dim]) / coarseRate[dim]);
588 }
589 if ((lNodeTuple[dim] + myOffset[dim]) % coarseRate[dim] > 0) {
590 break;
591 }
592 }
593 const LO numCoarsePoints = refCoarsePointTuple[0] + refCoarsePointTuple[1] * lCoarseNodesPerDir[0] + refCoarsePointTuple[2] * lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0];
594 const LO numFinePoints = lNodeLIDs[nodeElementInd] + 1;
595
596 // The below formula computes the rowPtr for row 'n+BlkSize' and we are about to
597 // fill row 'n' to 'n+BlkSize'.
598 size_t rowPtr = (numFinePoints - numCoarsePoints) * numRowsPerPoint * numCoarseNodesInElement * nnzPerCoarseNode + numCoarsePoints * numRowsPerPoint;
599 if (dofType[nodeElementInd * BlkSize] == 0) {
600 // Check if current node is a coarse point
601 rowPtr = rowPtr - numRowsPerPoint;
602 } else {
603 rowPtr = rowPtr - numRowsPerPoint * numCoarseNodesInElement * nnzPerCoarseNode;
604 }
605
606 for (int dof = 0; dof < BlkSize; ++dof) {
607 // Now we loop over the stencil and fill the column indices and row values
608 int cornerInd = 0;
609 switch (dofType[nodeElementInd * BlkSize + dof]) {
610 case 0: // Corner node
611 if (nodeInd[2] == elementNodesPerDir[2] - 1) {
612 cornerInd += 4;
613 }
614 if (nodeInd[1] == elementNodesPerDir[1] - 1) {
615 cornerInd += 2;
616 }
617 if (nodeInd[0] == elementNodesPerDir[0] - 1) {
618 cornerInd += 1;
619 }
620 ia[lNodeLIDs[nodeElementInd] * BlkSize + dof + 1] = rowPtr + dof + 1;
621 ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[cornerInd]] * BlkSize + dof;
622 val[rowPtr + dof] = 1.0;
623 break;
624
625 case 1: // Edge node
626 ia[lNodeLIDs[nodeElementInd] * BlkSize + dof + 1] = rowPtr + (dof + 1) * numCoarseNodesInElement * nnzPerCoarseNode;
627 for (int ind1 = 0; ind1 < stencilLength; ++ind1) {
628 if (blockStrategy == "coupled") {
629 for (int ind2 = 0; ind2 < BlkSize; ++ind2) {
630 size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1 * BlkSize + ind2;
631 ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + ind2;
632 val[lRowPtr] = Pe(lDofInd[nodeElementInd * BlkSize + dof],
633 ind1 * BlkSize + ind2);
634 }
635 } else if (blockStrategy == "uncoupled") {
636 size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1;
637 ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + dof;
638 val[lRowPtr] = Pe(lDofInd[nodeElementInd * BlkSize + dof],
639 ind1 * BlkSize + dof);
640 }
641 }
642 break;
643
644 case 2: // Face node
645 ia[lNodeLIDs[nodeElementInd] * BlkSize + dof + 1] = rowPtr + (dof + 1) * numCoarseNodesInElement * nnzPerCoarseNode;
646 for (int ind1 = 0; ind1 < stencilLength; ++ind1) {
647 if (blockStrategy == "coupled") {
648 for (int ind2 = 0; ind2 < BlkSize; ++ind2) {
649 size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1 * BlkSize + ind2;
650 ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + ind2;
651 val[lRowPtr] = Pf(lDofInd[nodeElementInd * BlkSize + dof],
652 ind1 * BlkSize + ind2);
653 }
654 } else if (blockStrategy == "uncoupled") {
655 size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1;
656 // ja [lRowPtr] = colGIDs[glElementCoarseNodeCG[ind1]*BlkSize + dof];
657 ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + dof;
658 val[lRowPtr] = Pf(lDofInd[nodeElementInd * BlkSize + dof],
659 ind1 * BlkSize + dof);
660 }
661 }
662 break;
663
664 case 3: // Interior node
665 ia[lNodeLIDs[nodeElementInd] * BlkSize + dof + 1] = rowPtr + (dof + 1) * numCoarseNodesInElement * nnzPerCoarseNode;
666 for (int ind1 = 0; ind1 < stencilLength; ++ind1) {
667 if (blockStrategy == "coupled") {
668 for (int ind2 = 0; ind2 < BlkSize; ++ind2) {
669 size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1 * BlkSize + ind2;
670 ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + ind2;
671 val[lRowPtr] = Pi(lDofInd[nodeElementInd * BlkSize + dof],
672 ind1 * BlkSize + ind2);
673 }
674 } else if (blockStrategy == "uncoupled") {
675 size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1;
676 ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + dof;
677 val[lRowPtr] = Pi(lDofInd[nodeElementInd * BlkSize + dof],
678 ind1 * BlkSize + dof);
679 }
680 }
681 break;
682 }
683 }
684 }
685 }
686 }
687 }
688 } // End of scopt for lNodeTuple and nodeInd
689 }
690 }
691 }
692
693 // Sort all row's column indicies and entries by LID
695
696 // Set the values of the prolongation operators into the CrsMatrix P and call FillComplete
697 PCrs->setAllValues(iaP, jaP, valP);
698 PCrs->expertStaticFillComplete(domainMapP, rowMapP);
699
700 // set StridingInformation of P
701 if (A->IsView("stridedMaps") == true) {
702 P->CreateView("stridedMaps", A->getRowMap("stridedMaps"), stridedDomainMapP);
703 } else {
704 P->CreateView("stridedMaps", P->getRangeMap(), stridedDomainMapP);
705 }
706
707 // store the transfer operator and node coordinates on coarse level
708 Set(coarseLevel, "P", P);
709 Set(coarseLevel, "coarseCoordinates", coarseCoordinates);
710 Set<Array<GO> >(coarseLevel, "gCoarseNodesPerDim", gCoarseNodesPerDir);
711 Set<Array<LO> >(coarseLevel, "lCoarseNodesPerDim", lCoarseNodesPerDir);
712}
713
714template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
716 GetGeometricData(RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LO, GO, NO> >& coordinates,
717 const Array<LO> coarseRate, const Array<GO> gFineNodesPerDir,
718 const Array<LO> lFineNodesPerDir, const LO BlkSize, Array<GO>& gIndices,
719 Array<LO>& myOffset, Array<bool>& ghostInterface, Array<LO>& endRate,
720 Array<GO>& gCoarseNodesPerDir, Array<LO>& lCoarseNodesPerDir,
721 Array<LO>& glCoarseNodesPerDir, Array<GO>& ghostGIDs, Array<GO>& coarseNodesGIDs,
722 Array<GO>& colGIDs, GO& gNumCoarseNodes, LO& lNumCoarseNodes,
723 ArrayRCP<Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseNodes, Array<int>& boundaryFlags,
724 RCP<NodesIDs> ghostedCoarseNodes) const {
725 // This function is extracting the geometric information from the coordinates
726 // and creates the necessary data/formatting to perform locally the calculation
727 // of the pronlongator.
728 //
729 // inputs:
730
731 RCP<const Map> coordinatesMap = coordinates->getMap();
732 LO numDimensions = coordinates->getNumVectors();
733
734 // Using the coarsening rate and the fine level data,
735 // compute coarse level data
736
737 // Phase 1 //
738 // ------------------------------------------------------------------ //
739 // We first start by finding small informations on the mesh such as //
740 // the number of coarse nodes (local and global) and the number of //
741 // ghost nodes / the end rate of coarsening. //
742 // ------------------------------------------------------------------ //
743 GO minGlobalIndex = coordinatesMap->getMinGlobalIndex();
744 {
745 GO tmp;
746 gIndices[2] = minGlobalIndex / (gFineNodesPerDir[1] * gFineNodesPerDir[0]);
747 tmp = minGlobalIndex % (gFineNodesPerDir[1] * gFineNodesPerDir[0]);
748 gIndices[1] = tmp / gFineNodesPerDir[0];
749 gIndices[0] = tmp % gFineNodesPerDir[0];
750
751 myOffset[2] = gIndices[2] % coarseRate[2];
752 myOffset[1] = gIndices[1] % coarseRate[1];
753 myOffset[0] = gIndices[0] % coarseRate[0];
754 }
755
756 for (int dim = 0; dim < 3; ++dim) {
757 if (gIndices[dim] == 0) {
758 boundaryFlags[dim] += 1;
759 }
760 if (gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
761 boundaryFlags[dim] += 2;
762 }
763 }
764
765 // Check whether ghost nodes are needed in each direction
766 for (LO i = 0; i < numDimensions; ++i) {
767 if ((gIndices[i] != 0) && (gIndices[i] % coarseRate[i] > 0)) {
768 ghostInterface[2 * i] = true;
769 }
770 if (((gIndices[i] + lFineNodesPerDir[i]) != gFineNodesPerDir[i]) && ((gIndices[i] + lFineNodesPerDir[i] - 1) % coarseRate[i] > 0)) {
771 ghostInterface[2 * i + 1] = true;
772 }
773 }
774
775 for (LO i = 0; i < 3; ++i) {
776 if (i < numDimensions) {
777 lCoarseNodesPerDir[i] = (lFineNodesPerDir[i] + myOffset[i] - 1) / coarseRate[i];
778 if (myOffset[i] == 0) {
779 ++lCoarseNodesPerDir[i];
780 }
781 gCoarseNodesPerDir[i] = (gFineNodesPerDir[i] - 1) / coarseRate[i];
782 endRate[i] = (gFineNodesPerDir[i] - 1) % coarseRate[i];
783 if (endRate[i] == 0) {
784 ++gCoarseNodesPerDir[i];
785 endRate[i] = coarseRate[i];
786 }
787 } else {
788 // Most quantities need to be set to 1 for extra dimensions
789 // this is rather logical, an x-y plane is like a single layer
790 // of nodes in the z direction...
791 gCoarseNodesPerDir[i] = 1;
792 lCoarseNodesPerDir[i] = 1;
793 endRate[i] = 1;
794 }
795 }
796
797 gNumCoarseNodes = gCoarseNodesPerDir[0] * gCoarseNodesPerDir[1] * gCoarseNodesPerDir[2];
798 lNumCoarseNodes = lCoarseNodesPerDir[0] * lCoarseNodesPerDir[1] * lCoarseNodesPerDir[2];
799
800 // For each direction, determine how many ghost points are required.
801 LO lNumGhostNodes = 0;
802 Array<GO> startGhostedCoarseNode(3);
803 {
804 const int complementaryIndices[3][2] = {{1, 2}, {0, 2}, {0, 1}};
805 LO tmp = 0;
806 for (LO i = 0; i < 3; ++i) {
807 // The first branch of this if-statement will be used if the rank contains only one layer
808 // of nodes in direction i, that layer must also coincide with the boundary of the mesh
809 // and coarseRate[i] == endRate[i]...
810 if ((gIndices[i] == gFineNodesPerDir[i] - 1) && (gIndices[i] % coarseRate[i] == 0)) {
811 startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i] - 1;
812 } else {
813 startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i];
814 }
815 // First we load the number of locally owned coarse nodes
816 glCoarseNodesPerDir[i] = lCoarseNodesPerDir[i];
817
818 // Check whether a face in direction i needs ghost nodes
819 if (ghostInterface[2 * i] || ghostInterface[2 * i + 1]) {
820 ++glCoarseNodesPerDir[i];
821 if (i == 0) {
822 tmp = lCoarseNodesPerDir[1] * lCoarseNodesPerDir[2];
823 }
824 if (i == 1) {
825 tmp = lCoarseNodesPerDir[0] * lCoarseNodesPerDir[2];
826 }
827 if (i == 2) {
828 tmp = lCoarseNodesPerDir[0] * lCoarseNodesPerDir[1];
829 }
830 }
831 // If both faces in direction i need nodes, double the number of ghost nodes
832 if (ghostInterface[2 * i] && ghostInterface[2 * i + 1]) {
833 ++glCoarseNodesPerDir[i];
834 tmp = 2 * tmp;
835 }
836 lNumGhostNodes += tmp;
837
838 // The corners and edges need to be checked in 2D / 3D to add more ghosts nodes
839 for (LO j = 0; j < 2; ++j) {
840 for (LO k = 0; k < 2; ++k) {
841 // Check if two adjoining faces need ghost nodes and then add their common edge
842 if (ghostInterface[2 * complementaryIndices[i][0] + j] && ghostInterface[2 * complementaryIndices[i][1] + k]) {
843 lNumGhostNodes += lCoarseNodesPerDir[i];
844 // Add corners if three adjoining faces need ghost nodes,
845 // but add them only once! Hence when i == 0.
846 if (ghostInterface[2 * i] && (i == 0)) {
847 lNumGhostNodes += 1;
848 }
849 if (ghostInterface[2 * i + 1] && (i == 0)) {
850 lNumGhostNodes += 1;
851 }
852 }
853 }
854 }
855 tmp = 0;
856 }
857 } // end of scope for tmp and complementaryIndices
858
859 const LO lNumGhostedNodes = glCoarseNodesPerDir[0] * glCoarseNodesPerDir[1] * glCoarseNodesPerDir[2];
860 ghostedCoarseNodes->PIDs.resize(lNumGhostedNodes);
861 ghostedCoarseNodes->LIDs.resize(lNumGhostedNodes);
862 ghostedCoarseNodes->GIDs.resize(lNumGhostedNodes);
863 ghostedCoarseNodes->coarseGIDs.resize(lNumGhostedNodes);
864 ghostedCoarseNodes->colInds.resize(lNumGhostedNodes);
865
866 // We loop over all ghosted coarse nodes by increasing global lexicographic order
867 Array<LO> coarseNodeCoarseIndices(3), coarseNodeFineIndices(3), ijk(3);
868 LO currentIndex = -1;
869 for (ijk[2] = 0; ijk[2] < glCoarseNodesPerDir[2]; ++ijk[2]) {
870 for (ijk[1] = 0; ijk[1] < glCoarseNodesPerDir[1]; ++ijk[1]) {
871 for (ijk[0] = 0; ijk[0] < glCoarseNodesPerDir[0]; ++ijk[0]) {
872 currentIndex = ijk[2] * glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0] + ijk[1] * glCoarseNodesPerDir[0] + ijk[0];
873 coarseNodeCoarseIndices[0] = startGhostedCoarseNode[0] + ijk[0];
874 coarseNodeFineIndices[0] = coarseNodeCoarseIndices[0] * coarseRate[0];
875 if (coarseNodeFineIndices[0] > gFineNodesPerDir[0] - 1) {
876 coarseNodeFineIndices[0] = gFineNodesPerDir[0] - 1;
877 }
878 coarseNodeCoarseIndices[1] = startGhostedCoarseNode[1] + ijk[1];
879 coarseNodeFineIndices[1] = coarseNodeCoarseIndices[1] * coarseRate[1];
880 if (coarseNodeFineIndices[1] > gFineNodesPerDir[1] - 1) {
881 coarseNodeFineIndices[1] = gFineNodesPerDir[1] - 1;
882 }
883 coarseNodeCoarseIndices[2] = startGhostedCoarseNode[2] + ijk[2];
884 coarseNodeFineIndices[2] = coarseNodeCoarseIndices[2] * coarseRate[2];
885 if (coarseNodeFineIndices[2] > gFineNodesPerDir[2] - 1) {
886 coarseNodeFineIndices[2] = gFineNodesPerDir[2] - 1;
887 }
888 GO myGID = 0, myCoarseGID = -1;
889 GO factor[3] = {};
890 factor[2] = gFineNodesPerDir[1] * gFineNodesPerDir[0];
891 factor[1] = gFineNodesPerDir[0];
892 factor[0] = 1;
893 for (int dim = 0; dim < 3; ++dim) {
894 if (dim < numDimensions) {
895 if (gIndices[dim] - myOffset[dim] + ijk[dim] * coarseRate[dim] < gFineNodesPerDir[dim] - 1) {
896 myGID += (gIndices[dim] - myOffset[dim] + ijk[dim] * coarseRate[dim]) * factor[dim];
897 } else {
898 myGID += (gIndices[dim] - myOffset[dim] + (ijk[dim] - 1) * coarseRate[dim] + endRate[dim]) * factor[dim];
899 }
900 }
901 }
902 myCoarseGID = coarseNodeCoarseIndices[0] + coarseNodeCoarseIndices[1] * gCoarseNodesPerDir[0] + coarseNodeCoarseIndices[2] * gCoarseNodesPerDir[1] * gCoarseNodesPerDir[0];
903 ghostedCoarseNodes->GIDs[currentIndex] = myGID;
904 ghostedCoarseNodes->coarseGIDs[currentIndex] = myCoarseGID;
905 }
906 }
907 }
908 coordinatesMap->getRemoteIndexList(ghostedCoarseNodes->GIDs(),
909 ghostedCoarseNodes->PIDs(),
910 ghostedCoarseNodes->LIDs());
911
912 // Phase 2 //
913 // ------------------------------------------------------------------ //
914 // Build a list of GIDs to import the required ghost nodes. //
915 // The ordering of the ghosts nodes will be as natural as possible, //
916 // i.e. it should follow the GID ordering of the mesh. //
917 // ------------------------------------------------------------------ //
918
919 // Saddly we have to more or less redo what was just done to figure out the value of
920 // lNumGhostNodes, there might be some optimization possibility here...
921 ghostGIDs.resize(lNumGhostNodes);
922 LO countGhosts = 0;
923 // Get the GID of the first point on the current partition.
924 GO startingGID = minGlobalIndex;
925 Array<GO> startingIndices(3);
926 // We still want ghost nodes even if have with a 0 offset,
927 // except when we are on a boundary
928 if (ghostInterface[4] && (myOffset[2] == 0)) {
929 if (gIndices[2] + coarseRate[2] > gFineNodesPerDir[2]) {
930 startingGID -= endRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
931 } else {
932 startingGID -= coarseRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
933 }
934 } else {
935 startingGID -= myOffset[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
936 }
937 if (ghostInterface[2] && (myOffset[1] == 0)) {
938 if (gIndices[1] + coarseRate[1] > gFineNodesPerDir[1]) {
939 startingGID -= endRate[1] * gFineNodesPerDir[0];
940 } else {
941 startingGID -= coarseRate[1] * gFineNodesPerDir[0];
942 }
943 } else {
944 startingGID -= myOffset[1] * gFineNodesPerDir[0];
945 }
946 if (ghostInterface[0] && (myOffset[0] == 0)) {
947 if (gIndices[0] + coarseRate[0] > gFineNodesPerDir[0]) {
948 startingGID -= endRate[0];
949 } else {
950 startingGID -= coarseRate[0];
951 }
952 } else {
953 startingGID -= myOffset[0];
954 }
955
956 { // scope for tmp
957 GO tmp;
958 startingIndices[2] = startingGID / (gFineNodesPerDir[1] * gFineNodesPerDir[0]);
959 tmp = startingGID % (gFineNodesPerDir[1] * gFineNodesPerDir[0]);
960 startingIndices[1] = tmp / gFineNodesPerDir[0];
961 startingIndices[0] = tmp % gFineNodesPerDir[0];
962 }
963
964 GO ghostOffset[3] = {0, 0, 0};
965 LO lengthZero = lCoarseNodesPerDir[0];
966 LO lengthOne = lCoarseNodesPerDir[1];
967 LO lengthTwo = lCoarseNodesPerDir[2];
968 if (ghostInterface[0]) {
969 ++lengthZero;
970 }
971 if (ghostInterface[1]) {
972 ++lengthZero;
973 }
974 if (ghostInterface[2]) {
975 ++lengthOne;
976 }
977 if (ghostInterface[3]) {
978 ++lengthOne;
979 }
980 if (ghostInterface[4]) {
981 ++lengthTwo;
982 }
983 if (ghostInterface[5]) {
984 ++lengthTwo;
985 }
986
987 // First check the bottom face as it will have the lowest GIDs
988 if (ghostInterface[4]) {
989 ghostOffset[2] = startingGID;
990 for (LO j = 0; j < lengthOne; ++j) {
991 if ((j == lengthOne - 1) && (startingIndices[1] + j * coarseRate[1] + 1 > gFineNodesPerDir[1])) {
992 ghostOffset[1] = ((j - 1) * coarseRate[1] + endRate[1]) * gFineNodesPerDir[0];
993 } else {
994 ghostOffset[1] = j * coarseRate[1] * gFineNodesPerDir[0];
995 }
996 for (LO k = 0; k < lengthZero; ++k) {
997 if ((k == lengthZero - 1) && (startingIndices[0] + k * coarseRate[0] + 1 > gFineNodesPerDir[0])) {
998 ghostOffset[0] = (k - 1) * coarseRate[0] + endRate[0];
999 } else {
1000 ghostOffset[0] = k * coarseRate[0];
1001 }
1002 // If the partition includes a changed rate at the edge, ghost nodes need to be picked
1003 // carefully.
1004 // This if statement is repeated each time a ghost node is selected.
1005 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1006 ++countGhosts;
1007 }
1008 }
1009 }
1010
1011 // Sweep over the lCoarseNodesPerDir[2] coarse layers in direction 2 and gather necessary ghost
1012 // nodes located on these layers.
1013 for (LO i = 0; i < lengthTwo; ++i) {
1014 // Exclude the cases where ghost nodes exists on the faces in directions 2, these faces are
1015 // swept seperatly for efficiency.
1016 if (!((i == lengthTwo - 1) && ghostInterface[5]) && !((i == 0) && ghostInterface[4])) {
1017 // Set the ghostOffset in direction 2 taking into account a possible endRate different
1018 // from the regular coarseRate.
1019 if ((i == lengthTwo - 1) && !ghostInterface[5]) {
1020 ghostOffset[2] = startingGID + ((i - 1) * coarseRate[2] + endRate[2]) * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1021 } else {
1022 ghostOffset[2] = startingGID + i * coarseRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1023 }
1024 for (LO j = 0; j < lengthOne; ++j) {
1025 if ((j == 0) && ghostInterface[2]) {
1026 for (LO k = 0; k < lengthZero; ++k) {
1027 if ((k == lengthZero - 1) && (startingIndices[0] + k * coarseRate[0] + 1 > gFineNodesPerDir[0])) {
1028 if (k == 0) {
1029 ghostOffset[0] = endRate[0];
1030 } else {
1031 ghostOffset[0] = (k - 1) * coarseRate[0] + endRate[0];
1032 }
1033 } else {
1034 ghostOffset[0] = k * coarseRate[0];
1035 }
1036 // In this case j == 0 so ghostOffset[1] is zero and can be ignored
1037 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[0];
1038 ++countGhosts;
1039 }
1040 } else if ((j == lengthOne - 1) && ghostInterface[3]) {
1041 // Set the ghostOffset in direction 1 taking into account a possible endRate different
1042 // from the regular coarseRate.
1043 if ((j == lengthOne - 1) && (startingIndices[1] + j * coarseRate[1] + 1 > gFineNodesPerDir[1])) {
1044 ghostOffset[1] = ((j - 1) * coarseRate[1] + endRate[1]) * gFineNodesPerDir[0];
1045 } else {
1046 ghostOffset[1] = j * coarseRate[1] * gFineNodesPerDir[0];
1047 }
1048 for (LO k = 0; k < lengthZero; ++k) {
1049 if ((k == lengthZero - 1) && (startingIndices[0] + k * coarseRate[0] + 1 > gFineNodesPerDir[0])) {
1050 ghostOffset[0] = (k - 1) * coarseRate[0] + endRate[0];
1051 } else {
1052 ghostOffset[0] = k * coarseRate[0];
1053 }
1054 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1055 ++countGhosts;
1056 }
1057 } else {
1058 // Set ghostOffset[1] for side faces sweep
1059 if ((j == lengthOne - 1) && (startingIndices[1] + j * coarseRate[1] + 1 > gFineNodesPerDir[1])) {
1060 ghostOffset[1] = ((j - 1) * coarseRate[1] + endRate[1]) * gFineNodesPerDir[0];
1061 } else {
1062 ghostOffset[1] = j * coarseRate[1] * gFineNodesPerDir[0];
1063 }
1064
1065 // Set ghostOffset[0], ghostGIDs and countGhosts
1066 if (ghostInterface[0]) { // In that case ghostOffset[0]==0, so we can ignore it
1067 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1];
1068 ++countGhosts;
1069 }
1070 if (ghostInterface[1]) { // Grab ghost point at the end of direction 0.
1071 if ((startingIndices[0] + (lengthZero - 1) * coarseRate[0]) > gFineNodesPerDir[0] - 1) {
1072 if (lengthZero > 2) {
1073 ghostOffset[0] = (lengthZero - 2) * coarseRate[0] + endRate[0];
1074 } else {
1075 ghostOffset[0] = endRate[0];
1076 }
1077 } else {
1078 ghostOffset[0] = (lengthZero - 1) * coarseRate[0];
1079 }
1080 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1081 ++countGhosts;
1082 }
1083 }
1084 }
1085 }
1086 }
1087
1088 // Finally check the top face
1089 if (ghostInterface[5]) {
1090 if (startingIndices[2] + (lengthTwo - 1) * coarseRate[2] + 1 > gFineNodesPerDir[2]) {
1091 ghostOffset[2] = startingGID + ((lengthTwo - 2) * coarseRate[2] + endRate[2]) * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1092 } else {
1093 ghostOffset[2] = startingGID + (lengthTwo - 1) * coarseRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1094 }
1095 for (LO j = 0; j < lengthOne; ++j) {
1096 if ((j == lengthOne - 1) && (startingIndices[1] + j * coarseRate[1] + 1 > gFineNodesPerDir[1])) {
1097 ghostOffset[1] = ((j - 1) * coarseRate[1] + endRate[1]) * gFineNodesPerDir[0];
1098 } else {
1099 ghostOffset[1] = j * coarseRate[1] * gFineNodesPerDir[0];
1100 }
1101 for (LO k = 0; k < lengthZero; ++k) {
1102 if ((k == lengthZero - 1) && (startingIndices[0] + k * coarseRate[0] + 1 > gFineNodesPerDir[0])) {
1103 ghostOffset[0] = (k - 1) * coarseRate[0] + endRate[0];
1104 } else {
1105 ghostOffset[0] = k * coarseRate[0];
1106 }
1107 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1108 ++countGhosts;
1109 }
1110 }
1111 }
1112
1113 // Phase 3 //
1114 // ------------------------------------------------------------------ //
1115 // Final phase of this function, lists are being built to create the //
1116 // column and domain maps of the projection as well as the map and //
1117 // arrays for the coarse coordinates multivector. //
1118 // ------------------------------------------------------------------ //
1119
1120 Array<GO> gCoarseNodesGIDs(lNumCoarseNodes);
1121 LO currentNode, offset2, offset1, offset0;
1122 // Find the GIDs of the coarse nodes on the partition.
1123 for (LO ind2 = 0; ind2 < lCoarseNodesPerDir[2]; ++ind2) {
1124 if (myOffset[2] == 0) {
1125 offset2 = startingIndices[2] + myOffset[2];
1126 } else {
1127 if (startingIndices[2] + endRate[2] == gFineNodesPerDir[2] - 1) {
1128 offset2 = startingIndices[2] + endRate[2];
1129 } else {
1130 offset2 = startingIndices[2] + coarseRate[2];
1131 }
1132 }
1133 if (offset2 + ind2 * coarseRate[2] > gFineNodesPerDir[2] - 1) {
1134 offset2 += (ind2 - 1) * coarseRate[2] + endRate[2];
1135 } else {
1136 offset2 += ind2 * coarseRate[2];
1137 }
1138 offset2 = offset2 * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1139
1140 for (LO ind1 = 0; ind1 < lCoarseNodesPerDir[1]; ++ind1) {
1141 if (myOffset[1] == 0) {
1142 offset1 = startingIndices[1] + myOffset[1];
1143 } else {
1144 if (startingIndices[1] + endRate[1] == gFineNodesPerDir[1] - 1) {
1145 offset1 = startingIndices[1] + endRate[1];
1146 } else {
1147 offset1 = startingIndices[1] + coarseRate[1];
1148 }
1149 }
1150 if (offset1 + ind1 * coarseRate[1] > gFineNodesPerDir[1] - 1) {
1151 offset1 += (ind1 - 1) * coarseRate[1] + endRate[1];
1152 } else {
1153 offset1 += ind1 * coarseRate[1];
1154 }
1155 offset1 = offset1 * gFineNodesPerDir[0];
1156 for (LO ind0 = 0; ind0 < lCoarseNodesPerDir[0]; ++ind0) {
1157 offset0 = startingIndices[0];
1158 if (myOffset[0] == 0) {
1159 offset0 += ind0 * coarseRate[0];
1160 } else {
1161 offset0 += (ind0 + 1) * coarseRate[0];
1162 }
1163 if (offset0 > gFineNodesPerDir[0] - 1) {
1164 offset0 += endRate[0] - coarseRate[0];
1165 }
1166
1167 currentNode = ind2 * lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0] + ind1 * lCoarseNodesPerDir[0] + ind0;
1168 gCoarseNodesGIDs[currentNode] = offset2 + offset1 + offset0;
1169 }
1170 }
1171 }
1172
1173 // Actual loop over all the coarse/ghost nodes to find their index on the coarse mesh
1174 // and the corresponding dofs that will need to be added to colMapP.
1175 colGIDs.resize(BlkSize * (lNumCoarseNodes + lNumGhostNodes));
1176 coarseNodesGIDs.resize(lNumCoarseNodes);
1177 for (LO i = 0; i < numDimensions; ++i) {
1178 coarseNodes[i].resize(lNumCoarseNodes);
1179 }
1180 GO fineNodesPerCoarseSlab = coarseRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1181 GO fineNodesEndCoarseSlab = endRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1182 GO fineNodesPerCoarsePlane = coarseRate[1] * gFineNodesPerDir[0];
1183 GO fineNodesEndCoarsePlane = endRate[1] * gFineNodesPerDir[0];
1184 GO coarseNodesPerCoarseLayer = gCoarseNodesPerDir[1] * gCoarseNodesPerDir[0];
1185 GO gCoarseNodeOnCoarseGridGID;
1186 LO gInd[3], lCol;
1187 Array<int> ghostPIDs(lNumGhostNodes);
1188 Array<LO> ghostLIDs(lNumGhostNodes);
1189 Array<LO> ghostPermut(lNumGhostNodes);
1190 for (LO k = 0; k < lNumGhostNodes; ++k) {
1191 ghostPermut[k] = k;
1192 }
1193 coordinatesMap->getRemoteIndexList(ghostGIDs, ghostPIDs, ghostLIDs);
1194 sh_sort_permute(ghostPIDs.begin(), ghostPIDs.end(), ghostPermut.begin(), ghostPermut.end());
1195
1196 { // scope for tmpInds, tmpVars and tmp.
1197 GO tmpInds[3], tmpVars[2];
1198 LO tmp;
1199 // Loop over the coarse nodes of the partition and add them to colGIDs
1200 // that will be used to construct the column and domain maps of P as well
1201 // as to construct the coarse coordinates map.
1202 // for(LO col = 0; col < lNumCoarseNodes; ++col) { // This should most likely be replaced
1203 // by loops of lCoarseNodesPerDir[] to simplify arithmetics
1204 LO col = 0;
1205 LO firstCoarseNodeInds[3], currentCoarseNode;
1206 for (LO dim = 0; dim < 3; ++dim) {
1207 if (myOffset[dim] == 0) {
1208 firstCoarseNodeInds[dim] = 0;
1209 } else {
1210 firstCoarseNodeInds[dim] = coarseRate[dim] - myOffset[dim];
1211 }
1212 }
1213 Array<ArrayRCP<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > fineNodes(numDimensions);
1214 for (LO dim = 0; dim < numDimensions; ++dim) {
1215 fineNodes[dim] = coordinates->getData(dim);
1216 }
1217 for (LO k = 0; k < lCoarseNodesPerDir[2]; ++k) {
1218 for (LO j = 0; j < lCoarseNodesPerDir[1]; ++j) {
1219 for (LO i = 0; i < lCoarseNodesPerDir[0]; ++i) {
1220 col = k * lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0] + j * lCoarseNodesPerDir[0] + i;
1221
1222 // Check for endRate
1223 currentCoarseNode = 0;
1224 if (firstCoarseNodeInds[0] + i * coarseRate[0] > lFineNodesPerDir[0] - 1) {
1225 currentCoarseNode += firstCoarseNodeInds[0] + (i - 1) * coarseRate[0] + endRate[0];
1226 } else {
1227 currentCoarseNode += firstCoarseNodeInds[0] + i * coarseRate[0];
1228 }
1229 if (firstCoarseNodeInds[1] + j * coarseRate[1] > lFineNodesPerDir[1] - 1) {
1230 currentCoarseNode += (firstCoarseNodeInds[1] + (j - 1) * coarseRate[1] + endRate[1]) * lFineNodesPerDir[0];
1231 } else {
1232 currentCoarseNode += (firstCoarseNodeInds[1] + j * coarseRate[1]) * lFineNodesPerDir[0];
1233 }
1234 if (firstCoarseNodeInds[2] + k * coarseRate[2] > lFineNodesPerDir[2] - 1) {
1235 currentCoarseNode += (firstCoarseNodeInds[2] + (k - 1) * coarseRate[2] + endRate[2]) * lFineNodesPerDir[1] * lFineNodesPerDir[0];
1236 } else {
1237 currentCoarseNode += (firstCoarseNodeInds[2] + k * coarseRate[2]) * lFineNodesPerDir[1] * lFineNodesPerDir[0];
1238 }
1239 // Load coordinates
1240 for (LO dim = 0; dim < numDimensions; ++dim) {
1241 coarseNodes[dim][col] = fineNodes[dim][currentCoarseNode];
1242 }
1243
1244 if ((endRate[2] != coarseRate[2]) && (gCoarseNodesGIDs[col] > (gCoarseNodesPerDir[2] - 2) * fineNodesPerCoarseSlab + fineNodesEndCoarseSlab - 1)) {
1245 tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab + 1;
1246 tmpVars[0] = gCoarseNodesGIDs[col] - (tmpInds[2] - 1) * fineNodesPerCoarseSlab - fineNodesEndCoarseSlab;
1247 } else {
1248 tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab;
1249 tmpVars[0] = gCoarseNodesGIDs[col] % fineNodesPerCoarseSlab;
1250 }
1251 if ((endRate[1] != coarseRate[1]) && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2) * fineNodesPerCoarsePlane + fineNodesEndCoarsePlane - 1)) {
1252 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1253 tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1) * fineNodesPerCoarsePlane - fineNodesEndCoarsePlane;
1254 } else {
1255 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1256 tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1257 }
1258 if (tmpVars[1] == gFineNodesPerDir[0] - 1) {
1259 tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1260 } else {
1261 tmpInds[0] = tmpVars[1] / coarseRate[0];
1262 }
1263 gInd[2] = col / (lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0]);
1264 tmp = col % (lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0]);
1265 gInd[1] = tmp / lCoarseNodesPerDir[0];
1266 gInd[0] = tmp % lCoarseNodesPerDir[0];
1267 lCol = gInd[2] * (lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0]) + gInd[1] * lCoarseNodesPerDir[0] + gInd[0];
1268 gCoarseNodeOnCoarseGridGID = tmpInds[2] * coarseNodesPerCoarseLayer + tmpInds[1] * gCoarseNodesPerDir[0] + tmpInds[0];
1269 coarseNodesGIDs[lCol] = gCoarseNodeOnCoarseGridGID;
1270 for (LO dof = 0; dof < BlkSize; ++dof) {
1271 colGIDs[BlkSize * lCol + dof] = BlkSize * gCoarseNodeOnCoarseGridGID + dof;
1272 }
1273 }
1274 }
1275 }
1276 // Now loop over the ghost nodes of the partition to add them to colGIDs
1277 // since they will need to be included in the column map of P
1278 for (col = lNumCoarseNodes; col < lNumCoarseNodes + lNumGhostNodes; ++col) {
1279 if ((endRate[2] != coarseRate[2]) && (ghostGIDs[ghostPermut[col - lNumCoarseNodes]] > (gCoarseNodesPerDir[2] - 2) * fineNodesPerCoarseSlab + fineNodesEndCoarseSlab - 1)) {
1280 tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab + 1;
1281 tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] - (tmpInds[2] - 1) * fineNodesPerCoarseSlab - fineNodesEndCoarseSlab;
1282 } else {
1283 tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab;
1284 tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] % fineNodesPerCoarseSlab;
1285 }
1286 if ((endRate[1] != coarseRate[1]) && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2) * fineNodesPerCoarsePlane + fineNodesEndCoarsePlane - 1)) {
1287 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1288 tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1) * fineNodesPerCoarsePlane - fineNodesEndCoarsePlane;
1289 } else {
1290 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1291 tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1292 }
1293 if (tmpVars[1] == gFineNodesPerDir[0] - 1) {
1294 tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1295 } else {
1296 tmpInds[0] = tmpVars[1] / coarseRate[0];
1297 }
1298 gCoarseNodeOnCoarseGridGID = tmpInds[2] * coarseNodesPerCoarseLayer + tmpInds[1] * gCoarseNodesPerDir[0] + tmpInds[0];
1299 for (LO dof = 0; dof < BlkSize; ++dof) {
1300 colGIDs[BlkSize * col + dof] = BlkSize * gCoarseNodeOnCoarseGridGID + dof;
1301 }
1302 }
1303 } // End of scope for tmpInds, tmpVars and tmp
1304
1305} // GetGeometricData()
1306
1307template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1309 ComputeLocalEntries(const RCP<const Matrix>& Aghost, const Array<LO> coarseRate,
1310 const Array<LO> /* endRate */, const LO BlkSize, const Array<LO> elemInds,
1311 const Array<LO> /* lCoarseElementsPerDir */, const LO numDimensions,
1312 const Array<LO> lFineNodesPerDir, const Array<GO> /* gFineNodesPerDir */,
1313 const Array<GO> /* gIndices */, const Array<LO> /* lCoarseNodesPerDir */,
1314 const Array<bool> ghostInterface, const Array<int> elementFlags,
1315 const std::string stencilType, const std::string /* blockStrategy */,
1316 const Array<LO> elementNodesPerDir, const LO numNodesInElement,
1317 const Array<GO> /* colGIDs */,
1318 Teuchos::SerialDenseMatrix<LO, SC>& Pi, Teuchos::SerialDenseMatrix<LO, SC>& Pf,
1319 Teuchos::SerialDenseMatrix<LO, SC>& Pe, Array<LO>& dofType,
1320 Array<LO>& lDofInd) const {
1321 // First extract data from Aghost and move it to the corresponding dense matrix
1322 // This step requires to compute the number of nodes (resp dofs) in the current
1323 // coarse element, then allocate storage for local dense matrices, finally loop
1324 // over the dofs and extract the corresponding row in Aghost before dispactching
1325 // its data to the dense matrices.
1326 // At the same time we want to compute a mapping from the element indexing to
1327 // group indexing. The groups are the following: corner, edge, face and interior
1328 // nodes. We uses these groups to operate on the dense matrices but need to
1329 // store the nodes in their original order after groupd operations are completed.
1330 LO countInterior = 0, countFace = 0, countEdge = 0, countCorner = 0;
1331 Array<LO> collapseDir(numNodesInElement * BlkSize);
1332 for (LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1333 for (LO je = 0; je < elementNodesPerDir[1]; ++je) {
1334 for (LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1335 // Check for corner node
1336 if ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1337 for (LO dof = 0; dof < BlkSize; ++dof) {
1338 dofType[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 0;
1339 lDofInd[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = BlkSize * countCorner + dof;
1340 }
1341 ++countCorner;
1342
1343 // Check for edge node
1344 } else if (((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1)) || ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) || ((je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1))) {
1345 for (LO dof = 0; dof < BlkSize; ++dof) {
1346 dofType[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 1;
1347 lDofInd[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = BlkSize * countEdge + dof;
1348 if ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1)) {
1349 collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 0;
1350 } else if ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1351 collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 1;
1352 } else if ((je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1353 collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 2;
1354 }
1355 }
1356 ++countEdge;
1357
1358 // Check for face node
1359 } else if ((ke == 0 || ke == elementNodesPerDir[2] - 1) || (je == 0 || je == elementNodesPerDir[1] - 1) || (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1360 for (LO dof = 0; dof < BlkSize; ++dof) {
1361 dofType[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 2;
1362 lDofInd[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = BlkSize * countFace + dof;
1363 if (ke == 0 || ke == elementNodesPerDir[2] - 1) {
1364 collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 2;
1365 } else if (je == 0 || je == elementNodesPerDir[1] - 1) {
1366 collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 1;
1367 } else if (ie == 0 || ie == elementNodesPerDir[0] - 1) {
1368 collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 0;
1369 }
1370 }
1371 ++countFace;
1372
1373 // Otherwise it is an interior node
1374 } else {
1375 for (LO dof = 0; dof < BlkSize; ++dof) {
1376 dofType[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 3;
1377 lDofInd[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = BlkSize * countInterior + dof;
1378 }
1379 ++countInterior;
1380 }
1381 }
1382 }
1383 }
1384
1385 LO numInteriorNodes = 0, numFaceNodes = 0, numEdgeNodes = 0, numCornerNodes = 8;
1386 numInteriorNodes = (elementNodesPerDir[0] - 2) * (elementNodesPerDir[1] - 2) * (elementNodesPerDir[2] - 2);
1387 numFaceNodes = 2 * (elementNodesPerDir[0] - 2) * (elementNodesPerDir[1] - 2) + 2 * (elementNodesPerDir[0] - 2) * (elementNodesPerDir[2] - 2) + 2 * (elementNodesPerDir[1] - 2) * (elementNodesPerDir[2] - 2);
1388 numEdgeNodes = 4 * (elementNodesPerDir[0] - 2) + 4 * (elementNodesPerDir[1] - 2) + 4 * (elementNodesPerDir[2] - 2);
1389 // Diagonal blocks
1390 Teuchos::SerialDenseMatrix<LO, SC> Aii(BlkSize * numInteriorNodes, BlkSize * numInteriorNodes);
1391 Teuchos::SerialDenseMatrix<LO, SC> Aff(BlkSize * numFaceNodes, BlkSize * numFaceNodes);
1392 Teuchos::SerialDenseMatrix<LO, SC> Aee(BlkSize * numEdgeNodes, BlkSize * numEdgeNodes);
1393 // Upper triangular blocks
1394 Teuchos::SerialDenseMatrix<LO, SC> Aif(BlkSize * numInteriorNodes, BlkSize * numFaceNodes);
1395 Teuchos::SerialDenseMatrix<LO, SC> Aie(BlkSize * numInteriorNodes, BlkSize * numEdgeNodes);
1396 Teuchos::SerialDenseMatrix<LO, SC> Afe(BlkSize * numFaceNodes, BlkSize * numEdgeNodes);
1397 // Coarse nodes "right hand sides" and local prolongators
1398 Teuchos::SerialDenseMatrix<LO, SC> Aic(BlkSize * numInteriorNodes, BlkSize * numCornerNodes);
1399 Teuchos::SerialDenseMatrix<LO, SC> Afc(BlkSize * numFaceNodes, BlkSize * numCornerNodes);
1400 Teuchos::SerialDenseMatrix<LO, SC> Aec(BlkSize * numEdgeNodes, BlkSize * numCornerNodes);
1401 Pi.shape(BlkSize * numInteriorNodes, BlkSize * numCornerNodes);
1402 Pf.shape(BlkSize * numFaceNodes, BlkSize * numCornerNodes);
1403 Pe.shape(BlkSize * numEdgeNodes, BlkSize * numCornerNodes);
1404
1405 ArrayView<const LO> rowIndices;
1406 ArrayView<const SC> rowValues;
1407 LO idof, iInd, jInd;
1408 int iType = 0, jType = 0;
1409 int orientation = -1;
1410 int collapseFlags[3] = {};
1411 Array<SC> stencil((std::pow(3, numDimensions)) * BlkSize);
1412 // LBV Note: we could skip the extraction of rows corresponding to coarse nodes
1413 // we might want to fuse the first three loops and do integer arithmetic
1414 // to have more optimization during compilation...
1415 for (LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1416 for (LO je = 0; je < elementNodesPerDir[1]; ++je) {
1417 for (LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1418 collapseFlags[0] = 0;
1419 collapseFlags[1] = 0;
1420 collapseFlags[2] = 0;
1421 if ((elementFlags[0] == 1 || elementFlags[0] == 3) && ie == 0) {
1422 collapseFlags[0] += 1;
1423 }
1424 if ((elementFlags[0] == 2 || elementFlags[0] == 3) && ie == elementNodesPerDir[0] - 1) {
1425 collapseFlags[0] += 2;
1426 }
1427 if ((elementFlags[1] == 1 || elementFlags[1] == 3) && je == 0) {
1428 collapseFlags[1] += 1;
1429 }
1430 if ((elementFlags[1] == 2 || elementFlags[1] == 3) && je == elementNodesPerDir[1] - 1) {
1431 collapseFlags[1] += 2;
1432 }
1433 if ((elementFlags[2] == 1 || elementFlags[2] == 3) && ke == 0) {
1434 collapseFlags[2] += 1;
1435 }
1436 if ((elementFlags[2] == 2 || elementFlags[2] == 3) && ke == elementNodesPerDir[2] - 1) {
1437 collapseFlags[2] += 2;
1438 }
1439
1440 // Based on (ie, je, ke) we detect the type of node we are dealing with
1441 GetNodeInfo(ie, je, ke, elementNodesPerDir, &iType, iInd, &orientation);
1442 for (LO dof0 = 0; dof0 < BlkSize; ++dof0) {
1443 // Compute the dof index for each dof at point (ie, je, ke)
1444 idof = ((elemInds[2] * coarseRate[2] + ke) * lFineNodesPerDir[1] * lFineNodesPerDir[0] + (elemInds[1] * coarseRate[1] + je) * lFineNodesPerDir[0] + elemInds[0] * coarseRate[0] + ie) * BlkSize + dof0;
1445 Aghost->getLocalRowView(idof, rowIndices, rowValues);
1446 FormatStencil(BlkSize, ghostInterface, ie, je, ke, rowValues,
1447 elementNodesPerDir, collapseFlags, stencilType, stencil);
1448 LO io, jo, ko;
1449 if (iType == 3) { // interior node, no stencil collapse needed
1450 for (LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1451 // Find (if, jf, kf) the indices associated with the interacting node
1452 ko = ke + (interactingNode / 9 - 1);
1453 {
1454 LO tmp = interactingNode % 9;
1455 jo = je + (tmp / 3 - 1);
1456 io = ie + (tmp % 3 - 1);
1457 int dummy;
1458 GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1459 }
1460 if ((io > -1 && io < elementNodesPerDir[0]) &&
1461 (jo > -1 && jo < elementNodesPerDir[1]) &&
1462 (ko > -1 && ko < elementNodesPerDir[2])) {
1463 for (LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1464 if (jType == 3) {
1465 Aii(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1466 stencil[interactingNode * BlkSize + dof1];
1467 } else if (jType == 2) {
1468 Aif(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1469 stencil[interactingNode * BlkSize + dof1];
1470 } else if (jType == 1) {
1471 Aie(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1472 stencil[interactingNode * BlkSize + dof1];
1473 } else if (jType == 0) {
1474 Aic(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1475 -stencil[interactingNode * BlkSize + dof1];
1476 }
1477 }
1478 }
1479 }
1480 } else if (iType == 2) { // Face node, collapse stencil along face normal (*orientation)
1481 CollapseStencil(2, orientation, collapseFlags, stencil);
1482 for (LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1483 // Find (if, jf, kf) the indices associated with the interacting node
1484 ko = ke + (interactingNode / 9 - 1);
1485 {
1486 LO tmp = interactingNode % 9;
1487 jo = je + (tmp / 3 - 1);
1488 io = ie + (tmp % 3 - 1);
1489 int dummy;
1490 GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1491 }
1492 if ((io > -1 && io < elementNodesPerDir[0]) &&
1493 (jo > -1 && jo < elementNodesPerDir[1]) &&
1494 (ko > -1 && ko < elementNodesPerDir[2])) {
1495 for (LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1496 if (jType == 2) {
1497 Aff(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1498 stencil[interactingNode * BlkSize + dof1];
1499 } else if (jType == 1) {
1500 Afe(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1501 stencil[interactingNode * BlkSize + dof1];
1502 } else if (jType == 0) {
1503 Afc(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1504 -stencil[interactingNode * BlkSize + dof1];
1505 }
1506 }
1507 }
1508 }
1509 } else if (iType == 1) { // Edge node, collapse stencil perpendicular to edge direction
1510 CollapseStencil(1, orientation, collapseFlags, stencil);
1511 for (LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1512 // Find (if, jf, kf) the indices associated with the interacting node
1513 ko = ke + (interactingNode / 9 - 1);
1514 {
1515 LO tmp = interactingNode % 9;
1516 jo = je + (tmp / 3 - 1);
1517 io = ie + (tmp % 3 - 1);
1518 int dummy;
1519 GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1520 }
1521 if ((io > -1 && io < elementNodesPerDir[0]) &&
1522 (jo > -1 && jo < elementNodesPerDir[1]) &&
1523 (ko > -1 && ko < elementNodesPerDir[2])) {
1524 for (LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1525 if (jType == 1) {
1526 Aee(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1527 stencil[interactingNode * BlkSize + dof1];
1528 } else if (jType == 0) {
1529 Aec(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1530 -stencil[interactingNode * BlkSize + dof1];
1531 }
1532 } // Check if interacting node is in element or not
1533 } // Pc is the identity so no need to treat iType == 0
1534 } // Loop over interacting nodes within a row
1535 } // Check for current node type
1536 } // Loop over degrees of freedom associated to a given node
1537 } // Loop over i
1538 } // Loop over j
1539 } // Loop over k
1540
1541 // Compute the projection operators for edge and interior nodes
1542 //
1543 // [P_i] = [A_{ii} & A_{if} & A_{ie}]^{-1} [A_{ic}]
1544 // [P_f] = [ 0 & A_{ff} & A_{fe}] [A_{fc}]
1545 // [P_e] = [ 0 & 0 & A_{ee}] [A_{ec}]
1546 // [P_c] = I_c
1547 //
1548 { // Compute P_e
1549 // We need to solve for P_e in: A_{ee}*P_e = A_{fc}
1550 // So we simple do P_e = A_{ee}^{-1)*A_{ec}
1551 Teuchos::SerialDenseSolver<LO, SC> problem;
1552 problem.setMatrix(Teuchos::rcp(&Aee, false));
1553 problem.setVectors(Teuchos::rcp(&Pe, false), Teuchos::rcp(&Aec, false));
1554 problem.factorWithEquilibration(true);
1555 problem.solve();
1556 problem.unequilibrateLHS();
1557 }
1558
1559 { // Compute P_f
1560 // We need to solve for P_f in: A_{ff}*P_f + A_{fe}*P_e = A_{fc}
1561 // Step one: A_{fc} = 1.0*A_{fc} + (-1.0)*A_{fe}*P_e
1562 Afc.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Afe, Pe, 1.0);
1563 // Step two: P_f = A_{ff}^{-1}*A_{fc}
1564 Teuchos::SerialDenseSolver<LO, SC> problem;
1565 problem.setMatrix(Teuchos::rcp(&Aff, false));
1566 problem.setVectors(Teuchos::rcp(&Pf, false), Teuchos::rcp(&Afc, false));
1567 problem.factorWithEquilibration(true);
1568 problem.solve();
1569 problem.unequilibrateLHS();
1570 }
1571
1572 { // Compute P_i
1573 // We need to solve for P_i in: A_{ii}*P_i + A_{if}*P_f + A_{ie}*P_e = A_{ic}
1574 // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{ie}*P_e
1575 Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aie, Pe, 1.0);
1576 // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{if}*P_f
1577 Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aif, Pf, 1.0);
1578 // Step two: P_i = A_{ii}^{-1}*A_{ic}
1579 Teuchos::SerialDenseSolver<LO, SC> problem;
1580 problem.setMatrix(Teuchos::rcp(&Aii, false));
1581 problem.setVectors(Teuchos::rcp(&Pi, false), Teuchos::rcp(&Aic, false));
1582 problem.factorWithEquilibration(true);
1583 problem.solve();
1584 problem.unequilibrateLHS();
1585 }
1586
1587} // ComputeLocalEntries()
1588
1589template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1591 CollapseStencil(const int type, const int orientation, const int /* collapseFlags */[3],
1592 Array<SC>& stencil) const {
1593 if (type == 2) { // Face stencil collapse
1594 // Let's hope things vectorize well inside this if statement
1595 if (orientation == 0) {
1596 for (LO i = 0; i < 9; ++i) {
1597 stencil[3 * i + 1] = stencil[3 * i] + stencil[3 * i + 1] + stencil[3 * i + 2];
1598 stencil[3 * i] = 0;
1599 stencil[3 * i + 2] = 0;
1600 }
1601 } else if (orientation == 1) {
1602 for (LO i = 0; i < 3; ++i) {
1603 stencil[9 * i + 3] = stencil[9 * i + 0] + stencil[9 * i + 3] + stencil[9 * i + 6];
1604 stencil[9 * i + 0] = 0;
1605 stencil[9 * i + 6] = 0;
1606 stencil[9 * i + 4] = stencil[9 * i + 1] + stencil[9 * i + 4] + stencil[9 * i + 7];
1607 stencil[9 * i + 1] = 0;
1608 stencil[9 * i + 7] = 0;
1609 stencil[9 * i + 5] = stencil[9 * i + 2] + stencil[9 * i + 5] + stencil[9 * i + 8];
1610 stencil[9 * i + 2] = 0;
1611 stencil[9 * i + 8] = 0;
1612 }
1613 } else if (orientation == 2) {
1614 for (LO i = 0; i < 9; ++i) {
1615 stencil[i + 9] = stencil[i] + stencil[i + 9] + stencil[i + 18];
1616 stencil[i] = 0;
1617 stencil[i + 18] = 0;
1618 }
1619 }
1620 } else if (type == 1) { // Edge stencil collapse
1621 SC tmp1 = 0, tmp2 = 0, tmp3 = 0;
1622 if (orientation == 0) {
1623 for (LO i = 0; i < 9; ++i) {
1624 tmp1 += stencil[0 + 3 * i];
1625 stencil[0 + 3 * i] = 0;
1626 tmp2 += stencil[1 + 3 * i];
1627 stencil[1 + 3 * i] = 0;
1628 tmp3 += stencil[2 + 3 * i];
1629 stencil[2 + 3 * i] = 0;
1630 }
1631 stencil[12] = tmp1;
1632 stencil[13] = tmp2;
1633 stencil[14] = tmp3;
1634 } else if (orientation == 1) {
1635 for (LO i = 0; i < 3; ++i) {
1636 tmp1 += stencil[0 + i] + stencil[9 + i] + stencil[18 + i];
1637 stencil[0 + i] = 0;
1638 stencil[9 + i] = 0;
1639 stencil[18 + i] = 0;
1640 tmp2 += stencil[3 + i] + stencil[12 + i] + stencil[21 + i];
1641 stencil[3 + i] = 0;
1642 stencil[12 + i] = 0;
1643 stencil[21 + i] = 0;
1644 tmp3 += stencil[6 + i] + stencil[15 + i] + stencil[24 + i];
1645 stencil[6 + i] = 0;
1646 stencil[15 + i] = 0;
1647 stencil[24 + i] = 0;
1648 }
1649 stencil[10] = tmp1;
1650 stencil[13] = tmp2;
1651 stencil[16] = tmp3;
1652 } else if (orientation == 2) {
1653 for (LO i = 0; i < 9; ++i) {
1654 tmp1 += stencil[i];
1655 stencil[i] = 0;
1656 tmp2 += stencil[i + 9];
1657 stencil[i + 9] = 0;
1658 tmp3 += stencil[i + 18];
1659 stencil[i + 18] = 0;
1660 }
1661 stencil[4] = tmp1;
1662 stencil[13] = tmp2;
1663 stencil[22] = tmp3;
1664 }
1665 }
1666} // CollapseStencil
1667
1668template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1670 FormatStencil(const LO BlkSize, const Array<bool> /* ghostInterface */, const LO /* ie */, const LO /* je */,
1671 const LO /* ke */, const ArrayView<const SC> rowValues, const Array<LO> /* elementNodesPerDir */,
1672 const int collapseFlags[3], const std::string stencilType, Array<SC>& stencil)
1673 const {
1674 if (stencilType == "reduced") {
1675 Array<int> fullStencilInds(7);
1676 fullStencilInds[0] = 4;
1677 fullStencilInds[1] = 10;
1678 fullStencilInds[2] = 12;
1679 fullStencilInds[3] = 13;
1680 fullStencilInds[4] = 14;
1681 fullStencilInds[5] = 16;
1682 fullStencilInds[6] = 22;
1683
1684 // Create a mask array to automate mesh boundary processing
1685 Array<int> mask(7);
1686 int stencilSize = rowValues.size();
1687 if (collapseFlags[0] + collapseFlags[1] + collapseFlags[2] > 0) {
1688 if (stencilSize == 1) {
1689 // The assumption is made that if only one non-zero exists in the row
1690 // then this represent a Dirichlet BC being enforced.
1691 // One might want to zero out the corresponding row in the prolongator.
1692 mask[0] = 1;
1693 mask[1] = 1;
1694 mask[2] = 1;
1695 mask[4] = 1;
1696 mask[5] = 1;
1697 mask[6] = 1;
1698 } else {
1699 // Here we are looking at Neumann like BC where only a flux is prescribed
1700 // and less non-zeros will be present in the corresponding row.
1701 if (collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1702 mask[0] = 1;
1703 }
1704 if (collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1705 mask[6] = 1;
1706 }
1707 if (collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1708 mask[1] = 1;
1709 }
1710 if (collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1711 mask[5] = 1;
1712 }
1713 if (collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1714 mask[2] = 1;
1715 }
1716 if (collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1717 mask[4] = 1;
1718 }
1719 }
1720 }
1721
1722 int offset = 0;
1723 for (int ind = 0; ind < 7; ++ind) {
1724 if (mask[ind] == 1) {
1725 for (int dof = 0; dof < BlkSize; ++dof) {
1726 stencil[BlkSize * fullStencilInds[ind] + dof] = 0.0;
1727 }
1728 ++offset; // The offset is used to stay within rowValues bounds
1729 } else {
1730 for (int dof = 0; dof < BlkSize; ++dof) {
1731 stencil[BlkSize * fullStencilInds[ind] + dof] = rowValues[BlkSize * (ind - offset) + dof];
1732 }
1733 }
1734 }
1735 } else if (stencilType == "full") {
1736 // Create a mask array to automate mesh boundary processing
1737 Array<int> mask(27);
1738 if (collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1739 for (int ind = 0; ind < 9; ++ind) {
1740 mask[ind] = 1;
1741 }
1742 }
1743 if (collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1744 for (int ind = 0; ind < 9; ++ind) {
1745 mask[18 + ind] = 1;
1746 }
1747 }
1748 if (collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1749 for (int ind1 = 0; ind1 < 3; ++ind1) {
1750 for (int ind2 = 0; ind2 < 3; ++ind2) {
1751 mask[ind1 * 9 + ind2] = 1;
1752 }
1753 }
1754 }
1755 if (collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1756 for (int ind1 = 0; ind1 < 3; ++ind1) {
1757 for (int ind2 = 0; ind2 < 3; ++ind2) {
1758 mask[ind1 * 9 + ind2 + 6] = 1;
1759 }
1760 }
1761 }
1762 if (collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1763 for (int ind = 0; ind < 9; ++ind) {
1764 mask[3 * ind] = 1;
1765 }
1766 }
1767 if (collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1768 for (int ind = 0; ind < 9; ++ind) {
1769 mask[3 * ind + 2] = 1;
1770 }
1771 }
1772
1773 // Now the stencil is extracted and formated
1774 int offset = 0;
1775 for (LO ind = 0; ind < 27; ++ind) {
1776 if (mask[ind] == 0) {
1777 for (int dof = 0; dof < BlkSize; ++dof) {
1778 stencil[BlkSize * ind + dof] = 0.0;
1779 }
1780 ++offset; // The offset is used to stay within rowValues bounds
1781 } else {
1782 for (int dof = 0; dof < BlkSize; ++dof) {
1783 stencil[BlkSize * ind + dof] = rowValues[BlkSize * (ind - offset) + dof];
1784 }
1785 }
1786 }
1787 } // stencilTpye
1788} // FormatStencil()
1789
1790template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1792 const LO ie, const LO je, const LO ke,
1793 const Array<LO> elementNodesPerDir,
1794 int* type, LO& ind, int* orientation) const {
1795 // Initialize flags with -1 to be able to catch issues easily
1796 *type = -1, ind = 0, *orientation = -1;
1797 if ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1798 // Corner node
1799 *type = 0;
1800 ind = 4 * ke / (elementNodesPerDir[2] - 1) + 2 * je / (elementNodesPerDir[1] - 1) + ie / (elementNodesPerDir[0] - 1);
1801 } else if (((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1)) || ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) || ((je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1))) {
1802 // Edge node
1803 *type = 1;
1804 if (ke > 0) {
1805 ind += 2 * (elementNodesPerDir[0] - 2 + elementNodesPerDir[1] - 2);
1806 }
1807 if (ke == elementNodesPerDir[2] - 1) {
1808 ind += 4 * (elementNodesPerDir[2] - 2);
1809 }
1810 if ((ke == 0) || (ke == elementNodesPerDir[2] - 1)) { // je or ie edges
1811 if (je == 0) { // jlo ie edge
1812 *orientation = 0;
1813 ind += ie - 1;
1814 } else if (je == elementNodesPerDir[1] - 1) { // jhi ie edge
1815 *orientation = 0;
1816 ind += 2 * (elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1817 } else { // ilo or ihi je edge
1818 *orientation = 1;
1819 ind += elementNodesPerDir[0] - 2 + 2 * (je - 1) + ie / (elementNodesPerDir[0] - 1);
1820 }
1821 } else { // ke edges
1822 *orientation = 2;
1823 ind += 4 * (ke - 1) + 2 * (je / (elementNodesPerDir[1] - 1)) + ie / (elementNodesPerDir[0] - 1);
1824 }
1825 } else if ((ke == 0 || ke == elementNodesPerDir[2] - 1) || (je == 0 || je == elementNodesPerDir[1] - 1) || (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1826 // Face node
1827 *type = 2;
1828 if (ke == 0) { // current node is on "bottom" face
1829 *orientation = 2;
1830 ind = (je - 1) * (elementNodesPerDir[0] - 2) + ie - 1;
1831 } else {
1832 // add nodes from "bottom" face
1833 ind += (elementNodesPerDir[1] - 2) * (elementNodesPerDir[0] - 2);
1834 // add nodes from side faces
1835 ind += 2 * (ke - 1) * (elementNodesPerDir[1] - 2 + elementNodesPerDir[0] - 2);
1836 if (ke == elementNodesPerDir[2] - 1) { // current node is on "top" face
1837 *orientation = 2;
1838 ind += (je - 1) * (elementNodesPerDir[0] - 2) + ie - 1;
1839 } else { // current node is on a side face
1840 if (je == 0) {
1841 *orientation = 1;
1842 ind += ie - 1;
1843 } else if (je == elementNodesPerDir[1] - 1) {
1844 *orientation = 1;
1845 ind += 2 * (elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1846 } else {
1847 *orientation = 0;
1848 ind += elementNodesPerDir[0] - 2 + 2 * (je - 1) + ie / (elementNodesPerDir[0] - 1);
1849 }
1850 }
1851 }
1852 } else {
1853 // Interior node
1854 *type = 3;
1855 ind = (ke - 1) * (elementNodesPerDir[1] - 2) * (elementNodesPerDir[0] - 2) + (je - 1) * (elementNodesPerDir[0] - 2) + ie - 1;
1856 }
1857} // GetNodeInfo()
1858
1859template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1861 const typename Teuchos::Array<LocalOrdinal>::iterator& first1,
1862 const typename Teuchos::Array<LocalOrdinal>::iterator& last1,
1863 const typename Teuchos::Array<LocalOrdinal>::iterator& first2,
1864 const typename Teuchos::Array<LocalOrdinal>::iterator& /* last2 */) const {
1865 typedef typename std::iterator_traits<typename Teuchos::Array<LocalOrdinal>::iterator>::difference_type DT;
1866 DT n = last1 - first1;
1867 DT m = n / 2;
1868 DT z = Teuchos::OrdinalTraits<DT>::zero();
1869 while (m > z) {
1870 DT max = n - m;
1871 for (DT j = 0; j < max; j++) {
1872 for (DT k = j; k >= 0; k -= m) {
1873 if (first1[first2[k + m]] >= first1[first2[k]])
1874 break;
1875 std::swap(first2[k + m], first2[k]);
1876 }
1877 }
1878 m = m / 2;
1879 }
1880}
1881
1882} // namespace MueLu
1883
1884#define MUELU_BLACKBOXPFACTORY_SHORT
1885#endif // MUELU_BLACKBOXPFACTORY_DEF_HPP
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void FormatStencil(const LO BlkSize, const Array< bool > ghostInterface, const LO ie, const LO je, const LO ke, const ArrayView< const SC > rowValues, const Array< LO > elementNodesPerDir, const int collapseFlags[3], const std::string stencilType, Array< SC > &stencil) const
void GetNodeInfo(const LO ie, const LO je, const LO ke, const Array< LO > elementNodesPerDir, int *type, LO &ind, int *orientation) const
void sh_sort_permute(const typename Teuchos::Array< LocalOrdinal >::iterator &first1, const typename Teuchos::Array< LocalOrdinal >::iterator &last1, const typename Teuchos::Array< LocalOrdinal >::iterator &first2, const typename Teuchos::Array< LocalOrdinal >::iterator &last2) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void ComputeLocalEntries(const RCP< const Matrix > &Aghost, const Array< LO > coarseRate, const Array< LO > endRate, const LO BlkSize, const Array< LO > elemInds, const Array< LO > lCoarseElementsPerDir, const LO numDimensions, const Array< LO > lFineNodesPerDir, const Array< GO > gFineNodesPerDir, const Array< GO > gIndices, const Array< LO > lCoarseNodesPerDir, const Array< bool > ghostInterface, const Array< int > elementFlags, const std::string stencilType, const std::string blockStrategy, const Array< LO > elementNodesPerDir, const LO numNodesInElement, const Array< GO > colGIDs, Teuchos::SerialDenseMatrix< LO, SC > &Pi, Teuchos::SerialDenseMatrix< LO, SC > &Pf, Teuchos::SerialDenseMatrix< LO, SC > &Pe, Array< LO > &dofType, Array< LO > &lDofInd) const
void CollapseStencil(const int type, const int orientation, const int collapseFlags[3], Array< SC > &stencil) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void GetGeometricData(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::magnitudeType, LO, GO, NO > > &coordinates, const Array< LO > coarseRate, const Array< GO > gFineNodesPerDir, const Array< LO > lFineNodesPerDir, const LO BlkSize, Array< GO > &gIndices, Array< LO > &myOffset, Array< bool > &ghostInterface, Array< LO > &endRate, Array< GO > &gCoarseNodesPerDir, Array< LO > &lCoarseNodesPerDir, Array< LO > &glCoarseNodesPerDir, Array< GO > &ghostGIDs, Array< GO > &coarseNodesGIDs, Array< GO > &colGIDs, GO &gNumCoarseNodes, LO &lNumCoarseNodes, ArrayRCP< Array< typename Teuchos::ScalarTraits< Scalar >::magnitudeType > > coarseNodes, Array< int > &boundaryFlags, RCP< NodesIDs > ghostedCoarseNodes) const
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
static const NoFactory * get()
static void sortCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< LocalOrdinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals, const UnderlyingLib lib)
Sort the entries of the (raw CSR) matrix by column index within each row.
Namespace for MueLu classes and methods.
@ Runtime0
One-liner description of what is happening.