119 RCP<Matrix> A = Get<RCP<Matrix>>(fineLevel,
"A");
120 RCP<MultiVector> fineNullspace =
121 Get<RCP<MultiVector>>(fineLevel,
"Nullspace");
124 const ParameterList &pL = GetParameterList();
125 LO CoarsenRate = as<LO>(pL.get<
int>(
"semicoarsen: coarsen rate"));
126 TEUCHOS_TEST_FOR_EXCEPTION(
128 "semicoarsen: coarsen rate must be greater than 1");
131 LO BlkSize = A->GetFixedBlockSize();
132 RCP<const Map> rowMap = A->getRowMap();
133 LO Ndofs = rowMap->getLocalNumElements();
134 LO Nnodes = Ndofs / BlkSize;
138 LO FineNumZLayers = Get<LO>(fineLevel,
"CoarseNumZLayers");
139 Teuchos::ArrayRCP<LO> TVertLineId =
140 Get<Teuchos::ArrayRCP<LO>>(fineLevel,
"LineDetection_VertLineIds");
141 Teuchos::ArrayRCP<LO> TLayerId =
142 Get<Teuchos::ArrayRCP<LO>>(fineLevel,
"LineDetection_Layers");
146 "Cannot coarsen further");
147 using coordT =
typename Teuchos::ScalarTraits<Scalar>::coordinateType;
148 LO CoarseNumZLayers =
149 (LO)floor(((coordT)(FineNumZLayers + 1)) / ((coordT)CoarsenRate) - 1.0);
150 if (CoarseNumZLayers < 1)
151 CoarseNumZLayers = 1;
155 RCP<MultiVector> coarseNullspace;
156 BuildSemiCoarsenP(coarseLevel, Ndofs, Nnodes, BlkSize, FineNumZLayers,
157 CoarseNumZLayers, TLayerId, TVertLineId, A, fineNullspace, P,
163 coarseLevel.
Set(
"NumZLayers", Teuchos::as<LO>(CoarseNumZLayers),
167 Set(coarseLevel,
"P", P);
168 Set(coarseLevel,
"Nullspace", coarseNullspace);
171 if (bTransferCoordinates_) {
173 typedef Xpetra::MultiVector<
174 typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO, NO>
176 RCP<xdMV> fineCoords = Teuchos::null;
181 RCP<const FactoryBase> myCoordsFact = GetFactory(
"Coordinates");
182 if (myCoordsFact == Teuchos::null) {
185 if (fineLevel.
IsAvailable(
"Coordinates", myCoordsFact.get())) {
187 fineLevel.
Get<RCP<xdMV>>(
"Coordinates", myCoordsFact.get());
191 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords == Teuchos::null,
193 "No Coordinates found provided by the user.");
195 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3,
197 "Three coordinates arrays must be supplied if "
198 "line detection orientation not given.");
199 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> x =
200 fineCoords->getDataNonConst(0);
201 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> y =
202 fineCoords->getDataNonConst(1);
203 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> z =
204 fineCoords->getDataNonConst(2);
208 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_max =
209 -Teuchos::ScalarTraits<
210 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
211 Teuchos::ScalarTraits<
212 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
213 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_min =
214 Teuchos::ScalarTraits<
215 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
216 Teuchos::ScalarTraits<
217 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
218 for (
auto it = z.begin(); it != z.end(); ++it) {
225 LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
227 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType>
228 myZLayerCoords = Teuchos::arcp<
229 typename Teuchos::ScalarTraits<Scalar>::coordinateType>(
231 if (myCoarseZLayers == 1) {
232 myZLayerCoords[0] = zval_min;
234 typename Teuchos::ScalarTraits<Scalar>::coordinateType dz =
235 (zval_max - zval_min) / (myCoarseZLayers - 1);
236 for (LO k = 0; k < myCoarseZLayers; ++k) {
237 myZLayerCoords[k] = k * dz;
245 LO numVertLines = Nnodes / FineNumZLayers;
246 LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
248 RCP<const Map> coarseCoordMap = MapFactory::Build(
249 fineCoords->getMap()->lib(),
250 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
251 Teuchos::as<size_t>(numLocalCoarseNodes),
252 fineCoords->getMap()->getIndexBase(), fineCoords->getMap()->getComm());
253 RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<
254 typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO,
255 NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
256 coarseCoords->putScalar(-1.0);
257 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cx =
258 coarseCoords->getDataNonConst(0);
259 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cy =
260 coarseCoords->getDataNonConst(1);
261 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cz =
262 coarseCoords->getDataNonConst(2);
265 LO cntCoarseNodes = 0;
266 for (LO vt = 0; vt < TVertLineId.size(); ++vt) {
268 LO curVertLineId = TVertLineId[vt];
270 if (cx[curVertLineId * myCoarseZLayers] == -1.0 &&
271 cy[curVertLineId * myCoarseZLayers] == -1.0) {
273 for (LO n = 0; n < myCoarseZLayers; ++n) {
274 cx[curVertLineId * myCoarseZLayers + n] = x[vt];
275 cy[curVertLineId * myCoarseZLayers + n] = y[vt];
276 cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
278 cntCoarseNodes += myCoarseZLayers;
281 TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes,
283 "number of coarse nodes is inconsistent.");
284 if (cntCoarseNodes == numLocalCoarseNodes)
289 Set(coarseLevel,
"Coordinates", coarseCoords);
298 BuildSemiCoarsenP(
Level &coarseLevel,
const LO NFRows,
const LO NFNodes,
299 const LO DofsPerNode,
const LO NFLayers,
300 const LO NCLayers,
const ArrayRCP<LO> LayerId,
301 const ArrayRCP<LO> VertLineId,
const RCP<Matrix> &Amat,
302 const RCP<MultiVector> fineNullspace, RCP<Matrix> &P,
303 RCP<MultiVector> &coarseNullspace)
const {
305#if KOKKOS_VERSION >= 40799
306 using impl_SC =
typename KokkosKernels::ArithTraits<SC>::val_type;
308 using impl_SC =
typename Kokkos::ArithTraits<SC>::val_type;
310#if KOKKOS_VERSION >= 40799
311 using impl_ATS = KokkosKernels::ArithTraits<impl_SC>;
313 using impl_ATS = Kokkos::ArithTraits<impl_SC>;
315 using LOView1D = Kokkos::View<LO *, DeviceType>;
316 using LOView2D = Kokkos::View<LO **, DeviceType>;
320 const auto FCol2LayerVector =
321 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getColMap());
322 const auto localTemp =
323 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getDomainMap());
324 RCP<const Import> importer = Amat->getCrsGraph()->getImporter();
325 if (importer == Teuchos::null)
326 importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
329 const auto localTempHost = localTemp->getLocalViewHost(Tpetra::Access::ReadWrite);
330 for (
int row = 0; row < NFRows; row++)
331 localTempHost(row, 0) = LayerId[row / DofsPerNode];
332 const auto localTempView = localTemp->getLocalViewDevice(Tpetra::Access::ReadWrite);
333 Kokkos::deep_copy(localTempView, localTempHost);
334 FCol2LayerVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
336 const auto FCol2LayerView = FCol2LayerVector->getLocalViewDevice(Tpetra::Access::ReadOnly);
337 const auto FCol2Layer = Kokkos::subview(FCol2LayerView, Kokkos::ALL(), 0);
341 const auto FCol2DofVector =
342 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getColMap());
345 const auto localTempHost = localTemp->getLocalViewHost(Tpetra::Access::ReadWrite);
346 for (
int row = 0; row < NFRows; row++)
347 localTempHost(row, 0) = row % DofsPerNode;
348 const auto localTempView = localTemp->getLocalViewDevice(Tpetra::Access::ReadWrite);
349 Kokkos::deep_copy(localTempView, localTempHost);
350 FCol2DofVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
352 const auto FCol2DofView = FCol2DofVector->getLocalViewDevice(Tpetra::Access::ReadOnly);
353 const auto FCol2Dof = Kokkos::subview(FCol2DofView, Kokkos::ALL(), 0);
359 NVertLines = VertLineId[0];
360 for (
int node = 1; node < NFNodes; ++node)
361 if (VertLineId[node] > NVertLines)
362 NVertLines = VertLineId[node];
366 LOView2D LineLayer2Node(
"LineLayer2Node", NVertLines, NFLayers);
367 typename LOView2D::host_mirror_type LineLayer2NodeHost =
368 Kokkos::create_mirror_view(LineLayer2Node);
369 for (
int node = 0; node < NFNodes; ++node)
370 LineLayer2NodeHost(VertLineId[node], LayerId[node]) = node;
371 Kokkos::deep_copy(LineLayer2Node, LineLayer2NodeHost);
374 LOView1D CLayer2FLayer(
"CLayer2FLayer", NCLayers);
375 typename LOView1D::host_mirror_type CLayer2FLayerHost =
376 Kokkos::create_mirror_view(CLayer2FLayer);
377 using coordT =
typename Teuchos::ScalarTraits<Scalar>::coordinateType;
378 const LO FirstStride =
379 (LO)ceil(((coordT)(NFLayers + 1)) / ((coordT)(NCLayers + 1)));
380 const coordT RestStride =
381 ((coordT)(NFLayers - FirstStride + 1)) / ((coordT)NCLayers);
383 (LO)floor((((coordT)(NFLayers - FirstStride + 1)) / RestStride) + .00001);
385 "sizes do not match.");
386 coordT stride = (coordT)FirstStride;
387 for (
int clayer = 0; clayer < NCLayers; ++clayer) {
388 CLayer2FLayerHost(clayer) = (LO)floor(stride) - 1;
389 stride += RestStride;
391 Kokkos::deep_copy(CLayer2FLayer, CLayer2FLayerHost);
395 int MaxStencilSize = 1;
396 LOView1D CLayer2StartLayer(
"CLayer2StartLayer", NCLayers);
397 LOView1D CLayer2StencilSize(
"CLayer2StencilSize", NCLayers);
398 typename LOView1D::host_mirror_type CLayer2StartLayerHost =
399 Kokkos::create_mirror_view(CLayer2StartLayer);
400 typename LOView1D::host_mirror_type CLayer2StencilSizeHost =
401 Kokkos::create_mirror_view(CLayer2StencilSize);
402 for (
int clayer = 0; clayer < NCLayers; ++clayer) {
403 const int startLayer = (clayer > 0) ? CLayer2FLayerHost(clayer - 1) + 1 : 0;
404 const int stencilSize = (clayer < NCLayers - 1)
405 ? CLayer2FLayerHost(clayer + 1) - startLayer
406 : NFLayers - startLayer;
408 if (MaxStencilSize < stencilSize)
409 MaxStencilSize = stencilSize;
410 CLayer2StartLayerHost(clayer) = startLayer;
411 CLayer2StencilSizeHost(clayer) = stencilSize;
413 Kokkos::deep_copy(CLayer2StartLayer, CLayer2StartLayerHost);
414 Kokkos::deep_copy(CLayer2StencilSize, CLayer2StencilSizeHost);
421 int Nmax = MaxStencilSize * DofsPerNode;
422 Kokkos::View<impl_SC ***, DeviceType> BandMat(
423 "BandMat", NVertLines, Nmax, Nmax);
424 Kokkos::View<impl_SC ***, DeviceType> BandSol(
425 "BandSol", NVertLines, Nmax, DofsPerNode);
431 for (
int clayer = 0; clayer < NCLayers; ++clayer)
432 NnzP += CLayer2StencilSizeHost(clayer);
433 NnzP *= NVertLines * DofsPerNode * DofsPerNode;
434 Kokkos::View<impl_SC *, DeviceType> Pvals(
"Pvals", NnzP);
435 Kokkos::View<LO *, DeviceType> Pcols(
"Pcols", NnzP);
440 Kokkos::View<size_t *, DeviceType> Pptr(
"Pptr", NFRows + 1);
441 typename Kokkos::View<size_t *, DeviceType>::host_mirror_type PptrHost =
442 Kokkos::create_mirror_view(Pptr);
443 Kokkos::deep_copy(PptrHost, 0);
444 for (
int line = 0; line < NVertLines; ++line) {
445 for (
int clayer = 0; clayer < NCLayers; ++clayer) {
446 const int stencilSize = CLayer2StencilSizeHost(clayer);
447 const int startLayer = CLayer2StartLayerHost(clayer);
448 for (
int snode = 0; snode < stencilSize; ++snode) {
449 for (
int dofi = 0; dofi < DofsPerNode; ++dofi) {
450 const int layer = startLayer + snode;
451 const int AmatBlkRow = LineLayer2NodeHost(line, layer);
452 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
453 PptrHost(AmatRow + 1) += DofsPerNode;
458 for (
int i = 2; i < NFRows + 1; ++i)
459 PptrHost(i) += PptrHost(i - 1);
460 TEUCHOS_TEST_FOR_EXCEPTION(NnzP != (
int)PptrHost(NFRows),
462 "Number of nonzeros in P does not match");
463 Kokkos::deep_copy(Pptr, PptrHost);
467 Kokkos::View<LO *, Kokkos::DefaultHostExecutionSpace> layerBuckets(
468 "layerBuckets", NFLayers);
469 Kokkos::deep_copy(layerBuckets, 0);
470 LOView2D CLayerSNode2PptrOffset(
"CLayerSNode2PptrOffset", NCLayers,
472 typename LOView2D::host_mirror_type CLayerSNode2PptrOffsetHost =
473 Kokkos::create_mirror_view(CLayerSNode2PptrOffset);
474 for (
int clayer = 0; clayer < NCLayers; ++clayer) {
475 const int stencilSize = CLayer2StencilSizeHost(clayer);
476 const int startLayer = CLayer2StartLayerHost(clayer);
477 for (
int snode = 0; snode < stencilSize; ++snode) {
478 const int layer = startLayer + snode;
479 CLayerSNode2PptrOffsetHost(clayer, snode) = layerBuckets(layer);
480 layerBuckets(layer)++;
483 Kokkos::deep_copy(CLayerSNode2PptrOffset, CLayerSNode2PptrOffsetHost);
488 const auto localAmat = Amat->getLocalMatrixDevice();
489 const auto zero = impl_ATS::zero();
490 const auto one = impl_ATS::one();
492 using range_policy = Kokkos::RangePolicy<execution_space>;
493 Kokkos::parallel_for(
494 "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Fill P",
495 range_policy(0, NVertLines), KOKKOS_LAMBDA(
const int line) {
496 for (
int clayer = 0; clayer < NCLayers; ++clayer) {
499 Kokkos::subview(BandSol, line, Kokkos::ALL(), Kokkos::ALL());
500 for (
int row = 0; row < Nmax; ++row)
501 for (
int dof = 0; dof < DofsPerNode; ++dof)
502 bandSol(row, dof) = zero;
505 const int stencilSize = CLayer2StencilSize(clayer);
506 const int N = stencilSize * DofsPerNode;
508 Kokkos::subview(BandMat, line, Kokkos::ALL(), Kokkos::ALL());
509 for (
int row = 0; row < Nmax; ++row)
510 for (
int col = 0; col < Nmax; ++col)
512 (row == col && row >= N) ? one : zero;
515 const int flayer = CLayer2FLayer(clayer);
516 const int startLayer = CLayer2StartLayer(clayer);
517 for (
int snode = 0; snode < stencilSize; ++snode) {
518 const int layer = startLayer + snode;
519 if (layer == flayer) {
520 for (
int dof = 0; dof < DofsPerNode; ++dof) {
521 const int row = snode * DofsPerNode + dof;
522 bandMat(row, row) = one;
523 bandSol(row, dof) = one;
526 const int AmatBlkRow = LineLayer2Node(line, layer);
527 for (
int dofi = 0; dofi < DofsPerNode; ++dofi) {
529 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
530 const auto localAmatRow = localAmat.rowConst(AmatRow);
531 const int AmatRowLeng = localAmatRow.length;
533 const int row = snode * DofsPerNode + dofi;
534 for (
int dofj = 0; dofj < DofsPerNode; ++dofj) {
535 const int col = snode * DofsPerNode + dofj;
540 for (
int i = 0; i < AmatRowLeng; ++i) {
541 const int colidx = localAmatRow.colidx(i);
542 if (FCol2Layer(colidx) == layer &&
543 FCol2Dof(colidx) == dofj)
544 val += localAmatRow.value(i);
546 bandMat(row, col) = val;
552 for (
int i = 0; i < AmatRowLeng; ++i) {
553 const int colidx = localAmatRow.colidx(i);
554 if (FCol2Layer(colidx) == layer - 1 &&
555 FCol2Dof(colidx) == dofj)
556 val += localAmatRow.value(i);
558 bandMat(row, col - DofsPerNode) = val;
561 if (snode < stencilSize - 1) {
565 for (
int i = 0; i < AmatRowLeng; ++i) {
566 const int colidx = localAmatRow.colidx(i);
567 if (FCol2Layer(colidx) == layer + 1 &&
568 FCol2Dof(colidx) == dofj)
569 val += localAmatRow.value(i);
571 bandMat(row, col + DofsPerNode) = val;
579 namespace KB = KokkosBatched;
580 using lu_type =
typename KB::SerialLU<KB::Algo::LU::Unblocked>;
581 lu_type::invoke(bandMat);
583 typename KB::SerialTrsm<KB::Side::Left, KB::Uplo::Lower,
584 KB::Trans::NoTranspose, KB::Diag::Unit,
585 KB::Algo::Trsm::Unblocked>;
586 trsv_l_type::invoke(one, bandMat, bandSol);
587 using trsv_u_type =
typename KB::SerialTrsm<
588 KB::Side::Left, KB::Uplo::Upper, KB::Trans::NoTranspose,
589 KB::Diag::NonUnit, KB::Algo::Trsm::Unblocked>;
590 trsv_u_type::invoke(one, bandMat, bandSol);
593 for (
int snode = 0; snode < stencilSize; ++snode) {
594 for (
int dofj = 0; dofj < DofsPerNode; ++dofj) {
595 for (
int dofi = 0; dofi < DofsPerNode; ++dofi) {
596 const int layer = startLayer + snode;
597 const int AmatBlkRow = LineLayer2Node(line, layer);
598 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
600 const int pptrOffset = CLayerSNode2PptrOffset(clayer, snode);
602 Pptr(AmatRow) + pptrOffset * DofsPerNode + dofj;
605 line * NCLayers + clayer;
606 Pcols(pptr) = col * DofsPerNode + dofj;
607 Pvals(pptr) = bandSol(snode * DofsPerNode + dofi, dofj);
616 RCP<const Map> rowMap = Amat->getRowMap();
617 Xpetra::global_size_t GNdofs = rowMap->getGlobalNumElements();
618 Xpetra::global_size_t itemp = GNdofs / NFLayers;
619 std::vector<size_t> stridingInfo_{(size_t)DofsPerNode};
620 RCP<const Map> coarseMap = StridedMapFactory::Build(
621 rowMap->lib(), NCLayers * itemp, NCLayers * NVertLines * DofsPerNode, 0,
622 stridingInfo_, rowMap->getComm(), -1, 0);
623 P = rcp(
new CrsMatrixWrap(rowMap, coarseMap, 0));
624 RCP<CrsMatrix> PCrs = toCrsMatrix(P);
625 PCrs->setAllValues(Pptr, Pcols, Pvals);
626 PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
629 if (Amat->IsView(
"stridedMaps") ==
true)
630 P->CreateView(
"stridedMaps", Amat->getRowMap(
"stridedMaps"), coarseMap);
632 P->CreateView(
"stridedMaps", P->getRangeMap(), coarseMap);
636 MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
637 const int numVectors = fineNullspace->getNumVectors();
638 const auto fineNullspaceView = fineNullspace->getLocalViewDevice(Tpetra::Access::ReadOnly);
639 const auto coarseNullspaceView = coarseNullspace->getLocalViewDevice(Tpetra::Access::ReadWrite);
640 using range_policy = Kokkos::RangePolicy<execution_space>;
641 Kokkos::parallel_for(
642 "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Inject Nullspace",
643 range_policy(0, NVertLines), KOKKOS_LAMBDA(
const int line) {
644 for (
int clayer = 0; clayer < NCLayers; ++clayer) {
645 const int layer = CLayer2FLayer(clayer);
646 const int AmatBlkRow =
647 LineLayer2Node(line, layer);
649 line * NCLayers + clayer;
650 for (
int k = 0; k < numVectors; ++k) {
651 for (
int dofi = 0; dofi < DofsPerNode; ++dofi) {
652 const int fRow = AmatBlkRow * DofsPerNode + dofi;
653 const int cRow = col * DofsPerNode + dofi;
654 coarseNullspaceView(cRow, k) = fineNullspaceView(fRow, k);