32 Teuchos::ETransp mode,
35 using impl_scalar_type =
typename KokkosKernels::ArithTraits<Scalar>::val_type;
36 impl_scalar_type implAlpha = alpha;
42 typename Aggregates::aggregates_sizes_type::const_type aggSizes = aggregates_->ComputeAggregateSizes();
44 auto kokkos_view_X = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
45 auto kokkos_view_Y = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
46 LO numCols = kokkos_view_X.extent(1);
48 if (mode == Teuchos::TRANS) {
49 auto vertex2AggId = aggregates_->GetVertex2AggId();
50 auto vertex2AggIdView = vertex2AggId->getLocalViewDevice(Tpetra::Access::ReadOnly);
51 LO numNodes = kokkos_view_X.extent(0);
56 "MueLu:MatrixFreeTentativeR_kokkos:apply",
md_range_type({0, 0}, {numCols, numNodes}),
57 KOKKOS_LAMBDA(
const int colIdx,
const int NodeIdx) {
58 LO aggIdx = vertex2AggIdView(NodeIdx, 0);
60 Kokkos::atomic_add(&kokkos_view_Y(aggIdx, colIdx), implAlpha * kokkos_view_X(NodeIdx, colIdx) / Kokkos::sqrt(aggSizes(aggIdx)));
64 const auto vertex2Agg = aggregates_->GetVertex2AggId();
65 auto vertex2AggView = vertex2Agg->getLocalViewDevice(Tpetra::Access::ReadOnly);
66 LO numNodes = kokkos_view_Y.extent(0);
71 "MueLu:MatrixFreeTentativeP:apply",
md_range_type({0, 0}, {numCols, numNodes}),
72 KOKKOS_LAMBDA(
const int colIdx,
const int fineIdx) {
73 LO aggIdx = vertex2AggView(fineIdx, 0);
74 kokkos_view_Y(fineIdx, colIdx) += implAlpha * kokkos_view_X(aggIdx, colIdx) / Kokkos::sqrt(aggSizes(aggIdx));