32 Teuchos::ETransp mode,
35#if KOKKOS_VERSION >= 40799
36 using impl_scalar_type =
typename KokkosKernels::ArithTraits<Scalar>::val_type;
38 using impl_scalar_type =
typename Kokkos::ArithTraits<Scalar>::val_type;
40 impl_scalar_type implAlpha = alpha;
46 typename Aggregates::aggregates_sizes_type::const_type aggSizes = aggregates_->ComputeAggregateSizes();
48 auto kokkos_view_X = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
49 auto kokkos_view_Y = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
50 LO numCols = kokkos_view_X.extent(1);
52 if (mode == Teuchos::TRANS) {
53 auto vertex2AggId = aggregates_->GetVertex2AggId();
54 auto vertex2AggIdView = vertex2AggId->getLocalViewDevice(Tpetra::Access::ReadOnly);
55 LO numNodes = kokkos_view_X.extent(0);
60 "MueLu:MatrixFreeTentativeR_kokkos:apply",
md_range_type({0, 0}, {numCols, numNodes}),
61 KOKKOS_LAMBDA(
const int colIdx,
const int NodeIdx) {
62 LO aggIdx = vertex2AggIdView(NodeIdx, 0);
64 Kokkos::atomic_add(&kokkos_view_Y(aggIdx, colIdx), implAlpha * kokkos_view_X(NodeIdx, colIdx) / Kokkos::sqrt(aggSizes(aggIdx)));
68 const auto vertex2Agg = aggregates_->GetVertex2AggId();
69 auto vertex2AggView = vertex2Agg->getLocalViewDevice(Tpetra::Access::ReadOnly);
70 LO numNodes = kokkos_view_Y.extent(0);
75 "MueLu:MatrixFreeTentativeP:apply",
md_range_type({0, 0}, {numCols, numNodes}),
76 KOKKOS_LAMBDA(
const int colIdx,
const int fineIdx) {
77 LO aggIdx = vertex2AggView(fineIdx, 0);
78 kokkos_view_Y(fineIdx, colIdx) += implAlpha * kokkos_view_X(aggIdx, colIdx) / Kokkos::sqrt(aggSizes(aggIdx));