10#ifndef TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_DEF_HPP 
   11#define TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_DEF_HPP 
   13#include "TpetraCore_config.h" 
   14#include "Teuchos_Array.hpp" 
   15#include "Teuchos_ArrayView.hpp" 
   24#include "Kokkos_Core.hpp" 
   54namespace UnpackAndCombineCrsGraphImpl {
 
   65template <
class Packet, 
class GO, 
class Device, 
class BufferDevice>
 
   68          const Kokkos::View<int*, Device, Kokkos::MemoryUnmanaged>& 
pids_out,
 
   69          const Kokkos::View<const Packet*, BufferDevice>& imports,
 
   72  using size_type = 
typename Kokkos::View<GO*, Device>::size_type;
 
 
  110  using GO                 = 
typename IndicesView::value_type;
 
  116  using device_type     = 
typename IndicesView::device_type;
 
  117  using execution_space = 
typename device_type::execution_space;
 
  119  using num_packets_per_lid_type = Kokkos::View<const size_t*, buffer_device_type>;
 
  120  using offsets_type             = Kokkos::View<const size_t*, device_type>;
 
  121  using input_buffer_type        = Kokkos::View<const packet_type*, buffer_device_type>;
 
  122  using import_lids_type         = Kokkos::View<const LO*, buffer_device_type>;
 
  124  using gids_scratch_type = Kokkos::View<GO*, device_type>;
 
  125  using pids_scratch_type = Kokkos::View<int*, device_type>;
 
  130  input_buffer_type imports;
 
  131  num_packets_per_lid_type num_packets_per_lid;
 
  132  import_lids_type import_lids;
 
  133  offsets_type offsets;
 
  136  Kokkos::Experimental::UniqueToken<execution_space,
 
  137                                    Kokkos::Experimental::UniqueTokenScope::Global>
 
  139  gids_scratch_type gids_scratch;
 
  140  pids_scratch_type pids_scratch;
 
  143  using value_type = Kokkos::pair<int, LO>;
 
  164    , tokens(execution_space())
 
  165    , gids_scratch(
"gids_scratch", tokens.size() * max_num_ent)
 
  166    , pids_scratch(
"pids_scratch", tokens.size() * max_num_ent) {}
 
  169    using Tpetra::Details::OrdinalTraits;
 
  174  join(value_type& dst, 
const value_type& src)
 const {
 
  179    using Tpetra::Details::OrdinalTraits;
 
  186          src.second < dst.second) {
 
  193  void operator()(
const LO i, value_type& dst)
 const {
 
  194    using Kokkos::MemoryUnmanaged;
 
  195    using Kokkos::subview;
 
  197    using size_type = 
typename execution_space::size_type;
 
  198    using slice     = 
typename Kokkos::pair<size_type, size_type>;
 
  209      dst = Kokkos::make_pair(1, 
i);
 
  219    const size_t buf_size = imports.size();
 
  220    const size_t offset   = offsets(
i);
 
  223      dst = Kokkos::make_pair(2, 
i);  
 
  230    const size_type 
token  = tokens.acquire();
 
  231    const size_t a         = 
static_cast<size_t>(
token) * max_num_ent;
 
  239      dst = Kokkos::make_pair(3, 
i);
 
  240      tokens.release(
token);
 
  251    tokens.release(
token);
 
 
  267                                         Kokkos::MemoryUnmanaged>& imports,
 
  269                                         Kokkos::MemoryUnmanaged>& num_packets_per_lid,
 
  271                                         Kokkos::MemoryUnmanaged>& import_lids,
 
  274                      const bool unpack_pids,
 
  276                      const bool verbose) {
 
  279  using device_type     = 
typename Node::device_type;
 
  280  using execution_space = 
typename BufferDevice::execution_space;
 
  282      Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
 
  287      "Tpetra::Details::UnpackAndCombineCrsGraphImpl::unpackAndCombine: ";
 
  289  const size_t num_import_lids = 
static_cast<size_t>(import_lids.extent(0));
 
  300  Kokkos::View<size_t*, device_type> offsets(
"offsets", 
num_import_lids + 1);
 
  307  Kokkos::parallel_reduce(
 
  317      Kokkos::Max<size_t>(max_num_ent));
 
  321                        num_packets_per_lid, import_lids, offsets,
 
  322                        max_num_ent, unpack_pids);
 
  324  typename unpack_functor_type::value_type 
x;
 
  326  auto x_h = 
x.to_std_pair();
 
  328                             prefix << 
"UnpackAndCombineFunctor reported error code " 
  329                                    << 
x_h.first << 
" for the first bad row " << 
x_h.second);
 
 
  332template <
class Packet, 
class LocalGraph, 
class BufferDevice>
 
  336    const Kokkos::View<
const typename LocalGraph::data_type*,
 
  337                       typename LocalGraph::device_type,
 
  338                       Kokkos::MemoryUnmanaged>
 
  340    const Kokkos::View<const Packet*, BufferDevice>& ,
 
  341    const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid,
 
  343  using Kokkos::parallel_reduce;
 
  345  using LO               = 
typename local_graph_type::data_type;
 
  346  using device_type      = 
typename local_graph_type::device_type;
 
  347  using execution_space  = 
typename device_type::execution_space;
 
  348  using range_policy     = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
 
  360          update += 
static_cast<size_t>(local_graph.row_map[
lid + 1] - local_graph.row_map[
lid]);
 
  367  num_items = 
static_cast<LO
>(permute_from_lids.extent(0));
 
  371        range_policy(0, num_items),
 
  372        KOKKOS_LAMBDA(
const LO i, 
size_t& update) {
 
  373          const LO lid = permute_from_lids(i);
 
  374          update += 
static_cast<size_t>(local_graph.row_map[lid + 1] - local_graph.row_map[lid]);
 
  382    size_t tot_num_ent = 0;
 
  385        range_policy(0, num_packets_per_lid.size()),
 
  386        KOKKOS_LAMBDA(
const int& i, 
size_t& lsum) {
 
  387          lsum += num_packets_per_lid(i) / 2;
 
  389        Kokkos::Sum<size_t>(tot_num_ent));
 
  390    count += tot_num_ent;
 
  397template <
class Packet, 
class LO, 
class Device, 
class BufferDevice>
 
  399    const Kokkos::View<size_t*, Device>& 
tgt_rowptr,
 
  400    const Kokkos::View<const LO*, BufferDevice>& import_lids,
 
  401    const Kokkos::View<const Packet*, BufferDevice>& ,
 
  402    const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid) {
 
  403  using Kokkos::parallel_reduce;
 
  404  using device_type     = Device;
 
  405  using execution_space = 
typename device_type::execution_space;
 
  406  using size_type       = 
typename Kokkos::View<size_t*, device_type>::size_type;
 
  407  using range_policy    = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
 
  409  const size_type 
N = num_packets_per_lid.extent(0);
 
  411      "Setup row pointers for remotes",
 
 
  422template <
class Device>
 
  423void makeCrsRowPtrFromLengths(
 
  424    const Kokkos::View<size_t*, Device, Kokkos::MemoryUnmanaged>& 
tgt_rowptr,
 
  426  using Kokkos::parallel_scan;
 
  427  using device_type     = Device;
 
  428  using execution_space = 
typename device_type::execution_space;
 
  429  using size_type       = 
typename Kokkos::View<size_t*, device_type>::size_type;
 
  430  using range_policy    = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
 
  434      KOKKOS_LAMBDA(
const size_t& 
i, 
size_t& update, 
const bool& 
final) {
 
  444template <
class LocalGraph, 
class LocalMap>
 
  445void copyDataFromSameIDs(
 
  448    const Kokkos::View<int*, typename LocalMap::device_type>& tgt_pids,
 
  449    const Kokkos::View<size_t*, typename LocalMap::device_type>& new_start_row,
 
  450    const Kokkos::View<size_t*, typename LocalMap::device_type>& tgt_rowptr,
 
  451    const Kokkos::View<const int*, typename LocalMap::device_type>& src_pids,
 
  452    const LocalGraph& local_graph,
 
  453    const LocalMap& local_col_map,
 
  454    const size_t num_same_ids,
 
  456  using Kokkos::parallel_for;
 
  459  using execution_space = 
typename device_type::execution_space;
 
  460  using range_policy    = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_t>>;
 
  463      range_policy(0, num_same_ids),
 
  464      KOKKOS_LAMBDA(
const size_t i) {
 
  465        using atomic_incr_type = 
typename std::remove_reference<
decltype(new_start_row(0))>::type;
 
  467        const LO src_lid = 
static_cast<LO
>(i);
 
  468        size_t src_row   = local_graph.row_map(src_lid);
 
  470        const LO tgt_lid     = 
static_cast<LO
>(i);
 
  471        const size_t tgt_row = tgt_rowptr(tgt_lid);
 
  473        const size_t nsr = local_graph.row_map(src_lid + 1) - local_graph.row_map(src_lid);
 
  474        Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
 
  476        for (
size_t j = local_graph.row_map(src_lid);
 
  477             j < local_graph.row_map(src_lid + 1); ++j) {
 
  478          LO src_col                        = local_graph.entries(j);
 
  479          tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
 
  480          tgt_pids(tgt_row + j - src_row)   = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
 
  485template <
class LocalGraph, 
class LocalMap, 
class BufferDevice>
 
  486void copyDataFromPermuteIDs(
 
  489    const Kokkos::View<
int*,
 
  491    const Kokkos::View<
size_t*,
 
  493    const Kokkos::View<
size_t*,
 
  495    const Kokkos::View<
const int*,
 
  498                       BufferDevice, Kokkos::MemoryUnmanaged>& permute_to_lids,
 
  500                       BufferDevice, Kokkos::MemoryUnmanaged>& permute_from_lids,
 
  501    const LocalGraph& local_graph,
 
  502    const LocalMap& local_col_map,
 
  504  using Kokkos::parallel_for;
 
  507  using execution_space = 
typename device_type::execution_space;
 
  508  using size_type       = 
typename Kokkos::View<LO*, device_type>::size_type;
 
  509  using range_policy    = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
 
  511  const size_type num_permute_to_lids = permute_to_lids.extent(0);
 
  514      range_policy(0, num_permute_to_lids),
 
  515      KOKKOS_LAMBDA(
const size_t i) {
 
  516        using atomic_incr_type = 
typename std::remove_reference<
decltype(new_start_row(0))>::type;
 
  518        const LO src_lid     = permute_from_lids(i);
 
  519        const size_t src_row = local_graph.row_map(src_lid);
 
  521        const LO tgt_lid     = permute_to_lids(i);
 
  522        const size_t tgt_row = tgt_rowptr(tgt_lid);
 
  524        size_t nsr = local_graph.row_map(src_lid + 1) - local_graph.row_map(src_lid);
 
  525        Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
 
  527        for (
size_t j = local_graph.row_map(src_lid);
 
  528             j < local_graph.row_map(src_lid + 1); ++j) {
 
  529          LO src_col                        = local_graph.entries(j);
 
  530          tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
 
  531          tgt_pids(tgt_row + j - src_row)   = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
 
  536template <
class Packet, 
class LocalGraph, 
class LocalMap, 
class BufferDevice>
 
  537void unpackAndCombineIntoCrsArrays2(
 
  538    const Kokkos::View<typename LocalMap::global_ordinal_type*, typename LocalMap::device_type>& tgt_colind,
 
  539    const Kokkos::View<int*, typename LocalMap::device_type>& tgt_pids,
 
  540    const Kokkos::View<size_t*, typename LocalMap::device_type>& new_start_row,
 
  541    const Kokkos::View<const size_t*, typename LocalMap::device_type>& offsets,
 
  545        Kokkos::MemoryUnmanaged>& import_lids,
 
  546    const Kokkos::View<const Packet*, BufferDevice>& imports,
 
  547    const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid,
 
  551  using Kokkos::atomic_fetch_add;
 
  552  using Kokkos::MemoryUnmanaged;
 
  553  using Kokkos::parallel_reduce;
 
  554  using Kokkos::subview;
 
  560  using execution_space = 
typename device_type::execution_space;
 
  561  using size_type       = 
typename Kokkos::View<LO*, device_type>::size_type;
 
  562  using slice           = 
typename Kokkos::pair<size_type, size_type>;
 
  563  using range_policy    = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_type>>;
 
  565  using pids_out_type = View<int*, device_type, MemoryUnmanaged>;
 
  566  using gids_out_type = View<GO*, device_type, MemoryUnmanaged>;
 
  568  const size_type num_import_lids = import_lids.size();
 
  569  const char prefix[]             = 
"UnpackAndCombineCrsGraphImpl::unpackAndCombineIntoCrsArrays2: ";
 
  574      "Unpack and combine into CRS",
 
  575      range_policy(0, num_import_lids),
 
  576      KOKKOS_LAMBDA(
const size_t i, 
int& err) {
 
  577        using atomic_incr_type            = 
typename std::remove_reference<
decltype(new_start_row(0))>::type;
 
  578        const size_t num_packets_this_lid = num_packets_per_lid(i);
 
  579        const size_t num_ent              = num_packets_this_lid / 2;
 
  580        const size_t offset               = offsets(i);
 
  581        const LO lcl_row                  = import_lids(i);
 
  582        const size_t start_row            = atomic_fetch_add(&new_start_row(lcl_row), atomic_incr_type(num_ent));
 
  583        const size_t end_row              = start_row + num_ent;
 
  585        gids_out_type gids_out = subview(tgt_colind, slice(start_row, end_row));
 
  586        pids_out_type pids_out = subview(tgt_pids, slice(start_row, end_row));
 
  588        err += 
unpackRow(gids_out, pids_out, imports, offset, num_ent);
 
  591        for (
size_t j = 0; j < static_cast<size_t>(num_ent); ++j) {
 
  592          const int pid = pids_out(j);
 
  593          pids_out(j)   = (pid != my_pid) ? pid : -1;
 
  598  TEUCHOS_TEST_FOR_EXCEPTION(gbl_err_count != 0,
 
  599                             std::invalid_argument, prefix << 
"Attempting to unpack PIDs, but num_ent is not even; this should never " 
  600                                                              "happen!  Please report this bug to the Tpetra developers.");
 
  605template <
class Packet, 
class LocalGraph, 
class LocalMap, 
class BufferDevice>
 
  607    const LocalGraph& local_graph,
 
  608    const LocalMap& local_col_map,
 
  611                       Kokkos::MemoryUnmanaged>& import_lids,
 
  612    const Kokkos::View<const Packet*, BufferDevice>& imports,
 
  613    const Kokkos::View<const size_t*, BufferDevice>& num_packets_per_lid,
 
  616                       Kokkos::MemoryUnmanaged>& permute_to_lids,
 
  619                       Kokkos::MemoryUnmanaged>& permute_from_lids,
 
  620    const Kokkos::View<
size_t*,
 
  622                       Kokkos::MemoryUnmanaged>& tgt_rowptr,
 
  625                       Kokkos::MemoryUnmanaged>& tgt_colind,
 
  626    const Kokkos::View<
const int*,
 
  628                       Kokkos::MemoryUnmanaged>& src_pids,
 
  629    const Kokkos::View<
int*,
 
  631                       Kokkos::MemoryUnmanaged>& tgt_pids,
 
  632    const size_t num_same_ids,
 
  633    const size_t tgt_num_rows,
 
  634    const size_t tgt_num_nonzeros,
 
  635    const int my_tgt_pid) {
 
  636  using Kokkos::MemoryUnmanaged;
 
  637  using Kokkos::parallel_for;
 
  638  using Kokkos::subview;
 
  640  using packet_type        = Packet;
 
  641  using local_map_type     = LocalMap;
 
  642  using local_graph_type   = LocalGraph;
 
  643  using buffer_device_type = BufferDevice;
 
  646  using execution_space    = 
typename device_type::execution_space;
 
  647  using size_type          = 
typename Kokkos::View<LO*, device_type>::size_type;
 
  648  using range_policy       = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<size_t>>;
 
  650  const char prefix[] = 
"UnpackAndCombineCrsGraphImpl::unpackAndCombineIntoCrsArrays: ";
 
  652  const size_t N     = tgt_num_rows;
 
  653  const size_t mynnz = tgt_num_nonzeros;
 
  657  const int my_pid = my_tgt_pid;
 
  666      range_policy(0, N + 1),
 
  667      KOKKOS_LAMBDA(
const size_t i) {
 
  673      range_policy(0, num_same_ids),
 
  674      KOKKOS_LAMBDA(
const size_t i) {
 
  675        const LO tgt_lid    = 
static_cast<LO
>(i);
 
  676        const LO src_lid    = 
static_cast<LO
>(i);
 
  677        tgt_rowptr(tgt_lid) = local_graph.row_map(src_lid + 1) - local_graph.row_map(src_lid);
 
  681  const size_type num_permute_to_lids = permute_to_lids.extent(0);
 
  683      range_policy(0, num_permute_to_lids),
 
  684      KOKKOS_LAMBDA(
const size_t i) {
 
  685        const LO tgt_lid    = permute_to_lids(i);
 
  686        const LO src_lid    = permute_from_lids(i);
 
  687        tgt_rowptr(tgt_lid) = local_graph.row_map(src_lid + 1) - local_graph.row_map(src_lid);
 
  691  const size_type num_import_lids = import_lids.extent(0);
 
  692  View<size_t*, device_type> offsets(
"offsets", num_import_lids + 1);
 
  695#ifdef HAVE_TPETRA_DEBUG 
  697    auto nth_offset_h = getEntryOnHost(offsets, num_import_lids);
 
  698    const bool condition =
 
  699        nth_offset_h != 
static_cast<size_t>(imports.extent(0));
 
  700    TEUCHOS_TEST_FOR_EXCEPTION(condition, std::logic_error, prefix << 
"The final offset in bytes " << nth_offset_h << 
" != imports.size() = " << imports.extent(0) << 
".  Please report this bug to the Tpetra developers.");
 
  705  setupRowPointersForRemotes<packet_type, LO, device_type, buffer_device_type>(
 
  706      tgt_rowptr, import_lids, imports, num_packets_per_lid);
 
  710  View<size_t*, device_type> new_start_row(
"new_start_row", N + 1);
 
  713  makeCrsRowPtrFromLengths(tgt_rowptr, new_start_row);
 
  715    auto nth_tgt_rowptr_h = getEntryOnHost(tgt_rowptr, N);
 
  716    bool condition        = nth_tgt_rowptr_h != mynnz;
 
  717    TEUCHOS_TEST_FOR_EXCEPTION(condition, std::invalid_argument,
 
  718                               prefix << 
"CRS_rowptr[last] = " << nth_tgt_rowptr_h << 
"!= mynnz = " << mynnz << 
".");
 
  722  copyDataFromSameIDs<LocalGraph, LocalMap>(tgt_colind, tgt_pids, new_start_row,
 
  723                                            tgt_rowptr, src_pids, local_graph, local_col_map, num_same_ids, my_pid);
 
  725  copyDataFromPermuteIDs<LocalGraph, LocalMap>(tgt_colind, tgt_pids, new_start_row,
 
  726                                               tgt_rowptr, src_pids, permute_to_lids, permute_from_lids,
 
  727                                               local_graph, local_col_map, my_pid);
 
  729  if (imports.extent(0) <= 0) {
 
  733  unpackAndCombineIntoCrsArrays2<
 
  734      packet_type, local_graph_type, local_map_type, buffer_device_type>(
 
  735      tgt_colind, tgt_pids, new_start_row, offsets, import_lids, imports,
 
  736      num_packets_per_lid, local_graph, local_col_map, my_pid);
 
  790template <
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  794    const Teuchos::ArrayView<const LocalOrdinal>& 
importLIDs,
 
  802  using Kokkos::MemoryUnmanaged;
 
  804  using device_type             = 
typename Node::device_type;
 
  808  const char prefix[]           = 
"unpackAndCombineWithOwningPIDsCount: ";
 
  812                                                                                            "permuteFromLIDs.size() = " 
  818                                                                               "CrsGraph 'sourceGraph' must be locally indexed.");
 
  821                                                                                      "numPacketsPerLID.size() = " 
  824  auto local_graph = 
sourceGraph.getLocalGraphDevice();
 
  829                                             "permute_from_lids");
 
  833                                             imports.size(), 
true,
 
  839                                             "num_packets_per_lid");
 
  841  return UnpackAndCombineCrsGraphImpl::unpackAndCombineWithOwningPIDsCount<
 
  842      packet_type, local_graph_device_type, buffer_device_type>(
 
 
  859template <
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  862    const Teuchos::ArrayView<const LocalOrdinal>& 
importLIDs,
 
  867    const size_t numSameIDs,
 
  874    const Teuchos::ArrayView<GlobalOrdinal>& 
CRS_colind,
 
  875    const Teuchos::ArrayView<const int>& 
SourcePids,
 
  877  using Kokkos::deep_copy;
 
  879  using Teuchos::outArg;
 
  880  using Teuchos::REDUCE_MAX;
 
  881  using Teuchos::reduceAll;
 
  885  using packet_type             = 
typename crs_graph_type::packet_type;
 
  886  using local_graph_device_type = 
typename crs_graph_type::local_graph_device_type;
 
  887  using buffer_device_type      = 
typename crs_graph_type::buffer_device_type;
 
  888  using device_type             = 
typename Node::device_type;
 
  889  using size_type               = 
typename Teuchos::ArrayView<const LO>::size_type;
 
  891  const char prefix[] = 
"Tpetra::Details::unpackAndCombineIntoCrsArrays: ";
 
  906                                                           "numPacketsPerLID.size() = " 
  916  auto local_graph   = 
sourceGraph.getLocalGraphDevice();
 
  917  auto local_col_map = 
sourceGraph.getColMap()->getLocalMap();
 
  927  Kokkos::View<const packet_type*, buffer_device_type> 
imports_d =
 
  929                                             imports.size(), 
true, 
"imports");
 
  934                                             true, 
"num_packets_per_lid");
 
  939                                             true, 
"permute_to_lids");
 
  944                                             true, 
"permute_from_lids");
 
  956  Kokkos::View<const int*, device_type> 
src_pids_d =
 
  966  using local_map_type = 
decltype(local_col_map);
 
  967  UnpackAndCombineCrsGraphImpl::unpackAndCombineIntoCrsArrays<
 
  968      packet_type, local_graph_device_type, local_map_type, buffer_device_type>(
 
 
  992#define TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_INSTANT(LO, GO, NT)                \ 
  994  Details::unpackAndCombineIntoCrsArrays<LO, GO, NT>(                              \ 
  995      const CrsGraph<LO, GO, NT>&,                                                 \ 
  996      const Teuchos::ArrayView<const LO>&,                                         \ 
  997      const Teuchos::ArrayView<const typename CrsGraph<LO, GO, NT>::packet_type>&, \ 
  998      const Teuchos::ArrayView<const size_t>&,                                     \ 
 1000      const CombineMode,                                                           \ 
 1002      const Teuchos::ArrayView<const LO>&,                                         \ 
 1003      const Teuchos::ArrayView<const LO>&,                                         \ 
 1007      const Teuchos::ArrayView<size_t>&,                                           \ 
 1008      const Teuchos::ArrayView<GO>&,                                               \ 
 1009      const Teuchos::ArrayView<const int>&,                                        \ 
 1010      Teuchos::Array<int>&);                                                       \ 
 1012  Details::unpackAndCombineWithOwningPIDsCount<LO, GO, NT>(                        \ 
 1013      const CrsGraph<LO, GO, NT>&,                                                 \ 
 1014      const Teuchos::ArrayView<const LO>&,                                         \ 
 1015      const Teuchos::ArrayView<const typename CrsGraph<LO, GO, NT>::packet_type>&, \ 
 1016      const Teuchos::ArrayView<const size_t>&,                                     \ 
 1020      const Teuchos::ArrayView<const LO>&,                                         \ 
 1021      const Teuchos::ArrayView<const LO>&); 
Declaration of the Tpetra::CrsGraph class.
 
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
 
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types, into the Tpetra::Details namespace.
 
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
 
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsetsFromConstantCount.
 
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary, even if the inputs are const.
 
Functions for manipulating CRS arrays.
 
Declaration and definition of Tpetra::Details::getEntryOnHost.
 
void unpackAndCombine(const RowView &row_ptrs_beg, const RowView &row_ptrs_end, IndicesView &indices, const Kokkos::View< const GlobalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &imports, const Kokkos::View< const size_t *, BufferDevice, Kokkos::MemoryUnmanaged > &num_packets_per_lid, const Kokkos::View< const LocalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &import_lids, const typename CrsGraph< LocalOrdinal, GlobalOrdinal, Node >::padding_type &padding, const bool unpack_pids, const int myRank, const bool verbose)
Perform the unpack operation for the graph.
 
KOKKOS_FUNCTION int unpackRow(const Kokkos::View< GO *, Device, Kokkos::MemoryUnmanaged > &gids_out, const Kokkos::View< int *, Device, Kokkos::MemoryUnmanaged > &pids_out, const Kokkos::View< const Packet *, BufferDevice > &imports, const size_t offset, const size_t num_ent)
Unpack a single row of a CrsGraph.
 
void setupRowPointersForRemotes(const Kokkos::View< size_t *, Device > &tgt_rowptr, const Kokkos::View< const LO *, BufferDevice > &import_lids, const Kokkos::View< const Packet *, BufferDevice > &, const Kokkos::View< const size_t *, BufferDevice > &num_packets_per_lid)
Setup row pointers for remotes.
 
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
 
KokkosSparse::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
 
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
 
Struct that holds views of the contents of a CrsMatrix.
 
LocalOrdinal local_ordinal_type
The type of local indices.
 
GlobalOrdinal global_ordinal_type
The type of global indices.
 
DeviceType device_type
The device type.
 
Unpacks and combines a single row of the CrsGraph.
 
Implementation details of Tpetra.
 
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries.
 
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
 
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
 
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
 
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
 
Namespace Tpetra contains the class and methods constituting the Tpetra library.
 
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
 
CombineMode
Rule for combining data in an Import or Export.