10#ifndef TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_DEF_HPP 
   11#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_DEF_HPP 
   15#include "TpetraCore_config.h" 
   16#include "Kokkos_Core.hpp" 
   17#include "Teuchos_Array.hpp" 
   18#include "Teuchos_ArrayView.hpp" 
   19#include "Teuchos_OrdinalTraits.hpp" 
   20#include "Teuchos_TimeMonitor.hpp" 
   28#include "Tpetra_Details_DefaultTypes.hpp" 
   59namespace UnpackAndCombineCrsMatrixImpl {
 
   70template <
class ST, 
class LO, 
class GO>
 
   79          const size_t bytes_per_value) {
 
   84  bool unpack_pids = 
pids_out.size() > 0;
 
  101  const char* 
const pids_in    = unpack_pids ? imports + 
pids_beg : 
nullptr;
 
  112    Kokkos::pair<int, size_t> 
p;
 
 
  151template <
class LocalMatrix, 
class LocalMap, 
class BufferDeviceType>
 
  156  typedef typename local_matrix_type::value_type ST;
 
  160  typedef typename DT::execution_space XS;
 
  162  typedef Kokkos::View<const size_t*, BufferDeviceType>
 
  163      num_packets_per_lid_type;
 
  164  typedef Kokkos::View<const size_t*, DT> offsets_type;
 
  165  typedef Kokkos::View<const char*, BufferDeviceType> input_buffer_type;
 
  166  typedef Kokkos::View<const LO*, BufferDeviceType> import_lids_type;
 
  168  typedef Kokkos::View<int, DT> error_type;
 
  169  using member_type = 
typename Kokkos::TeamPolicy<XS>::member_type;
 
  171  static_assert(std::is_same<LO, typename local_matrix_type::ordinal_type>::value,
 
  172                "LocalMap::local_ordinal_type and " 
  173                "LocalMatrix::ordinal_type must be the same.");
 
  177  input_buffer_type imports;
 
  178  num_packets_per_lid_type num_packets_per_lid;
 
  179  import_lids_type import_lids;
 
  180  Kokkos::View<const LO* [2], DT> batch_info;
 
  181  offsets_type offsets;
 
  184  size_t bytes_per_value;
 
  186  error_type error_code;
 
  211    , error_code(
"error") {}
 
  214  void operator()(member_type team_member)
 const {
 
  215    using Kokkos::MemoryUnmanaged;
 
  216    using Kokkos::subview;
 
  219    const LO 
batch    = team_member.league_rank();
 
  231    const size_t buf_size = imports.size();
 
  250          "*** Error: UnpackCrsMatrixAndCombineFunctor: " 
  251          "At row %d, the expected number of bytes (%d) != number of unpacked bytes (%d)\n",
 
  254      Kokkos::atomic_compare_exchange(error_code.data(), 0, 21);
 
  260          "*** Error: UnpackCrsMatrixAndCombineFunctor: " 
  261          "At row %d, the offset (%d) > buffer size (%d)\n",
 
  264      Kokkos::atomic_compare_exchange(error_code.data(), 0, 22);
 
  296          "*** Error: UnpackCrsMatrixAndCombineFunctor: " 
  297          "At row %d, number of entries (%d) != number of entries unpacked (%d)\n",
 
  300      Kokkos::atomic_compare_exchange(error_code.data(), 0, 23);
 
  306    Kokkos::parallel_for(
 
  308        [=, *
this](
const LO& 
j) {
 
  325          if (combine_mode == 
ADD) {
 
  330            (
void)local_matrix.sumIntoValues(
 
  337          } 
else if (combine_mode == 
REPLACE) {
 
  342            (
void)local_matrix.replaceValues(
 
  352                "*** Error: UnpackCrsMatrixAndCombineFunctor: " 
  353                "At row %d, an unknown error occurred during unpack\n",
 
  355            Kokkos::atomic_compare_exchange(error_code.data(), 0, 31);
 
  359    team_member.team_barrier();
 
  364    auto error_code_h = Kokkos::create_mirror_view_and_copy(
 
  365        Kokkos::HostSpace(), error_code);
 
 
 
  // Empty tag type used for overload selection: the entry-counting functor in
  // this namespace declares operator() and join() overloads whose first
  // parameter is MaxNumEntTag, and that join() keeps the larger of its two
  // operands (a max-reduction).
  371struct MaxNumEntTag {};
 
  // Empty tag type passed as the work-tag template argument of the
  // Kokkos::RangePolicy used by the "Total num entries in CRS to unpack"
  // reduction in this namespace.
  // NOTE(review): presumably selects a summing operator() overload on the
  // entry-counting functor -- that overload is not visible here; confirm.
  372struct TotNumEntTag {};
 
  382template <
class LO, 
class DT, 
class BDT>
 
  385  typedef Kokkos::View<const size_t*, BDT> num_packets_per_lid_type;
 
  386  typedef Kokkos::View<const size_t*, DT> offsets_type;
 
  387  typedef Kokkos::View<const char*, BDT> input_buffer_type;
 
  393  num_packets_per_lid_type num_packets_per_lid;
 
  394  offsets_type offsets;
 
  395  input_buffer_type imports;
 
  406  operator()(
const MaxNumEntTag, 
const LO 
i, 
value_type& update)
 const {
 
  408    const size_t num_bytes = num_packets_per_lid(
i);
 
  411      const char* 
const in_buf = imports.data() + offsets(
i);
 
  420  join(
const MaxNumEntTag,
 
  423    if (dst < src) dst = src;
 
  429    const size_t num_bytes = num_packets_per_lid(
i);
 
  432      const char* 
const in_buf = imports.data() + offsets(
i);
 
 
  446template <
class LO, 
class DT, 
class BDT>
 
  449    const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
 
  450    const Kokkos::View<const size_t*, DT>& offsets,
 
  451    const Kokkos::View<const char*, BDT>& imports) {
 
  452  typedef typename DT::execution_space XS;
 
  453  typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>,
 
  460      static_cast<LO
>(num_packets_per_lid.extent(0));
 
  461  size_t max_num_ent = 0;
 
  462  Kokkos::parallel_reduce(
"Max num entries in CRS",
 
 
  475template <
class LO, 
class DT, 
class BDT>
 
  478    const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
 
  479    const Kokkos::View<const size_t*, DT>& offsets,
 
  480    const Kokkos::View<const char*, BDT>& imports) {
 
  481  typedef typename DT::execution_space XS;
 
  482  typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>, TotNumEntTag> 
range_policy;
 
  487      static_cast<LO
>(num_packets_per_lid.extent(0));
 
  488  Kokkos::parallel_reduce(
"Total num entries in CRS to unpack",
 
 
  497    unpackRowCount(
const char imports[],
 
  511  return static_cast<size_t>(num_ent_LO);
 
  518template <
class View1, 
class View2>
 
  523  using LO     = 
typename View2::value_type;
 
  527      batch_info(
batch, 0) = 
static_cast<LO
>(
i);
 
  532  return batch == batch_info.extent(0);
 
 
  542template <
class LocalMatrix, 
class LocalMap, 
class BufferDeviceType>
 
  546    const Kokkos::View<const char*, BufferDeviceType>& imports,
 
  547    const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
 
  550  using ST = 
typename LocalMatrix::value_type;
 
  553  using XS = 
typename DT::execution_space;
 
  555      "Tpetra::Details::UnpackAndCombineCrsMatrixImpl::" 
  556      "unpackAndCombineIntoCrsMatrix: ";
 
  558  const size_t num_import_lids = 
static_cast<size_t>(import_lids.extent(0));
 
  567                               std::invalid_argument,
 
  568                               prefix << 
"ABSMAX combine mode is not yet implemented for a matrix that has a " 
  569                                         "static graph (i.e., was constructed with the CrsMatrix constructor " 
  570                                         "that takes a const CrsGraph pointer).");
 
  573                               std::invalid_argument,
 
  574                               prefix << 
"INSERT combine mode is not allowed if the matrix has a static graph " 
  575                                         "(i.e., was constructed with the CrsMatrix constructor that takes a " 
  576                                         "const CrsGraph pointer).");
 
  580                               std::invalid_argument,
 
  581                               prefix << 
"Invalid combine mode; should never get " 
  582                                         "here!  Please report this bug to the Tpetra developers.");
 
  588                               std::invalid_argument,
 
  590                                                                                     "numPacketsPerLID.size() (" 
  591                                      << num_packets_per_lid.extent(0) << 
").");
 
  605  Kokkos::View<LO* [2], DT> batch_info(
"", 
num_batches);
 
  608  Kokkos::parallel_reduce(
 
  609      Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t>>(0, 
num_import_lids),
 
  612            imports.data(), offsets(
i), num_packets_per_lid(
i));
 
  636  const bool atomic = XS().concurrency() != 1;
 
  651  using policy           = Kokkos::TeamPolicy<XS, Kokkos::IndexType<LO>>;
 
  653  if (!Spaces::is_gpu_exec_space<XS>() || 
team_size == Teuchos::OrdinalTraits<size_t>::invalid()) {
 
  659  auto error_code = 
f.error();
 
  663      prefix << 
"UnpackCrsMatrixAndCombineFunctor reported error code " << error_code);
 
 
  666template <
class LocalMatrix, 
class BufferDeviceType>
 
  672    const Kokkos::View<const char*, BufferDeviceType, void, void>& imports,
 
  673    const Kokkos::View<const size_t*, BufferDeviceType, void, void>& num_packets_per_lid,
 
  675    const Kokkos::View<const char*, BufferDeviceType>& imports,
 
  676    const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
 
  679  using Kokkos::parallel_reduce;
 
  680  typedef typename LocalMatrix::ordinal_type LO;
 
  681  typedef typename LocalMatrix::device_type device_type;
 
  682  typedef typename device_type::execution_space XS;
 
  683  typedef typename Kokkos::View<LO*, device_type>::size_type size_type;
 
  684  typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>> 
range_policy;
 
  697          update += 
static_cast<size_t>(local_matrix.graph.row_map[
lid + 1] - local_matrix.graph.row_map[
lid]);
 
  704  num_items = 
static_cast<LO
>(permute_from_lids.extent(0));
 
  708        range_policy(0, num_items),
 
  709        KOKKOS_LAMBDA(
const LO i, 
size_t& update) {
 
  710          const LO lid = permute_from_lids(i);
 
  711          update += 
static_cast<size_t>(local_matrix.graph.row_map[lid + 1] - local_matrix.graph.row_map[lid]);
 
  719    const size_type np = num_packets_per_lid.extent(0);
 
  720    Kokkos::View<size_t*, device_type> offsets(
"offsets", np + 1);
 
  723        compute_total_num_entries<LO, device_type, BDT>(num_packets_per_lid,
 
  731template <
class LO, 
class DT, 
class BDT>
 
  732int setupRowPointersForRemotes(
 
  735    const Kokkos::View<const char*, BDT>& imports,
 
  736    const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
 
  738  using Kokkos::parallel_reduce;
 
  739  typedef typename DT::execution_space XS;
 
  741  typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type>> 
range_policy;
 
  744  const size_type 
N       = num_packets_per_lid.extent(0);
 
  748      "Setup row pointers for remotes",
 
  752        const size_t num_bytes = num_packets_per_lid(
i);
 
  753        const size_t offset    = offsets(
i);
 
 
  766void makeCrsRowPtrFromLengths(
 
  769  using Kokkos::parallel_scan;
 
  770  typedef typename DT::execution_space XS;
 
  771  typedef typename Kokkos::View<size_t*, DT>::size_type size_type;
 
  772  typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type>> 
range_policy;
 
  776      KOKKOS_LAMBDA(
const size_t& 
i, 
size_t& update, 
const bool& 
final) {
 
  786template <
class LocalMatrix, 
class LocalMap>
 
  787void copyDataFromSameIDs(
 
  789    const typename PackTraits<int>::output_array_type& tgt_pids,
 
  791    const Kokkos::View<size_t*, typename LocalMap::device_type>& new_start_row,
 
  793    const typename PackTraits<int>::input_array_type& src_pids,
 
  794    const LocalMatrix& local_matrix,
 
  795    const LocalMap& local_col_map,
 
  796    const size_t num_same_ids,
 
  798  using Kokkos::parallel_for;
 
  801  typedef typename DT::execution_space XS;
 
  802  typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t>> range_policy;
 
  805      range_policy(0, num_same_ids),
 
  806      KOKKOS_LAMBDA(
const size_t i) {
 
  807        typedef typename std::remove_reference<
decltype(new_start_row(0))>::type atomic_incr_type;
 
  809        const LO src_lid = 
static_cast<LO
>(i);
 
  810        size_t src_row   = local_matrix.graph.row_map(src_lid);
 
  812        const LO tgt_lid     = 
static_cast<LO
>(i);
 
  813        const size_t tgt_row = tgt_rowptr(tgt_lid);
 
  815        const size_t nsr = local_matrix.graph.row_map(src_lid + 1) - local_matrix.graph.row_map(src_lid);
 
  816        Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
 
  818        for (
size_t j = local_matrix.graph.row_map(src_lid);
 
  819             j < local_matrix.graph.row_map(src_lid + 1); ++j) {
 
  820          LO src_col                        = local_matrix.graph.entries(j);
 
  821          tgt_vals(tgt_row + j - src_row)   = local_matrix.values(j);
 
  822          tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
 
  823          tgt_pids(tgt_row + j - src_row)   = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
 
  828template <
class LocalMatrix, 
class LocalMap>
 
  829void copyDataFromPermuteIDs(
 
  831    const typename PackTraits<int>::output_array_type& tgt_pids,
 
  833    const Kokkos::View<size_t*, typename LocalMap::device_type>& new_start_row,
 
  835    const typename PackTraits<int>::input_array_type& src_pids,
 
  838    const LocalMatrix& local_matrix,
 
  839    const LocalMap& local_col_map,
 
  841  using Kokkos::parallel_for;
 
  844  typedef typename DT::execution_space XS;
 
  845  typedef typename PackTraits<LO>::input_array_type::size_type size_type;
 
  846  typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type>> range_policy;
 
  848  const size_type num_permute_to_lids = permute_to_lids.extent(0);
 
  851      range_policy(0, num_permute_to_lids),
 
  852      KOKKOS_LAMBDA(
const size_t i) {
 
  853        typedef typename std::remove_reference<
decltype(new_start_row(0))>::type atomic_incr_type;
 
  855        const LO src_lid     = permute_from_lids(i);
 
  856        const size_t src_row = local_matrix.graph.row_map(src_lid);
 
  858        const LO tgt_lid     = permute_to_lids(i);
 
  859        const size_t tgt_row = tgt_rowptr(tgt_lid);
 
  861        size_t nsr = local_matrix.graph.row_map(src_lid + 1) - local_matrix.graph.row_map(src_lid);
 
  862        Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
 
  864        for (
size_t j = local_matrix.graph.row_map(src_lid);
 
  865             j < local_matrix.graph.row_map(src_lid + 1); ++j) {
 
  866          LO src_col                        = local_matrix.graph.entries(j);
 
  867          tgt_vals(tgt_row + j - src_row)   = local_matrix.values(j);
 
  868          tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
 
  869          tgt_pids(tgt_row + j - src_row)   = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
 
  874template <
typename LocalMatrix, 
typename LocalMap, 
typename BufferDeviceType>
 
  875int unpackAndCombineIntoCrsArrays2(
 
  877    const typename PackTraits<int>::output_array_type& tgt_pids,
 
  879    const Kokkos::View<size_t*, typename LocalMap::device_type>& new_start_row,
 
  882#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
 
  883    const Kokkos::View<const char*, BufferDeviceType, void, void>& imports,
 
  884    const Kokkos::View<const size_t*, BufferDeviceType, void, void>& num_packets_per_lid,
 
  886    const Kokkos::View<const char*, BufferDeviceType>& imports,
 
  887    const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
 
  892    const size_t bytes_per_value) {
 
  893  using Kokkos::atomic_fetch_add;
 
  894  using Kokkos::MemoryUnmanaged;
 
  895  using Kokkos::parallel_reduce;
 
  896  using Kokkos::subview;
 
  902  typedef typename LocalMatrix::value_type ST;
 
  903  typedef typename DT::execution_space XS;
 
  904  typedef typename Kokkos::View<LO*, DT>::size_type size_type;
 
  905  typedef typename Kokkos::pair<size_type, size_type> slice;
 
  906  typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type>> range_policy;
 
  908  typedef View<int*, DT, MemoryUnmanaged> pids_out_type;
 
  909  typedef View<GO*, DT, MemoryUnmanaged> gids_out_type;
 
  910  typedef View<ST*, DT, MemoryUnmanaged> vals_out_type;
 
  912  const size_t InvalidNum = OrdinalTraits<size_t>::invalid();
 
  915  const size_type num_import_lids = import_lids.size();
 
  919      "Unpack and combine into CRS",
 
  920      range_policy(0, num_import_lids),
 
  921      KOKKOS_LAMBDA(
const size_t i, 
int& k_error) {
 
  922        typedef typename std::remove_reference<
decltype(new_start_row(0))>::type atomic_incr_type;
 
  923        const size_t num_bytes = num_packets_per_lid(i);
 
  924        const size_t offset    = offsets(i);
 
  925        if (num_bytes == 0) {
 
  929        size_t num_ent = unpackRowCount<LO>(imports.data(), offset, num_bytes);
 
  930        if (num_ent == InvalidNum) {
 
  934        const LO lcl_row       = import_lids(i);
 
  935        const size_t start_row = atomic_fetch_add(&new_start_row(lcl_row), atomic_incr_type(num_ent));
 
  936        const size_t end_row   = start_row + num_ent;
 
  938        gids_out_type gids_out = subview(tgt_colind, slice(start_row, end_row));
 
  939        vals_out_type vals_out = subview(tgt_vals, slice(start_row, end_row));
 
  940        pids_out_type pids_out = subview(tgt_pids, slice(start_row, end_row));
 
  942        k_error += unpackRow<ST, LO, GO>(gids_out, pids_out, vals_out,
 
  943                                         imports.data(), offset, num_bytes,
 
  944                                         num_ent, bytes_per_value);
 
  947        for (
size_t j = 0; j < static_cast<size_t>(num_ent); ++j) {
 
  948          const int pid = pids_out(j);
 
  949          pids_out(j)   = (pid != my_pid) ? pid : -1;
 
  957template <
typename LocalMatrix, 
typename LocalMap, 
typename BufferDeviceType>
 
  959    const LocalMatrix& local_matrix,
 
  960    const LocalMap& local_col_map,
 
  962#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
 
  963    const Kokkos::View<const char*, BufferDeviceType, void, void>& imports,
 
  964    const Kokkos::View<const size_t*, BufferDeviceType, void, void>& num_packets_per_lid,
 
  966    const Kokkos::View<const char*, BufferDeviceType>& imports,
 
  967    const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
 
  974    const typename PackTraits<int>::input_array_type& src_pids,
 
  975    const typename PackTraits<int>::output_array_type& tgt_pids,
 
  976    const size_t num_same_ids,
 
  977    const size_t tgt_num_rows,
 
  978    const size_t tgt_num_nonzeros,
 
  979    const int my_tgt_pid,
 
  980    const size_t bytes_per_value) {
 
  981  using Kokkos::MemoryUnmanaged;
 
  982  using Kokkos::parallel_for;
 
  983  using Kokkos::subview;
 
  988  typedef typename DT::execution_space XS;
 
  989  typedef typename Kokkos::View<LO*, DT>::size_type size_type;
 
  990  typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t>> range_policy;
 
  991  typedef BufferDeviceType BDT;
 
  993  const char prefix[] = 
"unpackAndCombineIntoCrsArrays: ";
 
  995  const size_t N = tgt_num_rows;
 
  999  const int my_pid = my_tgt_pid;
 
 1003      range_policy(0, N + 1),
 
 1004      KOKKOS_LAMBDA(
const size_t i) {
 
 1010      range_policy(0, num_same_ids),
 
 1011      KOKKOS_LAMBDA(
const size_t i) {
 
 1012        const LO tgt_lid    = 
static_cast<LO
>(i);
 
 1013        const LO src_lid    = 
static_cast<LO
>(i);
 
 1014        tgt_rowptr(tgt_lid) = local_matrix.graph.row_map(src_lid + 1) - local_matrix.graph.row_map(src_lid);
 
 1018  const size_type num_permute_to_lids = permute_to_lids.extent(0);
 
 1020      range_policy(0, num_permute_to_lids),
 
 1021      KOKKOS_LAMBDA(
const size_t i) {
 
 1022        const LO tgt_lid    = permute_to_lids(i);
 
 1023        const LO src_lid    = permute_from_lids(i);
 
 1024        tgt_rowptr(tgt_lid) = local_matrix.graph.row_map(src_lid + 1) - local_matrix.graph.row_map(src_lid);
 
 1028  const size_type num_import_lids = import_lids.extent(0);
 
 1029  View<size_t*, DT> offsets(
"offsets", num_import_lids + 1);
 
 1032#ifdef HAVE_TPETRA_DEBUG 
 1034    auto nth_offset_h = getEntryOnHost(offsets, num_import_lids);
 
 1035    const bool condition =
 
 1036        nth_offset_h != 
static_cast<size_t>(imports.extent(0));
 
 1037    TEUCHOS_TEST_FOR_EXCEPTION(condition, std::logic_error, prefix << 
"The final offset in bytes " << nth_offset_h << 
" != imports.size() = " << imports.extent(0) << 
".  Please report this bug to the Tpetra developers.");
 
 1043      setupRowPointersForRemotes<LO, DT, BDT>(tgt_rowptr,
 
 1044                                              import_lids, imports, num_packets_per_lid, offsets);
 
 1045  TEUCHOS_TEST_FOR_EXCEPTION(k_error != 0, std::logic_error, prefix << 
" Error transferring data to target row pointers.  " 
 1046                                                                       "Please report this bug to the Tpetra developers.");
 
 1050  View<size_t*, DT> new_start_row(
"new_start_row", N + 1);
 
 1053  makeCrsRowPtrFromLengths(tgt_rowptr, new_start_row);
 
 1056  copyDataFromSameIDs(tgt_colind, tgt_pids, tgt_vals, new_start_row,
 
 1057                      tgt_rowptr, src_pids, local_matrix, local_col_map, num_same_ids, my_pid);
 
 1059  copyDataFromPermuteIDs(tgt_colind, tgt_pids, tgt_vals, new_start_row,
 
 1060                         tgt_rowptr, src_pids, permute_to_lids, permute_from_lids,
 
 1061                         local_matrix, local_col_map, my_pid);
 
 1063  if (imports.extent(0) <= 0) {
 
 1067  int unpack_err = unpackAndCombineIntoCrsArrays2(tgt_colind, tgt_pids,
 
 1068                                                  tgt_vals, new_start_row, offsets, import_lids, imports, num_packets_per_lid,
 
 1069                                                  local_matrix, local_col_map, my_pid, bytes_per_value);
 
 1070  TEUCHOS_TEST_FOR_EXCEPTION(
 
 1071      unpack_err != 0, std::logic_error, prefix << 
"unpack loop failed.  This " 
 1072                                                   "should never happen.  Please report this bug to the Tpetra developers.");
 
 1118template <
typename ST, 
typename LO, 
typename GO, 
typename Node>
 
 1121    const Teuchos::ArrayView<const char>& imports,
 
 1123    const Teuchos::ArrayView<const LO>& 
importLIDs,
 
 1127  typedef typename Node::device_type device_type;
 
 1129  static_assert(std::is_same<device_type, typename local_matrix_device_type::device_type>::value,
 
 1130                "Node::device_type and LocalMatrix::device_type must be the same.");
 
 1148                                             imports.size(), 
true, 
"imports");
 
 1150  auto local_matrix  = 
sourceMatrix.getLocalMatrixDevice();
 
 1151  auto local_col_map = 
sourceMatrix.getColMap()->getLocalMap();
 
 1162  UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix(
 
 
 1167template <
typename ST, 
typename LO, 
typename GO, 
typename NT>
 
 1168void unpackCrsMatrixAndCombineNew(
 
 1170    Kokkos::DualView<
char*,
 
 1173    Kokkos::DualView<
size_t*,
 
 1176    const Kokkos::DualView<
const LO*,
 
 1183  using device_type              = 
typename crs_matrix_type::device_type;
 
 1184  using local_matrix_device_type = 
typename crs_matrix_type::local_matrix_device_type;
 
 1185  using buffer_device_type       = 
typename dist_object_type::buffer_device_type;
 
 1187  static_assert(std::is_same<device_type, typename local_matrix_device_type::device_type>::value,
 
 1188                "crs_matrix_type::device_type and local_matrix_device_type::device_type " 
 1189                "must be the same.");
 
 1194  auto num_packets_per_lid_d = numPacketsPerLID.view_device();
 
 1196  TEUCHOS_ASSERT(!importLIDs.need_sync_device());
 
 1197  auto import_lids_d = importLIDs.view_device();
 
 1199  if (imports.need_sync_device()) {
 
 1200    imports.sync_device();
 
 1202  auto imports_d = imports.view_device();
 
 1204  auto local_matrix  = sourceMatrix.getLocalMatrixDevice();
 
 1205  auto local_col_map = sourceMatrix.getColMap()->getLocalMap();
 
 1206  typedef decltype(local_col_map) local_map_type;
 
 1208  UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix<
 
 1209      local_matrix_device_type,
 
 1211      buffer_device_type>(local_matrix, local_col_map, imports_d, num_packets_per_lid_d,
 
 1212                          import_lids_d, combineMode);
 
 1270template <
typename Scalar, 
typename LocalOrdinal, 
typename GlobalOrdinal, 
typename Node>
 
 1274    const Teuchos::ArrayView<const LocalOrdinal>& 
importLIDs,
 
 1275    const Teuchos::ArrayView<const char>& imports,
 
 1280    const Teuchos::ArrayView<const LocalOrdinal>& 
permuteToLIDs,
 
 1282  using Kokkos::MemoryUnmanaged;
 
 1284  typedef typename Node::device_type DT;
 
 1285  const char prefix[] = 
"unpackAndCombineWithOwningPIDsCount: ";
 
 1289                                                                                            "permuteFromLIDs.size() = " 
 1295                                                                               "CrsMatrix 'sourceMatrix' must be locally indexed.");
 
 1298                                                                                      "numPacketsPerLID.size() = " 
 1301  auto local_matrix = 
sourceMatrix.getLocalMatrixDevice();
 
 1303  using kokkos_device_type = Kokkos::Device<
typename Node::device_type::execution_space,
 
 1304                                            Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>;
 
 1306#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 
 1314                                             "permute_from_lids");
 
 1316#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 
 1317  Kokkos::View<const char*, kokkos_device_type, void, void> 
imports_d =
 
 1319  Kokkos::View<const char*, kokkos_device_type> 
imports_d =
 
 1322                                             imports.getRawPtr(),
 
 1323                                             imports.size(), 
true,
 
 1326#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 
 1334                                             "num_packets_per_lid");
 
 1336  return UnpackAndCombineCrsMatrixImpl::unpackAndCombineWithOwningPIDsCount(
 
 
 1356template <
typename Scalar, 
typename LocalOrdinal, 
typename GlobalOrdinal, 
typename Node>
 
 1360                       Kokkos::Device<
typename Node::device_type::execution_space,
 
 1361                                      Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
 
 1368    const Kokkos::View<
const char*,
 
 1369                       Kokkos::Device<
typename Node::device_type::execution_space,
 
 1370                                      Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
 
 1377    const Kokkos::View<
const size_t*,
 
 1378                       Kokkos::Device<
typename Node::device_type::execution_space,
 
 1379                                      Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
 
 1386    const size_t numSameIDs,
 
 1388                       Kokkos::Device<
typename Node::device_type::execution_space,
 
 1389                                      Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
 
 1397                       Kokkos::Device<
typename Node::device_type::execution_space,
 
 1398                                      Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
 
 1407    Kokkos::View<size_t*, typename Node::device_type>& 
crs_rowptr_d,
 
 1408    Kokkos::View<GlobalOrdinal*, typename Node::device_type>& 
crs_colind_d,
 
 1410    const Teuchos::ArrayView<const int>& 
SourcePids,
 
 1411    Kokkos::View<int*, typename Node::device_type>& 
TargetPids) {
 
 1412  using execution_space = 
typename Node::execution_space;
 
 1415  using Kokkos::deep_copy;
 
 1418  using Teuchos::ArrayView;
 
 1419  using Teuchos::outArg;
 
 1420  using Teuchos::REDUCE_MAX;
 
 1421  using Teuchos::reduceAll;
 
 1423  typedef typename Node::device_type DT;
 
 1426  typedef typename matrix_type::impl_scalar_type ST;
 
 1428  const char prefix[] = 
"Tpetra::Details::unpackAndCombineIntoCrsArrays_new: ";
 
 1429#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1430  using Teuchos::TimeMonitor;
 
 1431  Teuchos::RCP<TimeMonitor> tm;
 
 1434  using Kokkos::MemoryUnmanaged;
 
 1438                                                                                                    "permute_from_lids_d.size() = " 
 1444                                                                               "CrsMatrix 'sourceMatrix' must be locally indexed.");
 
 1447                                                                                            "num_packets_per_lid_d.size() = " 
 1450  auto local_matrix = 
sourceMatrix.getLocalMatrixDevice();
 
 1453#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1454  tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"unpackAndCombineWithOwningPIDsCount"))));
 
 1457      UnpackAndCombineCrsMatrixImpl::unpackAndCombineWithOwningPIDsCount(
 
 1460#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1464#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1465  tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"resize CRS pointers"))));
 
 1470#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1482  Kokkos::deep_copy(execution_space(), 
TargetPids, -1);
 
 1485  auto local_col_map = 
sourceMatrix.getColMap()->getLocalMap();
 
 1487#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1488  tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"create mirror views from inputs"))));
 
 1497#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1501  size_t bytes_per_value = 0;
 
 1515    if (local_matrix.values.extent(0) > 0) {
 
 1516      const ST& 
val     = local_matrix.values(0);
 
 1522    Teuchos::reduceAll<int, size_t>(*(
sourceMatrix.getComm()),
 
 1523                                    Teuchos::REDUCE_MAX,
 
 1525                                    outArg(bytes_per_value));
 
 1528#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1529  tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"unpackAndCombineIntoCrsArrays"))));
 
 1531  UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsArrays(
 
 1537#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1542#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1543  tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"copy back to host"))));
 
 1546  Kokkos::parallel_for(
 
 1547      "setLocalEntriesToPID", Kokkos::RangePolicy<typename DT::execution_space>(0, 
TargetPids.size()), 
KOKKOS_LAMBDA(
const size_t i) {
 
 
 1553template <
typename Scalar, 
typename LocalOrdinal, 
typename GlobalOrdinal, 
typename Node>
 
 1557                       Kokkos::Device<
typename Node::device_type::execution_space,
 
 1558                                      Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
 
 1565    const Kokkos::View<
const char*,
 
 1566                       Kokkos::Device<
typename Node::device_type::execution_space,
 
 1567                                      Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
 
 1574    const Kokkos::View<
const size_t*,
 
 1575                       Kokkos::Device<
typename Node::device_type::execution_space,
 
 1576                                      Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
 
 1583    const size_t numSameIDs,
 
 1585                       Kokkos::Device<
typename Node::device_type::execution_space,
 
 1586                                      Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
 
 1594                       Kokkos::Device<
typename Node::device_type::execution_space,
 
 1595                                      Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
 
 1605    Teuchos::ArrayRCP<GlobalOrdinal>& 
CRS_colind,
 
 1606    Teuchos::ArrayRCP<Scalar>& 
CRS_vals,
 
 1607    const Teuchos::ArrayView<const int>& 
SourcePids,
 
 1609  using execution_space = 
typename Node::execution_space;
 
 1612  using Kokkos::deep_copy;
 
 1615  using Teuchos::ArrayView;
 
 1616  using Teuchos::outArg;
 
 1617  using Teuchos::REDUCE_MAX;
 
 1618  using Teuchos::reduceAll;
 
 1620  typedef typename Node::device_type DT;
 
 1623  typedef typename matrix_type::impl_scalar_type ST;
 
 1625  const char prefix[] = 
"Tpetra::Details::unpackAndCombineIntoCrsArrays_new: ";
 
 1626#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1627  using Teuchos::TimeMonitor;
 
 1628  Teuchos::RCP<TimeMonitor> tm;
 
 1631  using Kokkos::MemoryUnmanaged;
 
 1635                                                                                                    "permute_from_lids_d.size() = " 
 1641                                                                               "CrsMatrix 'sourceMatrix' must be locally indexed.");
 
 1644                                                                                            "num_packets_per_lid_d.size() = " 
 1647  auto local_matrix = 
sourceMatrix.getLocalMatrixDevice();
 
 1650#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1651  tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"unpackAndCombineWithOwningPIDsCount"))));
 
 1654      UnpackAndCombineCrsMatrixImpl::unpackAndCombineWithOwningPIDsCount(
 
 1657#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1661#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1662  tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"resize CRS pointers"))));
 
 1668#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1684  auto local_col_map = 
sourceMatrix.getColMap()->getLocalMap();
 
 1686#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1687  tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"create mirror views from inputs"))));
 
 1699#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE 
 1700  static_assert(!std::is_same<
 
 1701                    typename std::remove_const<
 
 1702                        typename std::decay<
 
 1704                    std::complex<double>>::value,
 
 1705                "CRS_vals::value_type is std::complex<double>; this should never happen" 
 1706                ", since std::complex does not work in Kokkos::View objects.");
 
 1713#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE 
 1714  static_assert(!std::is_same<
 
 1715                    typename decltype(
crs_vals_d)::non_const_value_type,
 
 1716                    std::complex<double>>::value,
 
 1717                "crs_vals_d::non_const_value_type is std::complex<double>; this should " 
 1718                "never happen, since std::complex does not work in Kokkos::View objects.");
 
 1729#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1733  size_t bytes_per_value = 0;
 
 1747    if (local_matrix.values.extent(0) > 0) {
 
 1748      const ST& 
val     = local_matrix.values(0);
 
 1754    Teuchos::reduceAll<int, size_t>(*(
sourceMatrix.getComm()),
 
 1755                                    Teuchos::REDUCE_MAX,
 
 1757                                    outArg(bytes_per_value));
 
 1760#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE 
 1761  static_assert(!std::is_same<
 
 1762                    typename decltype(
crs_vals_d)::non_const_value_type,
 
 1763                    std::complex<double>>::value,
 
 1764                "crs_vals_d::non_const_value_type is std::complex<double>; this should " 
 1765                "never happen, since std::complex does not work in Kokkos::View objects.");
 
 1768#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1769  tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"unpackAndCombineIntoCrsArrays"))));
 
 1771  UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsArrays(
 
 1777#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1782#ifdef HAVE_TPETRA_MMM_TIMINGS 
 1783  tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"copy back to host"))));
 
 
 1810#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT_KOKKOS_DEPRECATED_CODE_4_ON(ST, LO, GO, NT)                        \ 
 1812  Details::unpackCrsMatrixAndCombine<ST, LO, GO, NT>(                                                                       \ 
 1813      const CrsMatrix<ST, LO, GO, NT>&,                                                                                     \ 
 1814      const Teuchos::ArrayView<const char>&,                                                                                \ 
 1815      const Teuchos::ArrayView<const size_t>&,                                                                              \ 
 1816      const Teuchos::ArrayView<const LO>&,                                                                                  \ 
 1820  Details::unpackAndCombineWithOwningPIDsCount<ST, LO, GO, NT>(                                                             \ 
 1821      const CrsMatrix<ST, LO, GO, NT>&,                                                                                     \ 
 1822      const Teuchos::ArrayView<const LO>&,                                                                                  \ 
 1823      const Teuchos::ArrayView<const char>&,                                                                                \ 
 1824      const Teuchos::ArrayView<const size_t>&,                                                                              \ 
 1828      const Teuchos::ArrayView<const LO>&,                                                                                  \ 
 1829      const Teuchos::ArrayView<const LO>&);                                                                                 \ 
 1831  Details::unpackCrsMatrixAndCombineNew<ST, LO, GO, NT>(                                                                    \ 
 1832      const CrsMatrix<ST, LO, GO, NT>&,                                                                                     \ 
 1833      Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>,                                   \ 
 1834      Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>,                                 \ 
 1835      const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>&,                        \ 
 1837      const CombineMode);                                                                                                   \ 
 1839  Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT>(                                                                   \ 
 1840      const CrsMatrix<ST, LO, GO, NT>&,                                                                                     \ 
 1841      const Kokkos::View<LO const*,                                                                                         \ 
 1842                         Kokkos::Device<typename NT::device_type::execution_space,                                          \ 
 1843                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \ 
 1845      const Kokkos::View<const char*,                                                                                       \ 
 1846                         Kokkos::Device<typename NT::device_type::execution_space,                                          \ 
 1847                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \ 
 1849      const Kokkos::View<const size_t*,                                                                                     \ 
 1850                         Kokkos::Device<typename NT::device_type::execution_space,                                          \ 
 1851                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \ 
 1854      const Kokkos::View<LO const*,                                                                                         \ 
 1855                         Kokkos::Device<typename NT::device_type::execution_space,                                          \ 
 1856                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \ 
 1858      const Kokkos::View<LO const*,                                                                                         \ 
 1859                         Kokkos::Device<typename NT::device_type::execution_space,                                          \ 
 1860                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \ 
 1864      Kokkos::View<size_t*, typename NT::device_type>&,                                                                     \ 
 1865      Kokkos::View<GO*, typename NT::device_type>&,                                                                         \ 
 1866      Kokkos::View<typename CrsMatrix<ST, LO, GO, NT>::impl_scalar_type*, typename NT::device_type>&,                       \ 
 1867      const Teuchos::ArrayView<const int>&,                                                                                 \ 
 1868      Kokkos::View<int*, typename NT::device_type>&);                                                                       \ 
 1870  Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT>(                                                                   \ 
 1871      const CrsMatrix<ST, LO, GO, NT>&,                                                                                     \ 
 1872      const Kokkos::View<LO const*,                                                                                         \ 
 1873                         Kokkos::Device<typename NT::device_type::execution_space,                                          \ 
 1874                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \ 
 1876      const Kokkos::View<const char*,                                                                                       \ 
 1877                         Kokkos::Device<typename NT::device_type::execution_space,                                          \ 
 1878                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \ 
 1880      const Kokkos::View<const size_t*,                                                                                     \ 
 1881                         Kokkos::Device<typename NT::device_type::execution_space,                                          \ 
 1882                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \ 
 1885      const Kokkos::View<LO const*,                                                                                         \ 
 1886                         Kokkos::Device<typename NT::device_type::execution_space,                                          \ 
 1887                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \ 
 1889      const Kokkos::View<LO const*,                                                                                         \ 
 1890                         Kokkos::Device<typename NT::device_type::execution_space,                                          \ 
 1891                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \ 
 1895      Teuchos::ArrayRCP<size_t>&,                                                                                           \ 
 1896      Teuchos::ArrayRCP<GO>&,                                                                                               \ 
 1897      Teuchos::ArrayRCP<ST>&,                                                                                               \ 
 1898      const Teuchos::ArrayView<const int>&,                                                                                 \ 
 1899      Teuchos::Array<int>&); 
 1901#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT_KOKKOS_DEPRECATED_CODE_4_OFF(ST, LO, GO, NT)                        \ 
 1903  Details::unpackCrsMatrixAndCombine<ST, LO, GO, NT>(                                                                        \ 
 1904      const CrsMatrix<ST, LO, GO, NT>&,                                                                                      \ 
 1905      const Teuchos::ArrayView<const char>&,                                                                                 \ 
 1906      const Teuchos::ArrayView<const size_t>&,                                                                               \ 
 1907      const Teuchos::ArrayView<const LO>&,                                                                                   \ 
 1911  Details::unpackAndCombineWithOwningPIDsCount<ST, LO, GO, NT>(                                                              \ 
 1912      const CrsMatrix<ST, LO, GO, NT>&,                                                                                      \ 
 1913      const Teuchos::ArrayView<const LO>&,                                                                                   \ 
 1914      const Teuchos::ArrayView<const char>&,                                                                                 \ 
 1915      const Teuchos::ArrayView<const size_t>&,                                                                               \ 
 1919      const Teuchos::ArrayView<const LO>&,                                                                                   \ 
 1920      const Teuchos::ArrayView<const LO>&);                                                                                  \ 
 1922  Details::unpackCrsMatrixAndCombineNew<ST, LO, GO, NT>(                                                                     \ 
 1923      const CrsMatrix<ST, LO, GO, NT>&,                                                                                      \ 
 1924      Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>,                                    \ 
 1925      Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>,                                  \ 
 1926      const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>&,                         \ 
 1928      const CombineMode);                                                                                                    \ 
 1930  Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT>(                                                                    \ 
 1931      const CrsMatrix<ST, LO, GO, NT>&,                                                                                      \ 
 1932      const Kokkos::View<LO const*,                                                                                          \ 
 1933                         Kokkos::Device<typename NT::device_type::execution_space,                                           \ 
 1934                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \ 
 1935      const Kokkos::View<const char*,                                                                                        \ 
 1936                         Kokkos::Device<typename NT::device_type::execution_space,                                           \ 
 1937                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \ 
 1938      const Kokkos::View<const size_t*,                                                                                      \ 
 1939                         Kokkos::Device<typename NT::device_type::execution_space,                                           \ 
 1940                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \ 
 1942      const Kokkos::View<LO const*,                                                                                          \ 
 1943                         Kokkos::Device<typename NT::device_type::execution_space,                                           \ 
 1944                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \ 
 1945      const Kokkos::View<LO const*,                                                                                          \ 
 1946                         Kokkos::Device<typename NT::device_type::execution_space,                                           \ 
 1947                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \ 
 1950      Kokkos::View<size_t*, typename NT::device_type>&,                                                                      \ 
 1951      Kokkos::View<GO*, typename NT::device_type>&,                                                                          \ 
 1952      Kokkos::View<typename CrsMatrix<ST, LO, GO, NT>::impl_scalar_type*, typename NT::device_type>&,                        \ 
 1953      const Teuchos::ArrayView<const int>&,                                                                                  \ 
 1954      Kokkos::View<int*, typename NT::device_type>&);                                                                        \ 
 1956  Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT>(                                                                    \ 
 1957      const CrsMatrix<ST, LO, GO, NT>&,                                                                                      \ 
 1958      const Kokkos::View<LO const*,                                                                                          \ 
 1959                         Kokkos::Device<typename NT::device_type::execution_space,                                           \ 
 1960                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \ 
 1961      const Kokkos::View<const char*,                                                                                        \ 
 1962                         Kokkos::Device<typename NT::device_type::execution_space,                                           \ 
 1963                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \ 
 1964      const Kokkos::View<const size_t*,                                                                                      \ 
 1965                         Kokkos::Device<typename NT::device_type::execution_space,                                           \ 
 1966                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \ 
 1968      const Kokkos::View<LO const*,                                                                                          \ 
 1969                         Kokkos::Device<typename NT::device_type::execution_space,                                           \ 
 1970                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \ 
 1971      const Kokkos::View<LO const*,                                                                                          \ 
 1972                         Kokkos::Device<typename NT::device_type::execution_space,                                           \ 
 1973                                        Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \ 
 1976      Teuchos::ArrayRCP<size_t>&,                                                                                            \ 
 1977      Teuchos::ArrayRCP<GO>&,                                                                                                \ 
 1978      Teuchos::ArrayRCP<ST>&,                                                                                                \ 
 1979      const Teuchos::ArrayView<const int>&,                                                                                  \ 
 1980      Teuchos::Array<int>&); 
 1982#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 
 1983#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT(ST, LO, GO, NT) \ 
 1984  TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT_KOKKOS_DEPRECATED_CODE_4_ON(ST, LO, GO, NT) 
 1986#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT(ST, LO, GO, NT) \ 
 1987  TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT_KOKKOS_DEPRECATED_CODE_4_OFF(ST, LO, GO, NT) 
Declaration of the Tpetra::CrsMatrix class.
 
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
 
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
 
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
 
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
 
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
 
Declaration and definition of Tpetra::Details::getEntryOnHost.
 
size_t compute_total_num_entries(const Kokkos::View< const size_t *, BDT > &num_packets_per_lid, const Kokkos::View< const size_t *, DT > &offsets, const Kokkos::View< const char *, BDT > &imports)
Total number of entries over all rows of the packed matrix.
 
void unpackAndCombineIntoCrsMatrix(const LocalMatrix &local_matrix, const LocalMap &local_map, const Kokkos::View< const char *, BufferDeviceType > &imports, const Kokkos::View< const size_t *, BufferDeviceType > &num_packets_per_lid, const typename PackTraits< typename LocalMap::local_ordinal_type >::input_array_type import_lids, const Tpetra::CombineMode combine_mode)
Perform the unpack operation for the matrix.
 
size_t compute_maximum_num_entries(const Kokkos::View< const size_t *, BDT > &num_packets_per_lid, const Kokkos::View< const size_t *, DT > &offsets, const Kokkos::View< const char *, BDT > &imports)
Maximum number of entries in any row of the packed matrix.
 
bool compute_batch_info(const View1 &batches_per_lid, View2 &batch_info)
Compute the index and batch number associated with each batch.
 
Struct that holds views of the contents of a CrsMatrix.
 
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of KokkosSparse::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
 
typename row_matrix_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
 
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
 
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
 
"Local" part of Map suitable for Kokkos kernels.
 
KOKKOS_INLINE_FUNCTION LocalOrdinal getLocalElement(const GlobalOrdinal globalIndex) const
Get the local index corresponding to the given global index. (device only)
 
LocalOrdinal local_ordinal_type
The type of local indices.
 
GlobalOrdinal global_ordinal_type
The type of global indices.
 
DeviceType device_type
The device type.
 
Kokkos::parallel_reduce functor to determine the number of entries (to unpack) in a KokkosSparse::Crs...
 
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
 
Implementation details of Tpetra.
 
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
 
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
 
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
 
void unpackCrsMatrixAndCombine(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &importLIDs, size_t constantNumPackets, CombineMode combineMode)
Unpack the imported column indices and values, and combine into matrix.
 
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
 
Namespace Tpetra contains the class and methods constituting the Tpetra library.
 
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
 
CombineMode
Rule for combining data in an Import or Export.
 
@ REPLACE
Replace existing values with new values.
 
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
 
@ INSERT
Insert new values that don't currently exist.
 
Traits class for packing / unpacking data of type T.
 
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > unpackArray(value_type outBuf[], const char inBuf[], const size_t numEnt)
Unpack numEnt value_type entries from the given input buffer of bytes, to the given output buffer of ...
 
static KOKKOS_INLINE_FUNCTION size_t unpackValue(T &outVal, const char inBuf[])
Unpack the given value from the given input buffer.
 
Kokkos::View< value_type *, Kokkos::AnonymousSpace > output_array_type
The type of an output array of value_type.
 
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
 
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.
 
Unpacks and combines a single row of the CrsMatrix.
 
int error() const
Host function for getting the error.