10#ifndef TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_DEF_HPP
11#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_DEF_HPP
15#include "TpetraCore_config.h"
16#include "Kokkos_Core.hpp"
17#include "Teuchos_Array.hpp"
18#include "Teuchos_ArrayView.hpp"
19#include "Teuchos_OrdinalTraits.hpp"
20#include "Teuchos_TimeMonitor.hpp"
28#include "Tpetra_Details_DefaultTypes.hpp"
59namespace UnpackAndCombineCrsMatrixImpl {
70template <
class ST,
class LO,
class GO>
79 const size_t bytes_per_value) {
84 bool unpack_pids =
pids_out.size() > 0;
101 const char*
const pids_in = unpack_pids ? imports +
pids_beg :
nullptr;
112 Kokkos::pair<int, size_t>
p;
151template <
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
156 typedef typename local_matrix_type::value_type ST;
160 typedef typename DT::execution_space XS;
162 typedef Kokkos::View<const size_t*, BufferDeviceType>
163 num_packets_per_lid_type;
164 typedef Kokkos::View<const size_t*, DT> offsets_type;
165 typedef Kokkos::View<const char*, BufferDeviceType> input_buffer_type;
166 typedef Kokkos::View<const LO*, BufferDeviceType> import_lids_type;
168 typedef Kokkos::View<int, DT> error_type;
169 using member_type =
typename Kokkos::TeamPolicy<XS>::member_type;
171 static_assert(std::is_same<LO, typename local_matrix_type::ordinal_type>::value,
172 "LocalMap::local_ordinal_type and "
173 "LocalMatrix::ordinal_type must be the same.");
177 input_buffer_type imports;
178 num_packets_per_lid_type num_packets_per_lid;
179 import_lids_type import_lids;
180 Kokkos::View<const LO* [2], DT> batch_info;
181 offsets_type offsets;
184 size_t bytes_per_value;
186 error_type error_code;
211 , error_code(
"error") {}
214 void operator()(member_type team_member)
const {
215 using Kokkos::MemoryUnmanaged;
216 using Kokkos::subview;
219 const LO
batch = team_member.league_rank();
231 const size_t buf_size = imports.size();
250 "*** Error: UnpackCrsMatrixAndCombineFunctor: "
251 "At row %d, the expected number of bytes (%d) != number of unpacked bytes (%d)\n",
254 Kokkos::atomic_compare_exchange(error_code.data(), 0, 21);
260 "*** Error: UnpackCrsMatrixAndCombineFunctor: "
261 "At row %d, the offset (%d) > buffer size (%d)\n",
264 Kokkos::atomic_compare_exchange(error_code.data(), 0, 22);
296 "*** Error: UnpackCrsMatrixAndCombineFunctor: "
297 "At row %d, number of entries (%d) != number of entries unpacked (%d)\n",
300 Kokkos::atomic_compare_exchange(error_code.data(), 0, 23);
306 Kokkos::parallel_for(
308 [=, *
this](
const LO&
j) {
325 if (combine_mode ==
ADD) {
330 (
void)local_matrix.sumIntoValues(
337 }
else if (combine_mode ==
REPLACE) {
342 (
void)local_matrix.replaceValues(
352 "*** Error: UnpackCrsMatrixAndCombineFunctor: "
353 "At row %d, an unknown error occurred during unpack\n",
355 Kokkos::atomic_compare_exchange(error_code.data(), 0, 31);
359 team_member.team_barrier();
364 auto error_code_h = Kokkos::create_mirror_view_and_copy(
365 Kokkos::HostSpace(), error_code);
// Empty tag types used for overload/policy selection elsewhere in this file:
// MaxNumEntTag appears as the first parameter of an operator() and of a join
// overload, and TotNumEntTag is named as the work tag of a Kokkos::RangePolicy.
371struct MaxNumEntTag {};
372struct TotNumEntTag {};
382template <
class LO,
class DT,
class BDT>
385 typedef Kokkos::View<const size_t*, BDT> num_packets_per_lid_type;
386 typedef Kokkos::View<const size_t*, DT> offsets_type;
387 typedef Kokkos::View<const char*, BDT> input_buffer_type;
393 num_packets_per_lid_type num_packets_per_lid;
394 offsets_type offsets;
395 input_buffer_type imports;
406 operator()(
const MaxNumEntTag,
const LO
i,
value_type& update)
const {
408 const size_t num_bytes = num_packets_per_lid(
i);
411 const char*
const in_buf = imports.data() + offsets(
i);
420 join(
const MaxNumEntTag,
423 if (dst < src) dst = src;
429 const size_t num_bytes = num_packets_per_lid(
i);
432 const char*
const in_buf = imports.data() + offsets(
i);
446template <
class LO,
class DT,
class BDT>
449 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
450 const Kokkos::View<const size_t*, DT>& offsets,
451 const Kokkos::View<const char*, BDT>& imports) {
452 typedef typename DT::execution_space XS;
453 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>,
460 static_cast<LO
>(num_packets_per_lid.extent(0));
461 size_t max_num_ent = 0;
462 Kokkos::parallel_reduce(
"Max num entries in CRS",
475template <
class LO,
class DT,
class BDT>
478 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
479 const Kokkos::View<const size_t*, DT>& offsets,
480 const Kokkos::View<const char*, BDT>& imports) {
481 typedef typename DT::execution_space XS;
482 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>, TotNumEntTag> range_policy;
487 static_cast<LO
>(num_packets_per_lid.extent(0));
488 Kokkos::parallel_reduce(
"Total num entries in CRS to unpack",
497 unpackRowCount(
const char imports[],
511 return static_cast<size_t>(num_ent_LO);
518template <
class View1,
class View2>
523 using LO =
typename View2::value_type;
527 batch_info(
batch, 0) =
static_cast<LO
>(
i);
532 return batch == batch_info.extent(0);
542template <
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
546 const Kokkos::View<const char*, BufferDeviceType>& imports,
547 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
550 using ST =
typename LocalMatrix::value_type;
553 using XS =
typename DT::execution_space;
555 "Tpetra::Details::UnpackAndCombineCrsMatrixImpl::"
556 "unpackAndCombineIntoCrsMatrix: ";
558 const size_t num_import_lids =
static_cast<size_t>(import_lids.extent(0));
567 std::invalid_argument,
568 prefix <<
"ABSMAX combine mode is not yet implemented for a matrix that has a "
569 "static graph (i.e., was constructed with the CrsMatrix constructor "
570 "that takes a const CrsGraph pointer).");
573 std::invalid_argument,
574 prefix <<
"INSERT combine mode is not allowed if the matrix has a static graph "
575 "(i.e., was constructed with the CrsMatrix constructor that takes a "
576 "const CrsGraph pointer).");
580 std::invalid_argument,
581 prefix <<
"Invalid combine mode; should never get "
582 "here! Please report this bug to the Tpetra developers.");
588 std::invalid_argument,
590 "numPacketsPerLID.size() ("
591 << num_packets_per_lid.extent(0) <<
").");
605 Kokkos::View<LO* [2], DT> batch_info(
"",
num_batches);
608 Kokkos::parallel_reduce(
609 Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t>>(0,
num_import_lids),
612 imports.data(), offsets(
i), num_packets_per_lid(
i));
636 const bool atomic = XS().concurrency() != 1;
651 using policy = Kokkos::TeamPolicy<XS, Kokkos::IndexType<LO>>;
653 if (!Spaces::is_gpu_exec_space<XS>() ||
team_size == Teuchos::OrdinalTraits<size_t>::invalid()) {
659 auto error_code =
f.error();
663 prefix <<
"UnpackCrsMatrixAndCombineFunctor reported error code " << error_code);
666template <
class LocalMatrix,
class BufferDeviceType>
672 const Kokkos::View<const char*, BufferDeviceType, void, void>& imports,
673 const Kokkos::View<const size_t*, BufferDeviceType, void, void>& num_packets_per_lid,
675 const Kokkos::View<const char*, BufferDeviceType>& imports,
676 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
679 using Kokkos::parallel_reduce;
680 typedef typename LocalMatrix::ordinal_type LO;
681 typedef typename LocalMatrix::device_type device_type;
682 typedef typename device_type::execution_space XS;
683 typedef typename Kokkos::View<LO*, device_type>::size_type size_type;
684 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>> range_policy;
697 update +=
static_cast<size_t>(local_matrix.graph.row_map[
lid + 1] - local_matrix.graph.row_map[
lid]);
704 num_items =
static_cast<LO
>(permute_from_lids.extent(0));
708 range_policy(0, num_items),
709 KOKKOS_LAMBDA(
const LO i,
size_t& update) {
710 const LO lid = permute_from_lids(i);
711 update +=
static_cast<size_t>(local_matrix.graph.row_map[lid + 1] - local_matrix.graph.row_map[lid]);
719 const size_type np = num_packets_per_lid.extent(0);
720 Kokkos::View<size_t*, device_type> offsets(
"offsets", np + 1);
723 compute_total_num_entries<LO, device_type, BDT>(num_packets_per_lid,
731template <
class LO,
class DT,
class BDT>
732int setupRowPointersForRemotes(
735 const Kokkos::View<const char*, BDT>& imports,
736 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
738 using Kokkos::parallel_reduce;
739 typedef typename DT::execution_space XS;
741 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type>> range_policy;
744 const size_type
N = num_packets_per_lid.extent(0);
748 "Setup row pointers for remotes",
752 const size_t num_bytes = num_packets_per_lid(
i);
753 const size_t offset = offsets(
i);
766void makeCrsRowPtrFromLengths(
769 using Kokkos::parallel_scan;
770 typedef typename DT::execution_space XS;
771 typedef typename Kokkos::View<size_t*, DT>::size_type size_type;
772 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type>> range_policy;
776 KOKKOS_LAMBDA(
const size_t&
i,
size_t& update,
const bool&
final) {
// Copies rows whose local index is identical in source and target
// (i in [0, num_same_ids)) from local_matrix into the target CRS arrays.
// For each such row the visible kernel body:
//   - adds the row's entry count to new_start_row(tgt_lid) atomically;
//   - writes every entry at position tgt_rowptr(tgt_lid) + (j - src_row),
//     storing the value, the global column index obtained from
//     local_col_map.getGlobalElement, and the src_pids entry for that column
//     (or -1 when that entry equals my_pid).
// NOTE(review): parts of this definition (some parameters, the kernel launch
// call, and the closing braces) are not visible in this listing; only the
// statements shown below are described. Presumably the lambda is launched via
// Kokkos::parallel_for -- confirm against the full file.
786template <
class LocalMatrix,
class LocalMap>
787void copyDataFromSameIDs(
789 const typename PackTraits<int>::output_array_type& tgt_pids,
791 const Kokkos::View<size_t*, typename LocalMap::device_type>& new_start_row,
793 const typename PackTraits<int>::input_array_type& src_pids,
794 const LocalMatrix& local_matrix,
795 const LocalMap& local_col_map,
796 const size_t num_same_ids,
798 using Kokkos::parallel_for;
801 typedef typename DT::execution_space XS;
802 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t>> range_policy;
805 range_policy(0, num_same_ids),
806 KOKKOS_LAMBDA(
const size_t i) {
// Element type of new_start_row, used to cast the increment passed to the atomic add.
807 typedef typename std::remove_reference<
decltype(new_start_row(0))>::type atomic_incr_type;
// Source and target local row indices are both i.
809 const LO src_lid =
static_cast<LO
>(i);
810 size_t src_row = local_matrix.graph.row_map(src_lid);
812 const LO tgt_lid =
static_cast<LO
>(i);
813 const size_t tgt_row = tgt_rowptr(tgt_lid);
// nsr is the number of entries in the source row; it is added to new_start_row(tgt_lid).
815 const size_t nsr = local_matrix.graph.row_map(src_lid + 1) - local_matrix.graph.row_map(src_lid);
816 Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
// Walk the source row; (j - src_row) is the entry's position within the row.
818 for (
size_t j = local_matrix.graph.row_map(src_lid);
819 j < local_matrix.graph.row_map(src_lid + 1); ++j) {
820 LO src_col = local_matrix.graph.entries(j);
821 tgt_vals(tgt_row + j - src_row) = local_matrix.values(j);
822 tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
// A src_pids value equal to my_pid is recorded as -1.
823 tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
// Copies permuted rows from local_matrix into the target CRS arrays: for each
// i in [0, permute_to_lids.extent(0)), source row permute_from_lids(i) is
// written into target row permute_to_lids(i).
// For each such row the visible kernel body:
//   - adds the source row's entry count to new_start_row(tgt_lid) atomically;
//   - writes every entry at position tgt_rowptr(tgt_lid) + (j - src_row),
//     storing the value, the global column index obtained from
//     local_col_map.getGlobalElement, and the src_pids entry for that column
//     (or -1 when that entry equals my_pid).
// NOTE(review): parts of this definition (some parameters, the kernel launch
// call, and the closing braces) are not visible in this listing; only the
// statements shown below are described.
828template <
class LocalMatrix,
class LocalMap>
829void copyDataFromPermuteIDs(
831 const typename PackTraits<int>::output_array_type& tgt_pids,
833 const Kokkos::View<size_t*, typename LocalMap::device_type>& new_start_row,
835 const typename PackTraits<int>::input_array_type& src_pids,
838 const LocalMatrix& local_matrix,
839 const LocalMap& local_col_map,
841 using Kokkos::parallel_for;
844 typedef typename DT::execution_space XS;
845 typedef typename PackTraits<LO>::input_array_type::size_type size_type;
846 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type>> range_policy;
// One iteration per (permute_from_lids(i), permute_to_lids(i)) pair.
848 const size_type num_permute_to_lids = permute_to_lids.extent(0);
851 range_policy(0, num_permute_to_lids),
852 KOKKOS_LAMBDA(
const size_t i) {
// Element type of new_start_row, used to cast the increment passed to the atomic add.
853 typedef typename std::remove_reference<
decltype(new_start_row(0))>::type atomic_incr_type;
855 const LO src_lid = permute_from_lids(i);
856 const size_t src_row = local_matrix.graph.row_map(src_lid);
858 const LO tgt_lid = permute_to_lids(i);
859 const size_t tgt_row = tgt_rowptr(tgt_lid);
// nsr is the number of entries in the source row; it is added to new_start_row(tgt_lid).
861 size_t nsr = local_matrix.graph.row_map(src_lid + 1) - local_matrix.graph.row_map(src_lid);
862 Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
// Walk the source row; (j - src_row) is the entry's position within the row.
864 for (
size_t j = local_matrix.graph.row_map(src_lid);
865 j < local_matrix.graph.row_map(src_lid + 1); ++j) {
866 LO src_col = local_matrix.graph.entries(j);
867 tgt_vals(tgt_row + j - src_row) = local_matrix.values(j);
868 tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
// A src_pids value equal to my_pid is recorded as -1.
869 tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
// Unpacks each imported row from the byte buffer `imports` into the target CRS
// arrays and returns an int (the visible kernel accumulates unpackRow's return
// values into the reduction variable k_error).
// For each i in [0, import_lids.size()) the visible kernel body:
//   - reads the row's byte count num_packets_per_lid(i) and byte offset offsets(i);
//   - obtains the entry count with unpackRowCount;
//   - atomically adds that count to new_start_row(import_lids(i)) and uses the
//     value returned by atomic_fetch_add as the start of the output slice;
//   - unpacks into subviews of tgt_colind / tgt_vals / tgt_pids over that slice;
//   - rewrites unpacked pids equal to my_pid as -1.
// NOTE(review): some parameters, the bodies of the two early-exit branches,
// the reduction launch call, and the return statement are not visible in this
// listing.
874template <
typename LocalMatrix,
typename LocalMap,
typename BufferDeviceType>
875int unpackAndCombineIntoCrsArrays2(
877 const typename PackTraits<int>::output_array_type& tgt_pids,
879 const Kokkos::View<size_t*, typename LocalMap::device_type>& new_start_row,
882#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
883 const Kokkos::View<const char*, BufferDeviceType, void, void>& imports,
884 const Kokkos::View<const size_t*, BufferDeviceType, void, void>& num_packets_per_lid,
886 const Kokkos::View<const char*, BufferDeviceType>& imports,
887 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
892 const size_t bytes_per_value) {
893 using Kokkos::atomic_fetch_add;
894 using Kokkos::MemoryUnmanaged;
895 using Kokkos::parallel_reduce;
896 using Kokkos::subview;
902 typedef typename LocalMatrix::value_type ST;
903 typedef typename DT::execution_space XS;
904 typedef typename Kokkos::View<LO*, DT>::size_type size_type;
905 typedef typename Kokkos::pair<size_type, size_type> slice;
906 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type>> range_policy;
// Unmanaged view types that receive the per-row subviews of the output arrays.
908 typedef View<int*, DT, MemoryUnmanaged> pids_out_type;
909 typedef View<GO*, DT, MemoryUnmanaged> gids_out_type;
910 typedef View<ST*, DT, MemoryUnmanaged> vals_out_type;
// Value compared against unpackRowCount's result below.
912 const size_t InvalidNum = OrdinalTraits<size_t>::invalid();
915 const size_type num_import_lids = import_lids.size();
919 "Unpack and combine into CRS",
920 range_policy(0, num_import_lids),
921 KOKKOS_LAMBDA(
const size_t i,
int& k_error) {
922 typedef typename std::remove_reference<
decltype(new_start_row(0))>::type atomic_incr_type;
// Byte count and byte offset of row i inside the import buffer.
923 const size_t num_bytes = num_packets_per_lid(i);
924 const size_t offset = offsets(i);
// (The body of this zero-byte branch is not visible in this listing.)
925 if (num_bytes == 0) {
929 size_t num_ent = unpackRowCount<LO>(imports.data(), offset, num_bytes);
// (The body of this invalid-count branch is not visible in this listing.)
930 if (num_ent == InvalidNum) {
// Add num_ent to new_start_row(lcl_row); the value returned by
// atomic_fetch_add is used as the first output index for this row.
934 const LO lcl_row = import_lids(i);
935 const size_t start_row = atomic_fetch_add(&new_start_row(lcl_row), atomic_incr_type(num_ent));
936 const size_t end_row = start_row + num_ent;
// Output windows [start_row, end_row) in the three target arrays.
938 gids_out_type gids_out = subview(tgt_colind, slice(start_row, end_row));
939 vals_out_type vals_out = subview(tgt_vals, slice(start_row, end_row));
940 pids_out_type pids_out = subview(tgt_pids, slice(start_row, end_row));
// unpackRow's return value is accumulated into the reduction result.
942 k_error += unpackRow<ST, LO, GO>(gids_out, pids_out, vals_out,
943 imports.data(), offset, num_bytes,
944 num_ent, bytes_per_value);
// Unpacked pids equal to my_pid are replaced with -1.
947 for (
size_t j = 0; j < static_cast<size_t>(num_ent); ++j) {
948 const int pid = pids_out(j);
949 pids_out(j) = (pid != my_pid) ? pid : -1;
// Builds the target CRS arrays from three sources of rows: rows with the same
// local index in source and target, permuted rows, and rows unpacked from the
// import buffer. The visible statements proceed in this order:
//   1. store per-row entry counts in tgt_rowptr for same-ID and permuted rows;
//   2. allocate `offsets` (num_import_lids + 1 entries) and, in debug builds,
//      check that its last entry equals imports.extent(0);
//   3. call setupRowPointersForRemotes for the imported rows;
//   4. call makeCrsRowPtrFromLengths(tgt_rowptr, new_start_row);
//   5. call copyDataFromSameIDs and copyDataFromPermuteIDs;
//   6. call unpackAndCombineIntoCrsArrays2 for the imported rows.
// The TEUCHOS_TEST_FOR_EXCEPTION calls name std::logic_error for a nonzero
// k_error, a nonzero unpack_err, and (debug builds) the offset mismatch.
// NOTE(review): the function-name line, several parameters, the kernel launch
// calls, the computation that fills `offsets`, and the body of the empty-
// imports branch are not visible in this listing.
957template <
typename LocalMatrix,
typename LocalMap,
typename BufferDeviceType>
959 const LocalMatrix& local_matrix,
960 const LocalMap& local_col_map,
962#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
963 const Kokkos::View<const char*, BufferDeviceType, void, void>& imports,
964 const Kokkos::View<const size_t*, BufferDeviceType, void, void>& num_packets_per_lid,
966 const Kokkos::View<const char*, BufferDeviceType>& imports,
967 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
974 const typename PackTraits<int>::input_array_type& src_pids,
975 const typename PackTraits<int>::output_array_type& tgt_pids,
976 const size_t num_same_ids,
977 const size_t tgt_num_rows,
978 const size_t tgt_num_nonzeros,
979 const int my_tgt_pid,
980 const size_t bytes_per_value) {
981 using Kokkos::MemoryUnmanaged;
982 using Kokkos::parallel_for;
983 using Kokkos::subview;
988 typedef typename DT::execution_space XS;
989 typedef typename Kokkos::View<LO*, DT>::size_type size_type;
990 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t>> range_policy;
991 typedef BufferDeviceType BDT;
// Prefix prepended to the exception messages below.
993 const char prefix[] =
"unpackAndCombineIntoCrsArrays: ";
995 const size_t N = tgt_num_rows;
999 const int my_pid = my_tgt_pid;
// Kernel over N + 1 indices; its body is not visible in this listing.
1003 range_policy(0, N + 1),
1004 KOKKOS_LAMBDA(
const size_t i) {
// Same-ID rows: target row i receives the entry count of source row i.
1010 range_policy(0, num_same_ids),
1011 KOKKOS_LAMBDA(
const size_t i) {
1012 const LO tgt_lid =
static_cast<LO
>(i);
1013 const LO src_lid =
static_cast<LO
>(i);
1014 tgt_rowptr(tgt_lid) = local_matrix.graph.row_map(src_lid + 1) - local_matrix.graph.row_map(src_lid);
// Permuted rows: target row permute_to_lids(i) receives the entry count of
// source row permute_from_lids(i).
1018 const size_type num_permute_to_lids = permute_to_lids.extent(0);
1020 range_policy(0, num_permute_to_lids),
1021 KOKKOS_LAMBDA(
const size_t i) {
1022 const LO tgt_lid = permute_to_lids(i);
1023 const LO src_lid = permute_from_lids(i);
1024 tgt_rowptr(tgt_lid) = local_matrix.graph.row_map(src_lid + 1) - local_matrix.graph.row_map(src_lid);
// One offset per imported row plus one trailing entry.
1028 const size_type num_import_lids = import_lids.extent(0);
1029 View<size_t*, DT> offsets(
"offsets", num_import_lids + 1);
// Debug-only check: the last offset must equal the size of the import buffer.
1032#ifdef HAVE_TPETRA_DEBUG
1034 auto nth_offset_h = getEntryOnHost(offsets, num_import_lids);
1035 const bool condition =
1036 nth_offset_h !=
static_cast<size_t>(imports.extent(0));
1037 TEUCHOS_TEST_FOR_EXCEPTION(condition, std::logic_error, prefix <<
"The final offset in bytes " << nth_offset_h <<
" != imports.size() = " << imports.extent(0) <<
". Please report this bug to the Tpetra developers.");
// Handle the imported (remote) rows; a nonzero k_error is reported below.
// NOTE(review): the line that assigns this call's result to k_error is not
// visible in this listing.
1043 setupRowPointersForRemotes<LO, DT, BDT>(tgt_rowptr,
1044 import_lids, imports, num_packets_per_lid, offsets);
1045 TEUCHOS_TEST_FOR_EXCEPTION(k_error != 0, std::logic_error, prefix <<
" Error transferring data to target row pointers. "
1046 "Please report this bug to the Tpetra developers.");
// new_start_row has N + 1 entries and is passed to every copy/unpack helper below.
1050 View<size_t*, DT> new_start_row(
"new_start_row", N + 1);
// Presumably converts the per-row lengths in tgt_rowptr into row offsets
// (per the helper's name) -- confirm against its definition.
1053 makeCrsRowPtrFromLengths(tgt_rowptr, new_start_row);
// Copy locally available rows (same-ID rows first, then permuted rows).
1056 copyDataFromSameIDs(tgt_colind, tgt_pids, tgt_vals, new_start_row,
1057 tgt_rowptr, src_pids, local_matrix, local_col_map, num_same_ids, my_pid);
1059 copyDataFromPermuteIDs(tgt_colind, tgt_pids, tgt_vals, new_start_row,
1060 tgt_rowptr, src_pids, permute_to_lids, permute_from_lids,
1061 local_matrix, local_col_map, my_pid);
// Empty import buffer: the body of this branch is not visible in this listing.
1063 if (imports.extent(0) <= 0) {
// Unpack the imported rows; a nonzero result is reported as a logic error.
1067 int unpack_err = unpackAndCombineIntoCrsArrays2(tgt_colind, tgt_pids,
1068 tgt_vals, new_start_row, offsets, import_lids, imports, num_packets_per_lid,
1069 local_matrix, local_col_map, my_pid, bytes_per_value);
1070 TEUCHOS_TEST_FOR_EXCEPTION(
1071 unpack_err != 0, std::logic_error, prefix <<
"unpack loop failed. This "
1072 "should never happen. Please report this bug to the Tpetra developers.");
1113template <
typename ST,
typename LO,
typename GO,
typename Node>
1116 const Teuchos::ArrayView<const char>& imports,
1118 const Teuchos::ArrayView<const LO>&
importLIDs,
1122 typedef typename Node::device_type device_type;
1124 static_assert(std::is_same<device_type, typename local_matrix_device_type::device_type>::value,
1125 "Node::device_type and LocalMatrix::device_type must be the same.");
1143 imports.size(),
true,
"imports");
1145 auto local_matrix =
sourceMatrix.getLocalMatrixDevice();
1146 auto local_col_map =
sourceMatrix.getColMap()->getLocalMap();
1157 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix(
// DualView-based entry point: takes the device views of the import buffer,
// the per-LID packet counts and the import LIDs, then forwards them together
// with sourceMatrix's device local matrix, its local column map and
// combineMode to
// UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix.
// `imports` is synced to device only if need_sync_device() reports that it is
// required; `importLIDs` is asserted to need no device sync.
// NOTE(review): the full parameter list (DualView device types, parameter
// names) and the closing braces are not visible in this listing.
1162template <
typename ST,
typename LO,
typename GO,
typename NT>
1163void unpackCrsMatrixAndCombineNew(
1165 Kokkos::DualView<
char*,
1168 Kokkos::DualView<
size_t*,
1171 const Kokkos::DualView<
const LO*,
1178 using device_type =
typename crs_matrix_type::device_type;
1179 using local_matrix_device_type =
typename crs_matrix_type::local_matrix_device_type;
1180 using buffer_device_type =
typename dist_object_type::buffer_device_type;
// Compile-time check that the matrix and its local matrix use the same device type.
1182 static_assert(std::is_same<device_type, typename local_matrix_device_type::device_type>::value,
1183 "crs_matrix_type::device_type and local_matrix_device_type::device_type "
1184 "must be the same.");
1189 auto num_packets_per_lid_d = numPacketsPerLID.view_device();
// The import LIDs are not synced here; the assertion requires that no sync is pending.
1191 TEUCHOS_ASSERT(!importLIDs.need_sync_device());
1192 auto import_lids_d = importLIDs.view_device();
// Sync the import buffer to device only when it is flagged as needing it.
1194 if (imports.need_sync_device()) {
1195 imports.sync_device();
1197 auto imports_d = imports.view_device();
1199 auto local_matrix = sourceMatrix.getLocalMatrixDevice();
1200 auto local_col_map = sourceMatrix.getColMap()->getLocalMap();
1201 typedef decltype(local_col_map) local_map_type;
// Forward everything to the implementation in UnpackAndCombineCrsMatrixImpl.
1203 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix<
1204 local_matrix_device_type,
1206 buffer_device_type>(local_matrix, local_col_map, imports_d, num_packets_per_lid_d,
1207 import_lids_d, combineMode);
1265template <
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1269 const Teuchos::ArrayView<const LocalOrdinal>&
importLIDs,
1270 const Teuchos::ArrayView<const char>& imports,
1275 const Teuchos::ArrayView<const LocalOrdinal>&
permuteToLIDs,
1277 using Kokkos::MemoryUnmanaged;
1279 typedef typename Node::device_type DT;
1280 const char prefix[] =
"unpackAndCombineWithOwningPIDsCount: ";
1284 "permuteFromLIDs.size() = "
1290 "CrsMatrix 'sourceMatrix' must be locally indexed.");
1293 "numPacketsPerLID.size() = "
1296 auto local_matrix =
sourceMatrix.getLocalMatrixDevice();
1298 using kokkos_device_type = Kokkos::Device<
typename Node::device_type::execution_space,
1299 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>;
1301#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
1309 "permute_from_lids");
1311#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
1312 Kokkos::View<const char*, kokkos_device_type, void, void>
imports_d =
1314 Kokkos::View<const char*, kokkos_device_type>
imports_d =
1317 imports.getRawPtr(),
1318 imports.size(),
true,
1321#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
1329 "num_packets_per_lid");
1331 return UnpackAndCombineCrsMatrixImpl::unpackAndCombineWithOwningPIDsCount(
1351template <
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1355 Kokkos::Device<
typename Node::device_type::execution_space,
1356 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1363 const Kokkos::View<
const char*,
1364 Kokkos::Device<
typename Node::device_type::execution_space,
1365 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1372 const Kokkos::View<
const size_t*,
1373 Kokkos::Device<
typename Node::device_type::execution_space,
1374 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1381 const size_t numSameIDs,
1383 Kokkos::Device<
typename Node::device_type::execution_space,
1384 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1392 Kokkos::Device<
typename Node::device_type::execution_space,
1393 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1402 Kokkos::View<size_t*, typename Node::device_type>&
crs_rowptr_d,
1403 Kokkos::View<GlobalOrdinal*, typename Node::device_type>&
crs_colind_d,
1405 const Teuchos::ArrayView<const int>&
SourcePids,
1406 Kokkos::View<int*, typename Node::device_type>&
TargetPids) {
1407 using execution_space =
typename Node::execution_space;
1410 using Kokkos::deep_copy;
1413 using Teuchos::ArrayView;
1414 using Teuchos::outArg;
1415 using Teuchos::REDUCE_MAX;
1416 using Teuchos::reduceAll;
1418 typedef typename Node::device_type DT;
1421 typedef typename matrix_type::impl_scalar_type ST;
1423 const char prefix[] =
"Tpetra::Details::unpackAndCombineIntoCrsArrays_new: ";
1424 Teuchos::RCP<Tpetra::Details::ProfilingRegion> tm;
1426 using Kokkos::MemoryUnmanaged;
1430 "permute_from_lids_d.size() = "
1436 "CrsMatrix 'sourceMatrix' must be locally indexed.");
1439 "num_packets_per_lid_d.size() = "
1442 auto local_matrix =
sourceMatrix.getLocalMatrixDevice();
1447 UnpackAndCombineCrsMatrixImpl::unpackAndCombineWithOwningPIDsCount(
1466 Kokkos::deep_copy(execution_space(),
TargetPids, -1);
1469 auto local_col_map =
sourceMatrix.getColMap()->getLocalMap();
1481 size_t bytes_per_value = 0;
1495 if (local_matrix.values.extent(0) > 0) {
1496 const ST&
val = local_matrix.values(0);
1502 Teuchos::reduceAll<int, size_t>(*(
sourceMatrix.getComm()),
1503 Teuchos::REDUCE_MAX,
1505 outArg(bytes_per_value));
1509 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsArrays(
1520 Kokkos::parallel_for(
1521 "setLocalEntriesToPID", Kokkos::RangePolicy<typename DT::execution_space>(0,
TargetPids.size()),
KOKKOS_LAMBDA(
const size_t i) {
1527template <
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1531 Kokkos::Device<
typename Node::device_type::execution_space,
1532 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1539 const Kokkos::View<
const char*,
1540 Kokkos::Device<
typename Node::device_type::execution_space,
1541 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1548 const Kokkos::View<
const size_t*,
1549 Kokkos::Device<
typename Node::device_type::execution_space,
1550 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1557 const size_t numSameIDs,
1559 Kokkos::Device<
typename Node::device_type::execution_space,
1560 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1568 Kokkos::Device<
typename Node::device_type::execution_space,
1569 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1579 Teuchos::ArrayRCP<GlobalOrdinal>&
CRS_colind,
1580 Teuchos::ArrayRCP<Scalar>&
CRS_vals,
1581 const Teuchos::ArrayView<const int>&
SourcePids,
1583 using execution_space =
typename Node::execution_space;
1586 using Kokkos::deep_copy;
1589 using Teuchos::ArrayView;
1590 using Teuchos::outArg;
1591 using Teuchos::REDUCE_MAX;
1592 using Teuchos::reduceAll;
1594 typedef typename Node::device_type DT;
1597 typedef typename matrix_type::impl_scalar_type ST;
1599 const char prefix[] =
"Tpetra::Details::unpackAndCombineIntoCrsArrays_new: ";
1600 Teuchos::RCP<Tpetra::Details::ProfilingRegion> tm;
1602 using Kokkos::MemoryUnmanaged;
1606 "permute_from_lids_d.size() = "
1612 "CrsMatrix 'sourceMatrix' must be locally indexed.");
1615 "num_packets_per_lid_d.size() = "
1618 auto local_matrix =
sourceMatrix.getLocalMatrixDevice();
1623 UnpackAndCombineCrsMatrixImpl::unpackAndCombineWithOwningPIDsCount(
1647 auto local_col_map =
sourceMatrix.getColMap()->getLocalMap();
1660#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE
1661 static_assert(!std::is_same<
1662 typename std::remove_const<
1663 typename std::decay<
1665 std::complex<double>>::value,
1666 "CRS_vals::value_type is std::complex<double>; this should never happen"
1667 ", since std::complex does not work in Kokkos::View objects.");
1674#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE
1675 static_assert(!std::is_same<
1676 typename decltype(
crs_vals_d)::non_const_value_type,
1677 std::complex<double>>::value,
1678 "crs_vals_d::non_const_value_type is std::complex<double>; this should "
1679 "never happen, since std::complex does not work in Kokkos::View objects.");
1692 size_t bytes_per_value = 0;
1706 if (local_matrix.values.extent(0) > 0) {
1707 const ST&
val = local_matrix.values(0);
1713 Teuchos::reduceAll<int, size_t>(*(
sourceMatrix.getComm()),
1714 Teuchos::REDUCE_MAX,
1716 outArg(bytes_per_value));
1719#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE
1720 static_assert(!std::is_same<
1721 typename decltype(
crs_vals_d)::non_const_value_type,
1722 std::complex<double>>::value,
1723 "crs_vals_d::non_const_value_type is std::complex<double>; this should "
1724 "never happen, since std::complex does not work in Kokkos::View objects.");
1728 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsArrays(
1763#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT_KOKKOS_DEPRECATED_CODE_4_ON(ST, LO, GO, NT) \
1765 Details::unpackCrsMatrixAndCombine<ST, LO, GO, NT>( \
1766 const CrsMatrix<ST, LO, GO, NT>&, \
1767 const Teuchos::ArrayView<const char>&, \
1768 const Teuchos::ArrayView<const size_t>&, \
1769 const Teuchos::ArrayView<const LO>&, \
1773 Details::unpackAndCombineWithOwningPIDsCount<ST, LO, GO, NT>( \
1774 const CrsMatrix<ST, LO, GO, NT>&, \
1775 const Teuchos::ArrayView<const LO>&, \
1776 const Teuchos::ArrayView<const char>&, \
1777 const Teuchos::ArrayView<const size_t>&, \
1781 const Teuchos::ArrayView<const LO>&, \
1782 const Teuchos::ArrayView<const LO>&); \
1784 Details::unpackCrsMatrixAndCombineNew<ST, LO, GO, NT>( \
1785 const CrsMatrix<ST, LO, GO, NT>&, \
1786 Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>, \
1787 Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>, \
1788 const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1790 const CombineMode); \
1792 Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT>( \
1793 const CrsMatrix<ST, LO, GO, NT>&, \
1794 const Kokkos::View<LO const*, \
1795 Kokkos::Device<typename NT::device_type::execution_space, \
1796 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1798 const Kokkos::View<const char*, \
1799 Kokkos::Device<typename NT::device_type::execution_space, \
1800 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1802 const Kokkos::View<const size_t*, \
1803 Kokkos::Device<typename NT::device_type::execution_space, \
1804 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1807 const Kokkos::View<LO const*, \
1808 Kokkos::Device<typename NT::device_type::execution_space, \
1809 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1811 const Kokkos::View<LO const*, \
1812 Kokkos::Device<typename NT::device_type::execution_space, \
1813 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1817 Kokkos::View<size_t*, typename NT::device_type>&, \
1818 Kokkos::View<GO*, typename NT::device_type>&, \
1819 Kokkos::View<typename CrsMatrix<ST, LO, GO, NT>::impl_scalar_type*, typename NT::device_type>&, \
1820 const Teuchos::ArrayView<const int>&, \
1821 Kokkos::View<int*, typename NT::device_type>&); \
1823 Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT>( \
1824 const CrsMatrix<ST, LO, GO, NT>&, \
1825 const Kokkos::View<LO const*, \
1826 Kokkos::Device<typename NT::device_type::execution_space, \
1827 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1829 const Kokkos::View<const char*, \
1830 Kokkos::Device<typename NT::device_type::execution_space, \
1831 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1833 const Kokkos::View<const size_t*, \
1834 Kokkos::Device<typename NT::device_type::execution_space, \
1835 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1838 const Kokkos::View<LO const*, \
1839 Kokkos::Device<typename NT::device_type::execution_space, \
1840 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1842 const Kokkos::View<LO const*, \
1843 Kokkos::Device<typename NT::device_type::execution_space, \
1844 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1848 Teuchos::ArrayRCP<size_t>&, \
1849 Teuchos::ArrayRCP<GO>&, \
1850 Teuchos::ArrayRCP<ST>&, \
1851 const Teuchos::ArrayView<const int>&, \
1852 Teuchos::Array<int>&);
1854#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT_KOKKOS_DEPRECATED_CODE_4_OFF(ST, LO, GO, NT) \
1856 Details::unpackCrsMatrixAndCombine<ST, LO, GO, NT>( \
1857 const CrsMatrix<ST, LO, GO, NT>&, \
1858 const Teuchos::ArrayView<const char>&, \
1859 const Teuchos::ArrayView<const size_t>&, \
1860 const Teuchos::ArrayView<const LO>&, \
1864 Details::unpackAndCombineWithOwningPIDsCount<ST, LO, GO, NT>( \
1865 const CrsMatrix<ST, LO, GO, NT>&, \
1866 const Teuchos::ArrayView<const LO>&, \
1867 const Teuchos::ArrayView<const char>&, \
1868 const Teuchos::ArrayView<const size_t>&, \
1872 const Teuchos::ArrayView<const LO>&, \
1873 const Teuchos::ArrayView<const LO>&); \
1875 Details::unpackCrsMatrixAndCombineNew<ST, LO, GO, NT>( \
1876 const CrsMatrix<ST, LO, GO, NT>&, \
1877 Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>, \
1878 Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>, \
1879 const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1881 const CombineMode); \
1883 Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT>( \
1884 const CrsMatrix<ST, LO, GO, NT>&, \
1885 const Kokkos::View<LO const*, \
1886 Kokkos::Device<typename NT::device_type::execution_space, \
1887 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1888 const Kokkos::View<const char*, \
1889 Kokkos::Device<typename NT::device_type::execution_space, \
1890 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1891 const Kokkos::View<const size_t*, \
1892 Kokkos::Device<typename NT::device_type::execution_space, \
1893 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1895 const Kokkos::View<LO const*, \
1896 Kokkos::Device<typename NT::device_type::execution_space, \
1897 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1898 const Kokkos::View<LO const*, \
1899 Kokkos::Device<typename NT::device_type::execution_space, \
1900 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1903 Kokkos::View<size_t*, typename NT::device_type>&, \
1904 Kokkos::View<GO*, typename NT::device_type>&, \
1905 Kokkos::View<typename CrsMatrix<ST, LO, GO, NT>::impl_scalar_type*, typename NT::device_type>&, \
1906 const Teuchos::ArrayView<const int>&, \
1907 Kokkos::View<int*, typename NT::device_type>&); \
1909 Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT>( \
1910 const CrsMatrix<ST, LO, GO, NT>&, \
1911 const Kokkos::View<LO const*, \
1912 Kokkos::Device<typename NT::device_type::execution_space, \
1913 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1914 const Kokkos::View<const char*, \
1915 Kokkos::Device<typename NT::device_type::execution_space, \
1916 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1917 const Kokkos::View<const size_t*, \
1918 Kokkos::Device<typename NT::device_type::execution_space, \
1919 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1921 const Kokkos::View<LO const*, \
1922 Kokkos::Device<typename NT::device_type::execution_space, \
1923 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1924 const Kokkos::View<LO const*, \
1925 Kokkos::Device<typename NT::device_type::execution_space, \
1926 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1929 Teuchos::ArrayRCP<size_t>&, \
1930 Teuchos::ArrayRCP<GO>&, \
1931 Teuchos::ArrayRCP<ST>&, \
1932 const Teuchos::ArrayView<const int>&, \
1933 Teuchos::Array<int>&);
1935#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
1936#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT(ST, LO, GO, NT) \
1937 TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT_KOKKOS_DEPRECATED_CODE_4_ON(ST, LO, GO, NT)
1939#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT(ST, LO, GO, NT) \
1940 TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT_KOKKOS_DEPRECATED_CODE_4_OFF(ST, LO, GO, NT)
Declaration of the Tpetra::CrsMatrix class.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
size_t compute_total_num_entries(const Kokkos::View< const size_t *, BDT > &num_packets_per_lid, const Kokkos::View< const size_t *, DT > &offsets, const Kokkos::View< const char *, BDT > &imports)
Total number of entries, summed over all rows, of the packed matrix.
void unpackAndCombineIntoCrsMatrix(const LocalMatrix &local_matrix, const LocalMap &local_map, const Kokkos::View< const char *, BufferDeviceType > &imports, const Kokkos::View< const size_t *, BufferDeviceType > &num_packets_per_lid, const typename PackTraits< typename LocalMap::local_ordinal_type >::input_array_type import_lids, const Tpetra::CombineMode combine_mode)
Perform the unpack operation for the matrix.
size_t compute_maximum_num_entries(const Kokkos::View< const size_t *, BDT > &num_packets_per_lid, const Kokkos::View< const size_t *, DT > &offsets, const Kokkos::View< const char *, BDT > &imports)
Maximum number of entries in any row of the packed matrix.
bool compute_batch_info(const View1 &batches_per_lid, View2 &batch_info)
Compute the index and batch number associated with each batch.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
typename row_matrix_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
Struct that holds views of the contents of a CrsMatrix.
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
"Local" part of Map suitable for Kokkos kernels.
KOKKOS_INLINE_FUNCTION LocalOrdinal getLocalElement(const GlobalOrdinal globalIndex) const
Get the local index corresponding to the given global index. (device only)
LocalOrdinal local_ordinal_type
The type of local indices.
GlobalOrdinal global_ordinal_type
The type of global indices.
DeviceType device_type
The device type.
Kokkos::parallel_reduce functor to determine the number of entries (to unpack) in a KokkosSparse::Crs...
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
Implementation details of Tpetra.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
void unpackCrsMatrixAndCombine(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &importLIDs, size_t constantNumPackets, CombineMode combineMode)
Unpack the imported column indices and values, and combine into matrix.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
CombineMode
Rule for combining data in an Import or Export.
@ REPLACE
Replace existing values with new values.
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
@ INSERT
Insert new values that don't currently exist.
Traits class for packing / unpacking data of type T.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > unpackArray(value_type outBuf[], const char inBuf[], const size_t numEnt)
Unpack numEnt value_type entries from the given input buffer of bytes, to the given output buffer of ...
static KOKKOS_INLINE_FUNCTION size_t unpackValue(T &outVal, const char inBuf[])
Unpack the given value from the given input buffer.
Kokkos::View< value_type *, Kokkos::AnonymousSpace > output_array_type
The type of an output array of value_type.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.
Unpacks and combines a single row of the CrsMatrix.
int error() const
Host function for getting the error.