10#ifndef TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_DEF_HPP
11#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_DEF_HPP
15#include "TpetraCore_config.h"
16#include "Kokkos_Core.hpp"
17#include "Teuchos_Array.hpp"
18#include "Teuchos_ArrayView.hpp"
19#include "Teuchos_OrdinalTraits.hpp"
20#include "Teuchos_TimeMonitor.hpp"
28#include "Tpetra_Details_DefaultTypes.hpp"
59namespace UnpackAndCombineCrsMatrixImpl {
70template <
class ST,
class LO,
class GO>
79 const size_t bytes_per_value) {
84 bool unpack_pids =
pids_out.size() > 0;
101 const char*
const pids_in = unpack_pids ? imports +
pids_beg :
nullptr;
112 Kokkos::pair<int, size_t>
p;
151template <
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
156 typedef typename local_matrix_type::value_type ST;
160 typedef typename DT::execution_space XS;
162 typedef Kokkos::View<const size_t*, BufferDeviceType>
163 num_packets_per_lid_type;
164 typedef Kokkos::View<const size_t*, DT> offsets_type;
165 typedef Kokkos::View<const char*, BufferDeviceType> input_buffer_type;
166 typedef Kokkos::View<const LO*, BufferDeviceType> import_lids_type;
168 typedef Kokkos::View<int, DT> error_type;
169 using member_type =
typename Kokkos::TeamPolicy<XS>::member_type;
171 static_assert(std::is_same<LO, typename local_matrix_type::ordinal_type>::value,
172 "LocalMap::local_ordinal_type and "
173 "LocalMatrix::ordinal_type must be the same.");
177 input_buffer_type imports;
178 num_packets_per_lid_type num_packets_per_lid;
179 import_lids_type import_lids;
180 Kokkos::View<const LO* [2], DT> batch_info;
181 offsets_type offsets;
184 size_t bytes_per_value;
186 error_type error_code;
211 , error_code(
"error") {}
214 void operator()(member_type team_member)
const {
215 using Kokkos::MemoryUnmanaged;
216 using Kokkos::subview;
219 const LO
batch = team_member.league_rank();
231 const size_t buf_size = imports.size();
250 "*** Error: UnpackCrsMatrixAndCombineFunctor: "
251 "At row %d, the expected number of bytes (%d) != number of unpacked bytes (%d)\n",
254 Kokkos::atomic_compare_exchange(error_code.data(), 0, 21);
260 "*** Error: UnpackCrsMatrixAndCombineFunctor: "
261 "At row %d, the offset (%d) > buffer size (%d)\n",
264 Kokkos::atomic_compare_exchange(error_code.data(), 0, 22);
296 "*** Error: UnpackCrsMatrixAndCombineFunctor: "
297 "At row %d, number of entries (%d) != number of entries unpacked (%d)\n",
300 Kokkos::atomic_compare_exchange(error_code.data(), 0, 23);
306 Kokkos::parallel_for(
308 [=, *
this](
const LO&
j) {
325 if (combine_mode ==
ADD) {
330 (
void)local_matrix.sumIntoValues(
337 }
else if (combine_mode ==
REPLACE) {
342 (
void)local_matrix.replaceValues(
352 "*** Error: UnpackCrsMatrixAndCombineFunctor: "
353 "At row %d, an unknown error occurred during unpack\n",
355 Kokkos::atomic_compare_exchange(error_code.data(), 0, 31);
359 team_member.team_barrier();
364 auto error_code_h = Kokkos::create_mirror_view_and_copy(
365 Kokkos::HostSpace(), error_code);
// Empty tag types used to select between reduction variants in this file.
// MaxNumEntTag is the tag on the operator()/join overloads whose join keeps
// the larger of the two values (`if (dst < src) dst = src;`).
371struct MaxNumEntTag {};
// TotNumEntTag is the work tag named in the RangePolicy typedef that sits
// next to the "Total num entries in CRS to unpack" parallel_reduce.
372struct TotNumEntTag {};
382template <
class LO,
class DT,
class BDT>
385 typedef Kokkos::View<const size_t*, BDT> num_packets_per_lid_type;
386 typedef Kokkos::View<const size_t*, DT> offsets_type;
387 typedef Kokkos::View<const char*, BDT> input_buffer_type;
393 num_packets_per_lid_type num_packets_per_lid;
394 offsets_type offsets;
395 input_buffer_type imports;
406 operator()(
const MaxNumEntTag,
const LO
i,
value_type& update)
const {
408 const size_t num_bytes = num_packets_per_lid(
i);
411 const char*
const in_buf = imports.data() + offsets(
i);
420 join(
const MaxNumEntTag,
423 if (dst < src) dst = src;
429 const size_t num_bytes = num_packets_per_lid(
i);
432 const char*
const in_buf = imports.data() + offsets(
i);
446template <
class LO,
class DT,
class BDT>
449 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
450 const Kokkos::View<const size_t*, DT>& offsets,
451 const Kokkos::View<const char*, BDT>& imports) {
452 typedef typename DT::execution_space XS;
453 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>,
460 static_cast<LO
>(num_packets_per_lid.extent(0));
461 size_t max_num_ent = 0;
462 Kokkos::parallel_reduce(
"Max num entries in CRS",
475template <
class LO,
class DT,
class BDT>
478 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
479 const Kokkos::View<const size_t*, DT>& offsets,
480 const Kokkos::View<const char*, BDT>& imports) {
481 typedef typename DT::execution_space XS;
482 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>, TotNumEntTag> range_policy;
487 static_cast<LO
>(num_packets_per_lid.extent(0));
488 Kokkos::parallel_reduce(
"Total num entries in CRS to unpack",
497 unpackRowCount(
const char imports[],
511 return static_cast<size_t>(num_ent_LO);
518template <
class View1,
class View2>
523 using LO =
typename View2::value_type;
527 batch_info(
batch, 0) =
static_cast<LO
>(
i);
532 return batch == batch_info.extent(0);
542template <
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
546 const Kokkos::View<const char*, BufferDeviceType>& imports,
547 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
550 using ST =
typename LocalMatrix::value_type;
553 using XS =
typename DT::execution_space;
555 "Tpetra::Details::UnpackAndCombineCrsMatrixImpl::"
556 "unpackAndCombineIntoCrsMatrix: ";
558 const size_t num_import_lids =
static_cast<size_t>(import_lids.extent(0));
567 std::invalid_argument,
568 prefix <<
"ABSMAX combine mode is not yet implemented for a matrix that has a "
569 "static graph (i.e., was constructed with the CrsMatrix constructor "
570 "that takes a const CrsGraph pointer).");
573 std::invalid_argument,
574 prefix <<
"INSERT combine mode is not allowed if the matrix has a static graph "
575 "(i.e., was constructed with the CrsMatrix constructor that takes a "
576 "const CrsGraph pointer).");
580 std::invalid_argument,
581 prefix <<
"Invalid combine mode; should never get "
582 "here! Please report this bug to the Tpetra developers.");
588 std::invalid_argument,
590 "numPacketsPerLID.size() ("
591 << num_packets_per_lid.extent(0) <<
").");
605 Kokkos::View<LO* [2], DT> batch_info(
"",
num_batches);
608 Kokkos::parallel_reduce(
609 Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t>>(0,
num_import_lids),
612 imports.data(), offsets(
i), num_packets_per_lid(
i));
636 const bool atomic = XS().concurrency() != 1;
651 using policy = Kokkos::TeamPolicy<XS, Kokkos::IndexType<LO>>;
653 if (!Spaces::is_gpu_exec_space<XS>() ||
team_size == Teuchos::OrdinalTraits<size_t>::invalid()) {
659 auto error_code =
f.error();
663 prefix <<
"UnpackCrsMatrixAndCombineFunctor reported error code " << error_code);
666template <
class LocalMatrix,
class BufferDeviceType>
672 const Kokkos::View<const char*, BufferDeviceType, void, void>& imports,
673 const Kokkos::View<const size_t*, BufferDeviceType, void, void>& num_packets_per_lid,
675 const Kokkos::View<const char*, BufferDeviceType>& imports,
676 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
679 using Kokkos::parallel_reduce;
680 typedef typename LocalMatrix::ordinal_type LO;
681 typedef typename LocalMatrix::device_type device_type;
682 typedef typename device_type::execution_space XS;
683 typedef typename Kokkos::View<LO*, device_type>::size_type size_type;
684 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>> range_policy;
697 update +=
static_cast<size_t>(local_matrix.graph.row_map[
lid + 1] - local_matrix.graph.row_map[
lid]);
704 num_items =
static_cast<LO
>(permute_from_lids.extent(0));
708 range_policy(0, num_items),
709 KOKKOS_LAMBDA(
const LO i,
size_t& update) {
710 const LO lid = permute_from_lids(i);
711 update +=
static_cast<size_t>(local_matrix.graph.row_map[lid + 1] - local_matrix.graph.row_map[lid]);
719 const size_type np = num_packets_per_lid.extent(0);
720 Kokkos::View<size_t*, device_type> offsets(
"offsets", np + 1);
723 compute_total_num_entries<LO, device_type, BDT>(num_packets_per_lid,
731template <
class LO,
class DT,
class BDT>
732int setupRowPointersForRemotes(
735 const Kokkos::View<const char*, BDT>& imports,
736 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
738 using Kokkos::parallel_reduce;
739 typedef typename DT::execution_space XS;
741 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type>> range_policy;
744 const size_type
N = num_packets_per_lid.extent(0);
748 "Setup row pointers for remotes",
752 const size_t num_bytes = num_packets_per_lid(
i);
753 const size_t offset = offsets(
i);
766void makeCrsRowPtrFromLengths(
769 using Kokkos::parallel_scan;
770 typedef typename DT::execution_space XS;
771 typedef typename Kokkos::View<size_t*, DT>::size_type size_type;
772 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type>> range_policy;
776 KOKKOS_LAMBDA(
const size_t&
i,
size_t& update,
const bool&
final) {
786template <
class LocalMatrix,
class LocalMap>
787void copyDataFromSameIDs(
789 const typename PackTraits<int>::output_array_type& tgt_pids,
791 const Kokkos::View<size_t*, typename LocalMap::device_type>& new_start_row,
793 const typename PackTraits<int>::input_array_type& src_pids,
794 const LocalMatrix& local_matrix,
795 const LocalMap& local_col_map,
796 const size_t num_same_ids,
798 using Kokkos::parallel_for;
801 typedef typename DT::execution_space XS;
802 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t>> range_policy;
805 range_policy(0, num_same_ids),
806 KOKKOS_LAMBDA(
const size_t i) {
807 typedef typename std::remove_reference<
decltype(new_start_row(0))>::type atomic_incr_type;
809 const LO src_lid =
static_cast<LO
>(i);
810 size_t src_row = local_matrix.graph.row_map(src_lid);
812 const LO tgt_lid =
static_cast<LO
>(i);
813 const size_t tgt_row = tgt_rowptr(tgt_lid);
815 const size_t nsr = local_matrix.graph.row_map(src_lid + 1) - local_matrix.graph.row_map(src_lid);
816 Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
818 for (
size_t j = local_matrix.graph.row_map(src_lid);
819 j < local_matrix.graph.row_map(src_lid + 1); ++j) {
820 LO src_col = local_matrix.graph.entries(j);
821 tgt_vals(tgt_row + j - src_row) = local_matrix.values(j);
822 tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
823 tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
828template <
class LocalMatrix,
class LocalMap>
829void copyDataFromPermuteIDs(
831 const typename PackTraits<int>::output_array_type& tgt_pids,
833 const Kokkos::View<size_t*, typename LocalMap::device_type>& new_start_row,
835 const typename PackTraits<int>::input_array_type& src_pids,
838 const LocalMatrix& local_matrix,
839 const LocalMap& local_col_map,
841 using Kokkos::parallel_for;
844 typedef typename DT::execution_space XS;
845 typedef typename PackTraits<LO>::input_array_type::size_type size_type;
846 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type>> range_policy;
848 const size_type num_permute_to_lids = permute_to_lids.extent(0);
851 range_policy(0, num_permute_to_lids),
852 KOKKOS_LAMBDA(
const size_t i) {
853 typedef typename std::remove_reference<
decltype(new_start_row(0))>::type atomic_incr_type;
855 const LO src_lid = permute_from_lids(i);
856 const size_t src_row = local_matrix.graph.row_map(src_lid);
858 const LO tgt_lid = permute_to_lids(i);
859 const size_t tgt_row = tgt_rowptr(tgt_lid);
861 size_t nsr = local_matrix.graph.row_map(src_lid + 1) - local_matrix.graph.row_map(src_lid);
862 Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
864 for (
size_t j = local_matrix.graph.row_map(src_lid);
865 j < local_matrix.graph.row_map(src_lid + 1); ++j) {
866 LO src_col = local_matrix.graph.entries(j);
867 tgt_vals(tgt_row + j - src_row) = local_matrix.values(j);
868 tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
869 tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
874template <
typename LocalMatrix,
typename LocalMap,
typename BufferDeviceType>
875int unpackAndCombineIntoCrsArrays2(
877 const typename PackTraits<int>::output_array_type& tgt_pids,
879 const Kokkos::View<size_t*, typename LocalMap::device_type>& new_start_row,
882#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
883 const Kokkos::View<const char*, BufferDeviceType, void, void>& imports,
884 const Kokkos::View<const size_t*, BufferDeviceType, void, void>& num_packets_per_lid,
886 const Kokkos::View<const char*, BufferDeviceType>& imports,
887 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
892 const size_t bytes_per_value) {
893 using Kokkos::atomic_fetch_add;
894 using Kokkos::MemoryUnmanaged;
895 using Kokkos::parallel_reduce;
896 using Kokkos::subview;
902 typedef typename LocalMatrix::value_type ST;
903 typedef typename DT::execution_space XS;
904 typedef typename Kokkos::View<LO*, DT>::size_type size_type;
905 typedef typename Kokkos::pair<size_type, size_type> slice;
906 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type>> range_policy;
908 typedef View<int*, DT, MemoryUnmanaged> pids_out_type;
909 typedef View<GO*, DT, MemoryUnmanaged> gids_out_type;
910 typedef View<ST*, DT, MemoryUnmanaged> vals_out_type;
912 const size_t InvalidNum = OrdinalTraits<size_t>::invalid();
915 const size_type num_import_lids = import_lids.size();
919 "Unpack and combine into CRS",
920 range_policy(0, num_import_lids),
921 KOKKOS_LAMBDA(
const size_t i,
int& k_error) {
922 typedef typename std::remove_reference<
decltype(new_start_row(0))>::type atomic_incr_type;
923 const size_t num_bytes = num_packets_per_lid(i);
924 const size_t offset = offsets(i);
925 if (num_bytes == 0) {
929 size_t num_ent = unpackRowCount<LO>(imports.data(), offset, num_bytes);
930 if (num_ent == InvalidNum) {
934 const LO lcl_row = import_lids(i);
935 const size_t start_row = atomic_fetch_add(&new_start_row(lcl_row), atomic_incr_type(num_ent));
936 const size_t end_row = start_row + num_ent;
938 gids_out_type gids_out = subview(tgt_colind, slice(start_row, end_row));
939 vals_out_type vals_out = subview(tgt_vals, slice(start_row, end_row));
940 pids_out_type pids_out = subview(tgt_pids, slice(start_row, end_row));
942 k_error += unpackRow<ST, LO, GO>(gids_out, pids_out, vals_out,
943 imports.data(), offset, num_bytes,
944 num_ent, bytes_per_value);
947 for (
size_t j = 0; j < static_cast<size_t>(num_ent); ++j) {
948 const int pid = pids_out(j);
949 pids_out(j) = (pid != my_pid) ? pid : -1;
957template <
typename LocalMatrix,
typename LocalMap,
typename BufferDeviceType>
959 const LocalMatrix& local_matrix,
960 const LocalMap& local_col_map,
962#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
963 const Kokkos::View<const char*, BufferDeviceType, void, void>& imports,
964 const Kokkos::View<const size_t*, BufferDeviceType, void, void>& num_packets_per_lid,
966 const Kokkos::View<const char*, BufferDeviceType>& imports,
967 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
974 const typename PackTraits<int>::input_array_type& src_pids,
975 const typename PackTraits<int>::output_array_type& tgt_pids,
976 const size_t num_same_ids,
977 const size_t tgt_num_rows,
978 const size_t tgt_num_nonzeros,
979 const int my_tgt_pid,
980 const size_t bytes_per_value) {
981 using Kokkos::MemoryUnmanaged;
982 using Kokkos::parallel_for;
983 using Kokkos::subview;
988 typedef typename DT::execution_space XS;
989 typedef typename Kokkos::View<LO*, DT>::size_type size_type;
990 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t>> range_policy;
991 typedef BufferDeviceType BDT;
993 const char prefix[] =
"unpackAndCombineIntoCrsArrays: ";
995 const size_t N = tgt_num_rows;
999 const int my_pid = my_tgt_pid;
1003 range_policy(0, N + 1),
1004 KOKKOS_LAMBDA(
const size_t i) {
1010 range_policy(0, num_same_ids),
1011 KOKKOS_LAMBDA(
const size_t i) {
1012 const LO tgt_lid =
static_cast<LO
>(i);
1013 const LO src_lid =
static_cast<LO
>(i);
1014 tgt_rowptr(tgt_lid) = local_matrix.graph.row_map(src_lid + 1) - local_matrix.graph.row_map(src_lid);
1018 const size_type num_permute_to_lids = permute_to_lids.extent(0);
1020 range_policy(0, num_permute_to_lids),
1021 KOKKOS_LAMBDA(
const size_t i) {
1022 const LO tgt_lid = permute_to_lids(i);
1023 const LO src_lid = permute_from_lids(i);
1024 tgt_rowptr(tgt_lid) = local_matrix.graph.row_map(src_lid + 1) - local_matrix.graph.row_map(src_lid);
1028 const size_type num_import_lids = import_lids.extent(0);
1029 View<size_t*, DT> offsets(
"offsets", num_import_lids + 1);
1032#ifdef HAVE_TPETRA_DEBUG
1034 auto nth_offset_h = getEntryOnHost(offsets, num_import_lids);
1035 const bool condition =
1036 nth_offset_h !=
static_cast<size_t>(imports.extent(0));
1037 TEUCHOS_TEST_FOR_EXCEPTION(condition, std::logic_error, prefix <<
"The final offset in bytes " << nth_offset_h <<
" != imports.size() = " << imports.extent(0) <<
". Please report this bug to the Tpetra developers.");
1043 setupRowPointersForRemotes<LO, DT, BDT>(tgt_rowptr,
1044 import_lids, imports, num_packets_per_lid, offsets);
1045 TEUCHOS_TEST_FOR_EXCEPTION(k_error != 0, std::logic_error, prefix <<
" Error transferring data to target row pointers. "
1046 "Please report this bug to the Tpetra developers.");
1050 View<size_t*, DT> new_start_row(
"new_start_row", N + 1);
1053 makeCrsRowPtrFromLengths(tgt_rowptr, new_start_row);
1056 copyDataFromSameIDs(tgt_colind, tgt_pids, tgt_vals, new_start_row,
1057 tgt_rowptr, src_pids, local_matrix, local_col_map, num_same_ids, my_pid);
1059 copyDataFromPermuteIDs(tgt_colind, tgt_pids, tgt_vals, new_start_row,
1060 tgt_rowptr, src_pids, permute_to_lids, permute_from_lids,
1061 local_matrix, local_col_map, my_pid);
1063 if (imports.extent(0) <= 0) {
1067 int unpack_err = unpackAndCombineIntoCrsArrays2(tgt_colind, tgt_pids,
1068 tgt_vals, new_start_row, offsets, import_lids, imports, num_packets_per_lid,
1069 local_matrix, local_col_map, my_pid, bytes_per_value);
1070 TEUCHOS_TEST_FOR_EXCEPTION(
1071 unpack_err != 0, std::logic_error, prefix <<
"unpack loop failed. This "
1072 "should never happen. Please report this bug to the Tpetra developers.");
1118template <
typename ST,
typename LO,
typename GO,
typename Node>
1121 const Teuchos::ArrayView<const char>& imports,
1123 const Teuchos::ArrayView<const LO>&
importLIDs,
1127 typedef typename Node::device_type device_type;
1129 static_assert(std::is_same<device_type, typename local_matrix_device_type::device_type>::value,
1130 "Node::device_type and LocalMatrix::device_type must be the same.");
1148 imports.size(),
true,
"imports");
1150 auto local_matrix =
sourceMatrix.getLocalMatrixDevice();
1151 auto local_col_map =
sourceMatrix.getColMap()->getLocalMap();
1162 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix(
1167template <
typename ST,
typename LO,
typename GO,
typename NT>
1168void unpackCrsMatrixAndCombineNew(
1170 Kokkos::DualView<
char*,
1173 Kokkos::DualView<
size_t*,
1176 const Kokkos::DualView<
const LO*,
1183 using device_type =
typename crs_matrix_type::device_type;
1184 using local_matrix_device_type =
typename crs_matrix_type::local_matrix_device_type;
1185 using buffer_device_type =
typename dist_object_type::buffer_device_type;
1187 static_assert(std::is_same<device_type, typename local_matrix_device_type::device_type>::value,
1188 "crs_matrix_type::device_type and local_matrix_device_type::device_type "
1189 "must be the same.");
1194 auto num_packets_per_lid_d = numPacketsPerLID.view_device();
1196 TEUCHOS_ASSERT(!importLIDs.need_sync_device());
1197 auto import_lids_d = importLIDs.view_device();
1199 if (imports.need_sync_device()) {
1200 imports.sync_device();
1202 auto imports_d = imports.view_device();
1204 auto local_matrix = sourceMatrix.getLocalMatrixDevice();
1205 auto local_col_map = sourceMatrix.getColMap()->getLocalMap();
1206 typedef decltype(local_col_map) local_map_type;
1208 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix<
1209 local_matrix_device_type,
1211 buffer_device_type>(local_matrix, local_col_map, imports_d, num_packets_per_lid_d,
1212 import_lids_d, combineMode);
1270template <
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1274 const Teuchos::ArrayView<const LocalOrdinal>&
importLIDs,
1275 const Teuchos::ArrayView<const char>& imports,
1280 const Teuchos::ArrayView<const LocalOrdinal>&
permuteToLIDs,
1282 using Kokkos::MemoryUnmanaged;
1284 typedef typename Node::device_type DT;
1285 const char prefix[] =
"unpackAndCombineWithOwningPIDsCount: ";
1289 "permuteFromLIDs.size() = "
1295 "CrsMatrix 'sourceMatrix' must be locally indexed.");
1298 "numPacketsPerLID.size() = "
1301 auto local_matrix =
sourceMatrix.getLocalMatrixDevice();
1303 using kokkos_device_type = Kokkos::Device<
typename Node::device_type::execution_space,
1304 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>;
1306#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
1314 "permute_from_lids");
1316#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
1317 Kokkos::View<const char*, kokkos_device_type, void, void>
imports_d =
1319 Kokkos::View<const char*, kokkos_device_type>
imports_d =
1322 imports.getRawPtr(),
1323 imports.size(),
true,
1326#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
1334 "num_packets_per_lid");
1336 return UnpackAndCombineCrsMatrixImpl::unpackAndCombineWithOwningPIDsCount(
1356template <
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1360 Kokkos::Device<
typename Node::device_type::execution_space,
1361 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1368 const Kokkos::View<
const char*,
1369 Kokkos::Device<
typename Node::device_type::execution_space,
1370 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1377 const Kokkos::View<
const size_t*,
1378 Kokkos::Device<
typename Node::device_type::execution_space,
1379 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1386 const size_t numSameIDs,
1388 Kokkos::Device<
typename Node::device_type::execution_space,
1389 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1397 Kokkos::Device<
typename Node::device_type::execution_space,
1398 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1407 Kokkos::View<size_t*, typename Node::device_type>&
crs_rowptr_d,
1408 Kokkos::View<GlobalOrdinal*, typename Node::device_type>&
crs_colind_d,
1410 const Teuchos::ArrayView<const int>&
SourcePids,
1411 Kokkos::View<int*, typename Node::device_type>&
TargetPids) {
1412 using execution_space =
typename Node::execution_space;
1415 using Kokkos::deep_copy;
1418 using Teuchos::ArrayView;
1419 using Teuchos::outArg;
1420 using Teuchos::REDUCE_MAX;
1421 using Teuchos::reduceAll;
1423 typedef typename Node::device_type DT;
1426 typedef typename matrix_type::impl_scalar_type ST;
1428 const char prefix[] =
"Tpetra::Details::unpackAndCombineIntoCrsArrays_new: ";
1429 Teuchos::RCP<Tpetra::Details::ProfilingRegion> tm;
1431 using Kokkos::MemoryUnmanaged;
1435 "permute_from_lids_d.size() = "
1441 "CrsMatrix 'sourceMatrix' must be locally indexed.");
1444 "num_packets_per_lid_d.size() = "
1447 auto local_matrix =
sourceMatrix.getLocalMatrixDevice();
1452 UnpackAndCombineCrsMatrixImpl::unpackAndCombineWithOwningPIDsCount(
1471 Kokkos::deep_copy(execution_space(),
TargetPids, -1);
1474 auto local_col_map =
sourceMatrix.getColMap()->getLocalMap();
1486 size_t bytes_per_value = 0;
1500 if (local_matrix.values.extent(0) > 0) {
1501 const ST&
val = local_matrix.values(0);
1507 Teuchos::reduceAll<int, size_t>(*(
sourceMatrix.getComm()),
1508 Teuchos::REDUCE_MAX,
1510 outArg(bytes_per_value));
1514 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsArrays(
1525 Kokkos::parallel_for(
1526 "setLocalEntriesToPID", Kokkos::RangePolicy<typename DT::execution_space>(0,
TargetPids.size()),
KOKKOS_LAMBDA(
const size_t i) {
1532template <
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1536 Kokkos::Device<
typename Node::device_type::execution_space,
1537 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1544 const Kokkos::View<
const char*,
1545 Kokkos::Device<
typename Node::device_type::execution_space,
1546 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1553 const Kokkos::View<
const size_t*,
1554 Kokkos::Device<
typename Node::device_type::execution_space,
1555 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1562 const size_t numSameIDs,
1564 Kokkos::Device<
typename Node::device_type::execution_space,
1565 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1573 Kokkos::Device<
typename Node::device_type::execution_space,
1574 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1584 Teuchos::ArrayRCP<GlobalOrdinal>&
CRS_colind,
1585 Teuchos::ArrayRCP<Scalar>&
CRS_vals,
1586 const Teuchos::ArrayView<const int>&
SourcePids,
1588 using execution_space =
typename Node::execution_space;
1591 using Kokkos::deep_copy;
1594 using Teuchos::ArrayView;
1595 using Teuchos::outArg;
1596 using Teuchos::REDUCE_MAX;
1597 using Teuchos::reduceAll;
1599 typedef typename Node::device_type DT;
1602 typedef typename matrix_type::impl_scalar_type ST;
1604 const char prefix[] =
"Tpetra::Details::unpackAndCombineIntoCrsArrays_new: ";
1605 Teuchos::RCP<Tpetra::Details::ProfilingRegion> tm;
1607 using Kokkos::MemoryUnmanaged;
1611 "permute_from_lids_d.size() = "
1617 "CrsMatrix 'sourceMatrix' must be locally indexed.");
1620 "num_packets_per_lid_d.size() = "
1623 auto local_matrix =
sourceMatrix.getLocalMatrixDevice();
1628 UnpackAndCombineCrsMatrixImpl::unpackAndCombineWithOwningPIDsCount(
1652 auto local_col_map =
sourceMatrix.getColMap()->getLocalMap();
1665#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE
1666 static_assert(!std::is_same<
1667 typename std::remove_const<
1668 typename std::decay<
1670 std::complex<double>>::value,
1671 "CRS_vals::value_type is std::complex<double>; this should never happen"
1672 ", since std::complex does not work in Kokkos::View objects.");
1679#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE
1680 static_assert(!std::is_same<
1681 typename decltype(
crs_vals_d)::non_const_value_type,
1682 std::complex<double>>::value,
1683 "crs_vals_d::non_const_value_type is std::complex<double>; this should "
1684 "never happen, since std::complex does not work in Kokkos::View objects.");
1697 size_t bytes_per_value = 0;
1711 if (local_matrix.values.extent(0) > 0) {
1712 const ST&
val = local_matrix.values(0);
1718 Teuchos::reduceAll<int, size_t>(*(
sourceMatrix.getComm()),
1719 Teuchos::REDUCE_MAX,
1721 outArg(bytes_per_value));
1724#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE
1725 static_assert(!std::is_same<
1726 typename decltype(
crs_vals_d)::non_const_value_type,
1727 std::complex<double>>::value,
1728 "crs_vals_d::non_const_value_type is std::complex<double>; this should "
1729 "never happen, since std::complex does not work in Kokkos::View objects.");
1733 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsArrays(
1768#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT_KOKKOS_DEPRECATED_CODE_4_ON(ST, LO, GO, NT) \
1770 Details::unpackCrsMatrixAndCombine<ST, LO, GO, NT>( \
1771 const CrsMatrix<ST, LO, GO, NT>&, \
1772 const Teuchos::ArrayView<const char>&, \
1773 const Teuchos::ArrayView<const size_t>&, \
1774 const Teuchos::ArrayView<const LO>&, \
1778 Details::unpackAndCombineWithOwningPIDsCount<ST, LO, GO, NT>( \
1779 const CrsMatrix<ST, LO, GO, NT>&, \
1780 const Teuchos::ArrayView<const LO>&, \
1781 const Teuchos::ArrayView<const char>&, \
1782 const Teuchos::ArrayView<const size_t>&, \
1786 const Teuchos::ArrayView<const LO>&, \
1787 const Teuchos::ArrayView<const LO>&); \
1789 Details::unpackCrsMatrixAndCombineNew<ST, LO, GO, NT>( \
1790 const CrsMatrix<ST, LO, GO, NT>&, \
1791 Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>, \
1792 Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>, \
1793 const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1795 const CombineMode); \
1797 Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT>( \
1798 const CrsMatrix<ST, LO, GO, NT>&, \
1799 const Kokkos::View<LO const*, \
1800 Kokkos::Device<typename NT::device_type::execution_space, \
1801 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1803 const Kokkos::View<const char*, \
1804 Kokkos::Device<typename NT::device_type::execution_space, \
1805 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1807 const Kokkos::View<const size_t*, \
1808 Kokkos::Device<typename NT::device_type::execution_space, \
1809 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1812 const Kokkos::View<LO const*, \
1813 Kokkos::Device<typename NT::device_type::execution_space, \
1814 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1816 const Kokkos::View<LO const*, \
1817 Kokkos::Device<typename NT::device_type::execution_space, \
1818 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1822 Kokkos::View<size_t*, typename NT::device_type>&, \
1823 Kokkos::View<GO*, typename NT::device_type>&, \
1824 Kokkos::View<typename CrsMatrix<ST, LO, GO, NT>::impl_scalar_type*, typename NT::device_type>&, \
1825 const Teuchos::ArrayView<const int>&, \
1826 Kokkos::View<int*, typename NT::device_type>&); \
1828 Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT>( \
1829 const CrsMatrix<ST, LO, GO, NT>&, \
1830 const Kokkos::View<LO const*, \
1831 Kokkos::Device<typename NT::device_type::execution_space, \
1832 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1834 const Kokkos::View<const char*, \
1835 Kokkos::Device<typename NT::device_type::execution_space, \
1836 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1838 const Kokkos::View<const size_t*, \
1839 Kokkos::Device<typename NT::device_type::execution_space, \
1840 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1843 const Kokkos::View<LO const*, \
1844 Kokkos::Device<typename NT::device_type::execution_space, \
1845 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1847 const Kokkos::View<LO const*, \
1848 Kokkos::Device<typename NT::device_type::execution_space, \
1849 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1853 Teuchos::ArrayRCP<size_t>&, \
1854 Teuchos::ArrayRCP<GO>&, \
1855 Teuchos::ArrayRCP<ST>&, \
1856 const Teuchos::ArrayView<const int>&, \
1857 Teuchos::Array<int>&);
1859#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT_KOKKOS_DEPRECATED_CODE_4_OFF(ST, LO, GO, NT) \
1861 Details::unpackCrsMatrixAndCombine<ST, LO, GO, NT>( \
1862 const CrsMatrix<ST, LO, GO, NT>&, \
1863 const Teuchos::ArrayView<const char>&, \
1864 const Teuchos::ArrayView<const size_t>&, \
1865 const Teuchos::ArrayView<const LO>&, \
1869 Details::unpackAndCombineWithOwningPIDsCount<ST, LO, GO, NT>( \
1870 const CrsMatrix<ST, LO, GO, NT>&, \
1871 const Teuchos::ArrayView<const LO>&, \
1872 const Teuchos::ArrayView<const char>&, \
1873 const Teuchos::ArrayView<const size_t>&, \
1877 const Teuchos::ArrayView<const LO>&, \
1878 const Teuchos::ArrayView<const LO>&); \
1880 Details::unpackCrsMatrixAndCombineNew<ST, LO, GO, NT>( \
1881 const CrsMatrix<ST, LO, GO, NT>&, \
1882 Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>, \
1883 Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>, \
1884 const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1886 const CombineMode); \
1888 Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT>( \
1889 const CrsMatrix<ST, LO, GO, NT>&, \
1890 const Kokkos::View<LO const*, \
1891 Kokkos::Device<typename NT::device_type::execution_space, \
1892 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1893 const Kokkos::View<const char*, \
1894 Kokkos::Device<typename NT::device_type::execution_space, \
1895 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1896 const Kokkos::View<const size_t*, \
1897 Kokkos::Device<typename NT::device_type::execution_space, \
1898 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1900 const Kokkos::View<LO const*, \
1901 Kokkos::Device<typename NT::device_type::execution_space, \
1902 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1903 const Kokkos::View<LO const*, \
1904 Kokkos::Device<typename NT::device_type::execution_space, \
1905 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1908 Kokkos::View<size_t*, typename NT::device_type>&, \
1909 Kokkos::View<GO*, typename NT::device_type>&, \
1910 Kokkos::View<typename CrsMatrix<ST, LO, GO, NT>::impl_scalar_type*, typename NT::device_type>&, \
1911 const Teuchos::ArrayView<const int>&, \
1912 Kokkos::View<int*, typename NT::device_type>&); \
1914 Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT>( \
1915 const CrsMatrix<ST, LO, GO, NT>&, \
1916 const Kokkos::View<LO const*, \
1917 Kokkos::Device<typename NT::device_type::execution_space, \
1918 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1919 const Kokkos::View<const char*, \
1920 Kokkos::Device<typename NT::device_type::execution_space, \
1921 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1922 const Kokkos::View<const size_t*, \
1923 Kokkos::Device<typename NT::device_type::execution_space, \
1924 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1926 const Kokkos::View<LO const*, \
1927 Kokkos::Device<typename NT::device_type::execution_space, \
1928 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1929 const Kokkos::View<LO const*, \
1930 Kokkos::Device<typename NT::device_type::execution_space, \
1931 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1934 Teuchos::ArrayRCP<size_t>&, \
1935 Teuchos::ArrayRCP<GO>&, \
1936 Teuchos::ArrayRCP<ST>&, \
1937 const Teuchos::ArrayView<const int>&, \
1938 Teuchos::Array<int>&);
1940#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
// Instantiation macro variant defined under the
// `#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4` guard directly above: it simply
// forwards its four arguments to the ..._KOKKOS_DEPRECATED_CODE_4_ON macro.
1941#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT(ST, LO, GO, NT) \
1942 TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT_KOKKOS_DEPRECATED_CODE_4_ON(ST, LO, GO, NT)
// Alternative definition of the same instantiation macro: forwards its four
// arguments to the ..._KOKKOS_DEPRECATED_CODE_4_OFF macro.
// NOTE(review): the preprocessor directive separating this from the previous
// definition is not visible in this listing - presumably an `#else` of the
// KOKKOS_ENABLE_DEPRECATED_CODE_4 check; confirm against the full file.
1944#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT(ST, LO, GO, NT) \
1945 TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT_KOKKOS_DEPRECATED_CODE_4_OFF(ST, LO, GO, NT)
Declaration of the Tpetra::CrsMatrix class.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
size_t compute_total_num_entries(const Kokkos::View< const size_t *, BDT > &num_packets_per_lid, const Kokkos::View< const size_t *, DT > &offsets, const Kokkos::View< const char *, BDT > &imports)
Total number of entries to unpack, summed over all rows of the packed matrix.
void unpackAndCombineIntoCrsMatrix(const LocalMatrix &local_matrix, const LocalMap &local_map, const Kokkos::View< const char *, BufferDeviceType > &imports, const Kokkos::View< const size_t *, BufferDeviceType > &num_packets_per_lid, const typename PackTraits< typename LocalMap::local_ordinal_type >::input_array_type import_lids, const Tpetra::CombineMode combine_mode)
Perform the unpack operation for the matrix.
size_t compute_maximum_num_entries(const Kokkos::View< const size_t *, BDT > &num_packets_per_lid, const Kokkos::View< const size_t *, DT > &offsets, const Kokkos::View< const char *, BDT > &imports)
Maximum number of entries in any row of the packed matrix.
bool compute_batch_info(const View1 &batches_per_lid, View2 &batch_info)
Compute the index and batch number associated with each batch.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
typename row_matrix_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
Struct that holds views of the contents of a CrsMatrix.
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
"Local" part of Map suitable for Kokkos kernels.
KOKKOS_INLINE_FUNCTION LocalOrdinal getLocalElement(const GlobalOrdinal globalIndex) const
Get the local index corresponding to the given global index. (device only)
LocalOrdinal local_ordinal_type
The type of local indices.
GlobalOrdinal global_ordinal_type
The type of global indices.
DeviceType device_type
The device type.
Kokkos::parallel_reduce functor to determine the number of entries (to unpack) in a KokkosSparse::Crs...
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
Implementation details of Tpetra.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
void unpackCrsMatrixAndCombine(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &importLIDs, size_t constantNumPackets, CombineMode combineMode)
Unpack the imported column indices and values, and combine into matrix.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
CombineMode
Rule for combining data in an Import or Export.
@ REPLACE
Replace existing values with new values.
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
@ INSERT
Insert new values that don't currently exist.
Traits class for packing / unpacking data of type T.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > unpackArray(value_type outBuf[], const char inBuf[], const size_t numEnt)
Unpack numEnt value_type entries from the given input buffer of bytes, to the given output buffer of ...
static KOKKOS_INLINE_FUNCTION size_t unpackValue(T &outVal, const char inBuf[])
Unpack the given value from the given input buffer.
Kokkos::View< value_type *, Kokkos::AnonymousSpace > output_array_type
The type of an output array of value_type.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.
Unpacks and combines a single row of the CrsMatrix.
int error() const
Host function for getting the error.