10#ifndef TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_DEF_HPP
11#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_DEF_HPP
15#include "TpetraCore_config.h"
16#include "Kokkos_Core.hpp"
17#include "Teuchos_Array.hpp"
18#include "Teuchos_ArrayView.hpp"
19#include "Teuchos_OrdinalTraits.hpp"
20#include "Teuchos_TimeMonitor.hpp"
28#include "Tpetra_Details_DefaultTypes.hpp"
59namespace UnpackAndCombineCrsMatrixImpl {
70template <
class ST,
class LO,
class GO>
79 const size_t bytes_per_value) {
84 bool unpack_pids =
pids_out.size() > 0;
101 const char*
const pids_in = unpack_pids ? imports +
pids_beg :
nullptr;
112 Kokkos::pair<int, size_t>
p;
151template <
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
156 typedef typename local_matrix_type::value_type ST;
160 typedef typename DT::execution_space XS;
162 typedef Kokkos::View<const size_t*, BufferDeviceType>
163 num_packets_per_lid_type;
164 typedef Kokkos::View<const size_t*, DT> offsets_type;
165 typedef Kokkos::View<const char*, BufferDeviceType> input_buffer_type;
166 typedef Kokkos::View<const LO*, BufferDeviceType> import_lids_type;
168 typedef Kokkos::View<int, DT> error_type;
169 using member_type =
typename Kokkos::TeamPolicy<XS>::member_type;
171 static_assert(std::is_same<LO, typename local_matrix_type::ordinal_type>::value,
172 "LocalMap::local_ordinal_type and "
173 "LocalMatrix::ordinal_type must be the same.");
177 input_buffer_type imports;
178 num_packets_per_lid_type num_packets_per_lid;
179 import_lids_type import_lids;
180 Kokkos::View<const LO* [2], DT> batch_info;
181 offsets_type offsets;
184 size_t bytes_per_value;
186 error_type error_code;
211 , error_code(
"error") {}
214 void operator()(member_type team_member)
const {
215 using Kokkos::MemoryUnmanaged;
216 using Kokkos::subview;
219 const LO
batch = team_member.league_rank();
231 const size_t buf_size = imports.size();
250 "*** Error: UnpackCrsMatrixAndCombineFunctor: "
251 "At row %d, the expected number of bytes (%d) != number of unpacked bytes (%d)\n",
254 Kokkos::atomic_compare_exchange(error_code.data(), 0, 21);
260 "*** Error: UnpackCrsMatrixAndCombineFunctor: "
261 "At row %d, the offset (%d) > buffer size (%d)\n",
264 Kokkos::atomic_compare_exchange(error_code.data(), 0, 22);
296 "*** Error: UnpackCrsMatrixAndCombineFunctor: "
297 "At row %d, number of entries (%d) != number of entries unpacked (%d)\n",
300 Kokkos::atomic_compare_exchange(error_code.data(), 0, 23);
306 Kokkos::parallel_for(
308 [=, *
this](
const LO&
j) {
325 if (combine_mode ==
ADD) {
330 (
void)local_matrix.sumIntoValues(
337 }
else if (combine_mode ==
REPLACE) {
342 (
void)local_matrix.replaceValues(
352 "*** Error: UnpackCrsMatrixAndCombineFunctor: "
353 "At row %d, an unknown error occurred during unpack\n",
355 Kokkos::atomic_compare_exchange(error_code.data(), 0, 31);
359 team_member.team_barrier();
364 auto error_code_h = Kokkos::create_mirror_view_and_copy(
365 Kokkos::HostSpace(), error_code);
// Empty tag type used purely for overload selection: the functor in this
// file declares operator() and join overloads whose first parameter is a
// MaxNumEntTag, and that join overload keeps the larger of its two operands
// (a max-reduction).
// NOTE(review): presumably also passed as the Kokkos work tag of the
// "Max num entries in CRS" parallel_reduce -- confirm against the policy typedef.
371struct MaxNumEntTag {};
// Empty tag type used as the work-tag template argument of the
// Kokkos::RangePolicy that drives the "Total num entries in CRS to unpack"
// parallel_reduce in this file.
// NOTE(review): presumably selects a matching operator()(TotNumEntTag, ...)
// overload that sums per-row entry counts -- confirm against the functor.
372struct TotNumEntTag {};
382template <
class LO,
class DT,
class BDT>
385 typedef Kokkos::View<const size_t*, BDT> num_packets_per_lid_type;
386 typedef Kokkos::View<const size_t*, DT> offsets_type;
387 typedef Kokkos::View<const char*, BDT> input_buffer_type;
393 num_packets_per_lid_type num_packets_per_lid;
394 offsets_type offsets;
395 input_buffer_type imports;
406 operator()(
const MaxNumEntTag,
const LO
i,
value_type& update)
const {
408 const size_t num_bytes = num_packets_per_lid(
i);
411 const char*
const in_buf = imports.data() + offsets(
i);
420 join(
const MaxNumEntTag,
423 if (dst < src) dst = src;
429 const size_t num_bytes = num_packets_per_lid(
i);
432 const char*
const in_buf = imports.data() + offsets(
i);
446template <
class LO,
class DT,
class BDT>
449 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
450 const Kokkos::View<const size_t*, DT>& offsets,
451 const Kokkos::View<const char*, BDT>& imports) {
452 typedef typename DT::execution_space XS;
453 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>,
460 static_cast<LO
>(num_packets_per_lid.extent(0));
461 size_t max_num_ent = 0;
462 Kokkos::parallel_reduce(
"Max num entries in CRS",
475template <
class LO,
class DT,
class BDT>
478 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
479 const Kokkos::View<const size_t*, DT>& offsets,
480 const Kokkos::View<const char*, BDT>& imports) {
481 typedef typename DT::execution_space XS;
482 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>, TotNumEntTag>
range_policy;
487 static_cast<LO
>(num_packets_per_lid.extent(0));
488 Kokkos::parallel_reduce(
"Total num entries in CRS to unpack",
497 unpackRowCount(
const char imports[],
511 return static_cast<size_t>(num_ent_LO);
518template <
class View1,
class View2>
523 using LO =
typename View2::value_type;
527 batch_info(
batch, 0) =
static_cast<LO
>(
i);
532 return batch == batch_info.extent(0);
542template <
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
546 const Kokkos::View<const char*, BufferDeviceType>& imports,
547 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
550 using ST =
typename LocalMatrix::value_type;
553 using XS =
typename DT::execution_space;
555 "Tpetra::Details::UnpackAndCombineCrsMatrixImpl::"
556 "unpackAndCombineIntoCrsMatrix: ";
558 const size_t num_import_lids =
static_cast<size_t>(import_lids.extent(0));
567 std::invalid_argument,
568 prefix <<
"ABSMAX combine mode is not yet implemented for a matrix that has a "
569 "static graph (i.e., was constructed with the CrsMatrix constructor "
570 "that takes a const CrsGraph pointer).");
573 std::invalid_argument,
574 prefix <<
"INSERT combine mode is not allowed if the matrix has a static graph "
575 "(i.e., was constructed with the CrsMatrix constructor that takes a "
576 "const CrsGraph pointer).");
580 std::invalid_argument,
581 prefix <<
"Invalid combine mode; should never get "
582 "here! Please report this bug to the Tpetra developers.");
588 std::invalid_argument,
590 "numPacketsPerLID.size() ("
591 << num_packets_per_lid.extent(0) <<
").");
605 Kokkos::View<LO* [2], DT> batch_info(
"",
num_batches);
608 Kokkos::parallel_reduce(
609 Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t>>(0,
num_import_lids),
612 imports.data(), offsets(
i), num_packets_per_lid(
i));
636 const bool atomic = XS().concurrency() != 1;
651 using policy = Kokkos::TeamPolicy<XS, Kokkos::IndexType<LO>>;
653 if (!Spaces::is_gpu_exec_space<XS>() ||
team_size == Teuchos::OrdinalTraits<size_t>::invalid()) {
659 auto error_code =
f.error();
663 prefix <<
"UnpackCrsMatrixAndCombineFunctor reported error code " << error_code);
666template <
class LocalMatrix,
class BufferDeviceType>
672 const Kokkos::View<const char*, BufferDeviceType, void, void>& imports,
673 const Kokkos::View<const size_t*, BufferDeviceType, void, void>& num_packets_per_lid,
675 const Kokkos::View<const char*, BufferDeviceType>& imports,
676 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
679 using Kokkos::parallel_reduce;
680 typedef typename LocalMatrix::ordinal_type LO;
681 typedef typename LocalMatrix::device_type device_type;
682 typedef typename device_type::execution_space XS;
683 typedef typename Kokkos::View<LO*, device_type>::size_type size_type;
684 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>>
range_policy;
697 update +=
static_cast<size_t>(local_matrix.graph.row_map[
lid + 1] - local_matrix.graph.row_map[
lid]);
704 num_items =
static_cast<LO
>(permute_from_lids.extent(0));
708 range_policy(0, num_items),
709 KOKKOS_LAMBDA(
const LO i,
size_t& update) {
710 const LO lid = permute_from_lids(i);
711 update +=
static_cast<size_t>(local_matrix.graph.row_map[lid + 1] - local_matrix.graph.row_map[lid]);
719 const size_type np = num_packets_per_lid.extent(0);
720 Kokkos::View<size_t*, device_type> offsets(
"offsets", np + 1);
723 compute_total_num_entries<LO, device_type, BDT>(num_packets_per_lid,
731template <
class LO,
class DT,
class BDT>
732int setupRowPointersForRemotes(
735 const Kokkos::View<const char*, BDT>& imports,
736 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
738 using Kokkos::parallel_reduce;
739 typedef typename DT::execution_space XS;
741 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type>>
range_policy;
744 const size_type
N = num_packets_per_lid.extent(0);
748 "Setup row pointers for remotes",
752 const size_t num_bytes = num_packets_per_lid(
i);
753 const size_t offset = offsets(
i);
766void makeCrsRowPtrFromLengths(
769 using Kokkos::parallel_scan;
770 typedef typename DT::execution_space XS;
771 typedef typename Kokkos::View<size_t*, DT>::size_type size_type;
772 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type>>
range_policy;
776 KOKKOS_LAMBDA(
const size_t&
i,
size_t& update,
const bool&
final) {
786template <
class LocalMatrix,
class LocalMap>
787void copyDataFromSameIDs(
789 const typename PackTraits<int>::output_array_type& tgt_pids,
791 const Kokkos::View<size_t*, typename LocalMap::device_type>& new_start_row,
793 const typename PackTraits<int>::input_array_type& src_pids,
794 const LocalMatrix& local_matrix,
795 const LocalMap& local_col_map,
796 const size_t num_same_ids,
798 using Kokkos::parallel_for;
801 typedef typename DT::execution_space XS;
802 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t>> range_policy;
805 range_policy(0, num_same_ids),
806 KOKKOS_LAMBDA(
const size_t i) {
807 typedef typename std::remove_reference<
decltype(new_start_row(0))>::type atomic_incr_type;
809 const LO src_lid =
static_cast<LO
>(i);
810 size_t src_row = local_matrix.graph.row_map(src_lid);
812 const LO tgt_lid =
static_cast<LO
>(i);
813 const size_t tgt_row = tgt_rowptr(tgt_lid);
815 const size_t nsr = local_matrix.graph.row_map(src_lid + 1) - local_matrix.graph.row_map(src_lid);
816 Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
818 for (
size_t j = local_matrix.graph.row_map(src_lid);
819 j < local_matrix.graph.row_map(src_lid + 1); ++j) {
820 LO src_col = local_matrix.graph.entries(j);
821 tgt_vals(tgt_row + j - src_row) = local_matrix.values(j);
822 tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
823 tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
828template <
class LocalMatrix,
class LocalMap>
829void copyDataFromPermuteIDs(
831 const typename PackTraits<int>::output_array_type& tgt_pids,
833 const Kokkos::View<size_t*, typename LocalMap::device_type>& new_start_row,
835 const typename PackTraits<int>::input_array_type& src_pids,
838 const LocalMatrix& local_matrix,
839 const LocalMap& local_col_map,
841 using Kokkos::parallel_for;
844 typedef typename DT::execution_space XS;
845 typedef typename PackTraits<LO>::input_array_type::size_type size_type;
846 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type>> range_policy;
848 const size_type num_permute_to_lids = permute_to_lids.extent(0);
851 range_policy(0, num_permute_to_lids),
852 KOKKOS_LAMBDA(
const size_t i) {
853 typedef typename std::remove_reference<
decltype(new_start_row(0))>::type atomic_incr_type;
855 const LO src_lid = permute_from_lids(i);
856 const size_t src_row = local_matrix.graph.row_map(src_lid);
858 const LO tgt_lid = permute_to_lids(i);
859 const size_t tgt_row = tgt_rowptr(tgt_lid);
861 size_t nsr = local_matrix.graph.row_map(src_lid + 1) - local_matrix.graph.row_map(src_lid);
862 Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
864 for (
size_t j = local_matrix.graph.row_map(src_lid);
865 j < local_matrix.graph.row_map(src_lid + 1); ++j) {
866 LO src_col = local_matrix.graph.entries(j);
867 tgt_vals(tgt_row + j - src_row) = local_matrix.values(j);
868 tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
869 tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
874template <
typename LocalMatrix,
typename LocalMap,
typename BufferDeviceType>
875int unpackAndCombineIntoCrsArrays2(
877 const typename PackTraits<int>::output_array_type& tgt_pids,
879 const Kokkos::View<size_t*, typename LocalMap::device_type>& new_start_row,
882#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
883 const Kokkos::View<const char*, BufferDeviceType, void, void>& imports,
884 const Kokkos::View<const size_t*, BufferDeviceType, void, void>& num_packets_per_lid,
886 const Kokkos::View<const char*, BufferDeviceType>& imports,
887 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
892 const size_t bytes_per_value) {
893 using Kokkos::atomic_fetch_add;
894 using Kokkos::MemoryUnmanaged;
895 using Kokkos::parallel_reduce;
896 using Kokkos::subview;
902 typedef typename LocalMatrix::value_type ST;
903 typedef typename DT::execution_space XS;
904 typedef typename Kokkos::View<LO*, DT>::size_type size_type;
905 typedef typename Kokkos::pair<size_type, size_type> slice;
906 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type>> range_policy;
908 typedef View<int*, DT, MemoryUnmanaged> pids_out_type;
909 typedef View<GO*, DT, MemoryUnmanaged> gids_out_type;
910 typedef View<ST*, DT, MemoryUnmanaged> vals_out_type;
912 const size_t InvalidNum = OrdinalTraits<size_t>::invalid();
915 const size_type num_import_lids = import_lids.size();
919 "Unpack and combine into CRS",
920 range_policy(0, num_import_lids),
921 KOKKOS_LAMBDA(
const size_t i,
int& k_error) {
922 typedef typename std::remove_reference<
decltype(new_start_row(0))>::type atomic_incr_type;
923 const size_t num_bytes = num_packets_per_lid(i);
924 const size_t offset = offsets(i);
925 if (num_bytes == 0) {
929 size_t num_ent = unpackRowCount<LO>(imports.data(), offset, num_bytes);
930 if (num_ent == InvalidNum) {
934 const LO lcl_row = import_lids(i);
935 const size_t start_row = atomic_fetch_add(&new_start_row(lcl_row), atomic_incr_type(num_ent));
936 const size_t end_row = start_row + num_ent;
938 gids_out_type gids_out = subview(tgt_colind, slice(start_row, end_row));
939 vals_out_type vals_out = subview(tgt_vals, slice(start_row, end_row));
940 pids_out_type pids_out = subview(tgt_pids, slice(start_row, end_row));
942 k_error += unpackRow<ST, LO, GO>(gids_out, pids_out, vals_out,
943 imports.data(), offset, num_bytes,
944 num_ent, bytes_per_value);
947 for (
size_t j = 0; j < static_cast<size_t>(num_ent); ++j) {
948 const int pid = pids_out(j);
949 pids_out(j) = (pid != my_pid) ? pid : -1;
957template <
typename LocalMatrix,
typename LocalMap,
typename BufferDeviceType>
959 const LocalMatrix& local_matrix,
960 const LocalMap& local_col_map,
962#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
963 const Kokkos::View<const char*, BufferDeviceType, void, void>& imports,
964 const Kokkos::View<const size_t*, BufferDeviceType, void, void>& num_packets_per_lid,
966 const Kokkos::View<const char*, BufferDeviceType>& imports,
967 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
974 const typename PackTraits<int>::input_array_type& src_pids,
975 const typename PackTraits<int>::output_array_type& tgt_pids,
976 const size_t num_same_ids,
977 const size_t tgt_num_rows,
978 const size_t tgt_num_nonzeros,
979 const int my_tgt_pid,
980 const size_t bytes_per_value) {
981 using Kokkos::MemoryUnmanaged;
982 using Kokkos::parallel_for;
983 using Kokkos::subview;
988 typedef typename DT::execution_space XS;
989 typedef typename Kokkos::View<LO*, DT>::size_type size_type;
990 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t>> range_policy;
991 typedef BufferDeviceType BDT;
993 const char prefix[] =
"unpackAndCombineIntoCrsArrays: ";
995 const size_t N = tgt_num_rows;
999 const int my_pid = my_tgt_pid;
1003 range_policy(0, N + 1),
1004 KOKKOS_LAMBDA(
const size_t i) {
1010 range_policy(0, num_same_ids),
1011 KOKKOS_LAMBDA(
const size_t i) {
1012 const LO tgt_lid =
static_cast<LO
>(i);
1013 const LO src_lid =
static_cast<LO
>(i);
1014 tgt_rowptr(tgt_lid) = local_matrix.graph.row_map(src_lid + 1) - local_matrix.graph.row_map(src_lid);
1018 const size_type num_permute_to_lids = permute_to_lids.extent(0);
1020 range_policy(0, num_permute_to_lids),
1021 KOKKOS_LAMBDA(
const size_t i) {
1022 const LO tgt_lid = permute_to_lids(i);
1023 const LO src_lid = permute_from_lids(i);
1024 tgt_rowptr(tgt_lid) = local_matrix.graph.row_map(src_lid + 1) - local_matrix.graph.row_map(src_lid);
1028 const size_type num_import_lids = import_lids.extent(0);
1029 View<size_t*, DT> offsets(
"offsets", num_import_lids + 1);
1032#ifdef HAVE_TPETRA_DEBUG
1034 auto nth_offset_h = getEntryOnHost(offsets, num_import_lids);
1035 const bool condition =
1036 nth_offset_h !=
static_cast<size_t>(imports.extent(0));
1037 TEUCHOS_TEST_FOR_EXCEPTION(condition, std::logic_error, prefix <<
"The final offset in bytes " << nth_offset_h <<
" != imports.size() = " << imports.extent(0) <<
". Please report this bug to the Tpetra developers.");
1043 setupRowPointersForRemotes<LO, DT, BDT>(tgt_rowptr,
1044 import_lids, imports, num_packets_per_lid, offsets);
1045 TEUCHOS_TEST_FOR_EXCEPTION(k_error != 0, std::logic_error, prefix <<
" Error transferring data to target row pointers. "
1046 "Please report this bug to the Tpetra developers.");
1050 View<size_t*, DT> new_start_row(
"new_start_row", N + 1);
1053 makeCrsRowPtrFromLengths(tgt_rowptr, new_start_row);
1056 copyDataFromSameIDs(tgt_colind, tgt_pids, tgt_vals, new_start_row,
1057 tgt_rowptr, src_pids, local_matrix, local_col_map, num_same_ids, my_pid);
1059 copyDataFromPermuteIDs(tgt_colind, tgt_pids, tgt_vals, new_start_row,
1060 tgt_rowptr, src_pids, permute_to_lids, permute_from_lids,
1061 local_matrix, local_col_map, my_pid);
1063 if (imports.extent(0) <= 0) {
1067 int unpack_err = unpackAndCombineIntoCrsArrays2(tgt_colind, tgt_pids,
1068 tgt_vals, new_start_row, offsets, import_lids, imports, num_packets_per_lid,
1069 local_matrix, local_col_map, my_pid, bytes_per_value);
1070 TEUCHOS_TEST_FOR_EXCEPTION(
1071 unpack_err != 0, std::logic_error, prefix <<
"unpack loop failed. This "
1072 "should never happen. Please report this bug to the Tpetra developers.");
1118template <
typename ST,
typename LO,
typename GO,
typename Node>
1121 const Teuchos::ArrayView<const char>& imports,
1123 const Teuchos::ArrayView<const LO>&
importLIDs,
1127 typedef typename Node::device_type device_type;
1129 static_assert(std::is_same<device_type, typename local_matrix_device_type::device_type>::value,
1130 "Node::device_type and LocalMatrix::device_type must be the same.");
1148 imports.size(),
true,
"imports");
1150 auto local_matrix =
sourceMatrix.getLocalMatrixDevice();
1151 auto local_col_map =
sourceMatrix.getColMap()->getLocalMap();
1162 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix(
1167template <
typename ST,
typename LO,
typename GO,
typename NT>
1168void unpackCrsMatrixAndCombineNew(
1170 Kokkos::DualView<
char*,
1173 Kokkos::DualView<
size_t*,
1176 const Kokkos::DualView<
const LO*,
1183 using device_type =
typename crs_matrix_type::device_type;
1184 using local_matrix_device_type =
typename crs_matrix_type::local_matrix_device_type;
1185 using buffer_device_type =
typename dist_object_type::buffer_device_type;
1187 static_assert(std::is_same<device_type, typename local_matrix_device_type::device_type>::value,
1188 "crs_matrix_type::device_type and local_matrix_device_type::device_type "
1189 "must be the same.");
1194 auto num_packets_per_lid_d = numPacketsPerLID.view_device();
1196 TEUCHOS_ASSERT(!importLIDs.need_sync_device());
1197 auto import_lids_d = importLIDs.view_device();
1199 if (imports.need_sync_device()) {
1200 imports.sync_device();
1202 auto imports_d = imports.view_device();
1204 auto local_matrix = sourceMatrix.getLocalMatrixDevice();
1205 auto local_col_map = sourceMatrix.getColMap()->getLocalMap();
1206 typedef decltype(local_col_map) local_map_type;
1208 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix<
1209 local_matrix_device_type,
1211 buffer_device_type>(local_matrix, local_col_map, imports_d, num_packets_per_lid_d,
1212 import_lids_d, combineMode);
1270template <
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1274 const Teuchos::ArrayView<const LocalOrdinal>&
importLIDs,
1275 const Teuchos::ArrayView<const char>& imports,
1280 const Teuchos::ArrayView<const LocalOrdinal>&
permuteToLIDs,
1282 using Kokkos::MemoryUnmanaged;
1284 typedef typename Node::device_type DT;
1285 const char prefix[] =
"unpackAndCombineWithOwningPIDsCount: ";
1289 "permuteFromLIDs.size() = "
1295 "CrsMatrix 'sourceMatrix' must be locally indexed.");
1298 "numPacketsPerLID.size() = "
1301 auto local_matrix =
sourceMatrix.getLocalMatrixDevice();
1303 using kokkos_device_type = Kokkos::Device<
typename Node::device_type::execution_space,
1304 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>;
1306#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
1314 "permute_from_lids");
1316#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
1317 Kokkos::View<const char*, kokkos_device_type, void, void>
imports_d =
1319 Kokkos::View<const char*, kokkos_device_type>
imports_d =
1322 imports.getRawPtr(),
1323 imports.size(),
true,
1326#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
1334 "num_packets_per_lid");
1336 return UnpackAndCombineCrsMatrixImpl::unpackAndCombineWithOwningPIDsCount(
1356template <
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1360 Kokkos::Device<
typename Node::device_type::execution_space,
1361 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1368 const Kokkos::View<
const char*,
1369 Kokkos::Device<
typename Node::device_type::execution_space,
1370 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1377 const Kokkos::View<
const size_t*,
1378 Kokkos::Device<
typename Node::device_type::execution_space,
1379 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1386 const size_t numSameIDs,
1388 Kokkos::Device<
typename Node::device_type::execution_space,
1389 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1397 Kokkos::Device<
typename Node::device_type::execution_space,
1398 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1407 Kokkos::View<size_t*, typename Node::device_type>&
crs_rowptr_d,
1408 Kokkos::View<GlobalOrdinal*, typename Node::device_type>&
crs_colind_d,
1410 const Teuchos::ArrayView<const int>&
SourcePids,
1411 Kokkos::View<int*, typename Node::device_type>&
TargetPids) {
1412 using execution_space =
typename Node::execution_space;
1415 using Kokkos::deep_copy;
1418 using Teuchos::ArrayView;
1419 using Teuchos::outArg;
1420 using Teuchos::REDUCE_MAX;
1421 using Teuchos::reduceAll;
1423 typedef typename Node::device_type DT;
1426 typedef typename matrix_type::impl_scalar_type ST;
1428 const char prefix[] =
"Tpetra::Details::unpackAndCombineIntoCrsArrays_new: ";
1429#ifdef HAVE_TPETRA_MMM_TIMINGS
1430 using Teuchos::TimeMonitor;
1431 Teuchos::RCP<TimeMonitor> tm;
1434 using Kokkos::MemoryUnmanaged;
1438 "permute_from_lids_d.size() = "
1444 "CrsMatrix 'sourceMatrix' must be locally indexed.");
1447 "num_packets_per_lid_d.size() = "
1450 auto local_matrix =
sourceMatrix.getLocalMatrixDevice();
1453#ifdef HAVE_TPETRA_MMM_TIMINGS
1454 tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"unpackAndCombineWithOwningPIDsCount"))));
1457 UnpackAndCombineCrsMatrixImpl::unpackAndCombineWithOwningPIDsCount(
1460#ifdef HAVE_TPETRA_MMM_TIMINGS
1464#ifdef HAVE_TPETRA_MMM_TIMINGS
1465 tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"resize CRS pointers"))));
1470#ifdef HAVE_TPETRA_MMM_TIMINGS
1482 Kokkos::deep_copy(execution_space(),
TargetPids, -1);
1485 auto local_col_map =
sourceMatrix.getColMap()->getLocalMap();
1487#ifdef HAVE_TPETRA_MMM_TIMINGS
1488 tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"create mirror views from inputs"))));
1497#ifdef HAVE_TPETRA_MMM_TIMINGS
1501 size_t bytes_per_value = 0;
1515 if (local_matrix.values.extent(0) > 0) {
1516 const ST&
val = local_matrix.values(0);
1522 Teuchos::reduceAll<int, size_t>(*(
sourceMatrix.getComm()),
1523 Teuchos::REDUCE_MAX,
1525 outArg(bytes_per_value));
1528#ifdef HAVE_TPETRA_MMM_TIMINGS
1529 tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"unpackAndCombineIntoCrsArrays"))));
1531 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsArrays(
1537#ifdef HAVE_TPETRA_MMM_TIMINGS
1542#ifdef HAVE_TPETRA_MMM_TIMINGS
1543 tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"copy back to host"))));
1546 Kokkos::parallel_for(
1547 "setLocalEntriesToPID", Kokkos::RangePolicy<typename DT::execution_space>(0,
TargetPids.size()),
KOKKOS_LAMBDA(
const size_t i) {
1553template <
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1557 Kokkos::Device<
typename Node::device_type::execution_space,
1558 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1565 const Kokkos::View<
const char*,
1566 Kokkos::Device<
typename Node::device_type::execution_space,
1567 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1574 const Kokkos::View<
const size_t*,
1575 Kokkos::Device<
typename Node::device_type::execution_space,
1576 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1583 const size_t numSameIDs,
1585 Kokkos::Device<
typename Node::device_type::execution_space,
1586 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1594 Kokkos::Device<
typename Node::device_type::execution_space,
1595 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename Node::device_type>>
1605 Teuchos::ArrayRCP<GlobalOrdinal>&
CRS_colind,
1606 Teuchos::ArrayRCP<Scalar>&
CRS_vals,
1607 const Teuchos::ArrayView<const int>&
SourcePids,
1609 using execution_space =
typename Node::execution_space;
1612 using Kokkos::deep_copy;
1615 using Teuchos::ArrayView;
1616 using Teuchos::outArg;
1617 using Teuchos::REDUCE_MAX;
1618 using Teuchos::reduceAll;
1620 typedef typename Node::device_type DT;
1623 typedef typename matrix_type::impl_scalar_type ST;
1625 const char prefix[] =
"Tpetra::Details::unpackAndCombineIntoCrsArrays_new: ";
1626#ifdef HAVE_TPETRA_MMM_TIMINGS
1627 using Teuchos::TimeMonitor;
1628 Teuchos::RCP<TimeMonitor> tm;
1631 using Kokkos::MemoryUnmanaged;
1635 "permute_from_lids_d.size() = "
1641 "CrsMatrix 'sourceMatrix' must be locally indexed.");
1644 "num_packets_per_lid_d.size() = "
1647 auto local_matrix =
sourceMatrix.getLocalMatrixDevice();
1650#ifdef HAVE_TPETRA_MMM_TIMINGS
1651 tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"unpackAndCombineWithOwningPIDsCount"))));
1654 UnpackAndCombineCrsMatrixImpl::unpackAndCombineWithOwningPIDsCount(
1657#ifdef HAVE_TPETRA_MMM_TIMINGS
1661#ifdef HAVE_TPETRA_MMM_TIMINGS
1662 tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"resize CRS pointers"))));
1668#ifdef HAVE_TPETRA_MMM_TIMINGS
1684 auto local_col_map =
sourceMatrix.getColMap()->getLocalMap();
1686#ifdef HAVE_TPETRA_MMM_TIMINGS
1687 tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"create mirror views from inputs"))));
1699#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE
1700 static_assert(!std::is_same<
1701 typename std::remove_const<
1702 typename std::decay<
1704 std::complex<double>>::value,
1705 "CRS_vals::value_type is std::complex<double>; this should never happen"
1706 ", since std::complex does not work in Kokkos::View objects.");
1713#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE
1714 static_assert(!std::is_same<
1715 typename decltype(
crs_vals_d)::non_const_value_type,
1716 std::complex<double>>::value,
1717 "crs_vals_d::non_const_value_type is std::complex<double>; this should "
1718 "never happen, since std::complex does not work in Kokkos::View objects.");
1729#ifdef HAVE_TPETRA_MMM_TIMINGS
1733 size_t bytes_per_value = 0;
1747 if (local_matrix.values.extent(0) > 0) {
1748 const ST&
val = local_matrix.values(0);
1754 Teuchos::reduceAll<int, size_t>(*(
sourceMatrix.getComm()),
1755 Teuchos::REDUCE_MAX,
1757 outArg(bytes_per_value));
1760#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE
1761 static_assert(!std::is_same<
1762 typename decltype(
crs_vals_d)::non_const_value_type,
1763 std::complex<double>>::value,
1764 "crs_vals_d::non_const_value_type is std::complex<double>; this should "
1765 "never happen, since std::complex does not work in Kokkos::View objects.");
1768#ifdef HAVE_TPETRA_MMM_TIMINGS
1769 tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"unpackAndCombineIntoCrsArrays"))));
1771 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsArrays(
1777#ifdef HAVE_TPETRA_MMM_TIMINGS
1782#ifdef HAVE_TPETRA_MMM_TIMINGS
1783 tm = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"copy back to host"))));
1810#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT_KOKKOS_DEPRECATED_CODE_4_ON(ST, LO, GO, NT) \
1812 Details::unpackCrsMatrixAndCombine<ST, LO, GO, NT>( \
1813 const CrsMatrix<ST, LO, GO, NT>&, \
1814 const Teuchos::ArrayView<const char>&, \
1815 const Teuchos::ArrayView<const size_t>&, \
1816 const Teuchos::ArrayView<const LO>&, \
1820 Details::unpackAndCombineWithOwningPIDsCount<ST, LO, GO, NT>( \
1821 const CrsMatrix<ST, LO, GO, NT>&, \
1822 const Teuchos::ArrayView<const LO>&, \
1823 const Teuchos::ArrayView<const char>&, \
1824 const Teuchos::ArrayView<const size_t>&, \
1828 const Teuchos::ArrayView<const LO>&, \
1829 const Teuchos::ArrayView<const LO>&); \
1831 Details::unpackCrsMatrixAndCombineNew<ST, LO, GO, NT>( \
1832 const CrsMatrix<ST, LO, GO, NT>&, \
1833 Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>, \
1834 Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>, \
1835 const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1837 const CombineMode); \
1839 Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT>( \
1840 const CrsMatrix<ST, LO, GO, NT>&, \
1841 const Kokkos::View<LO const*, \
1842 Kokkos::Device<typename NT::device_type::execution_space, \
1843 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1845 const Kokkos::View<const char*, \
1846 Kokkos::Device<typename NT::device_type::execution_space, \
1847 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1849 const Kokkos::View<const size_t*, \
1850 Kokkos::Device<typename NT::device_type::execution_space, \
1851 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1854 const Kokkos::View<LO const*, \
1855 Kokkos::Device<typename NT::device_type::execution_space, \
1856 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1858 const Kokkos::View<LO const*, \
1859 Kokkos::Device<typename NT::device_type::execution_space, \
1860 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1864 Kokkos::View<size_t*, typename NT::device_type>&, \
1865 Kokkos::View<GO*, typename NT::device_type>&, \
1866 Kokkos::View<typename CrsMatrix<ST, LO, GO, NT>::impl_scalar_type*, typename NT::device_type>&, \
1867 const Teuchos::ArrayView<const int>&, \
1868 Kokkos::View<int*, typename NT::device_type>&); \
1870 Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT>( \
1871 const CrsMatrix<ST, LO, GO, NT>&, \
1872 const Kokkos::View<LO const*, \
1873 Kokkos::Device<typename NT::device_type::execution_space, \
1874 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1876 const Kokkos::View<const char*, \
1877 Kokkos::Device<typename NT::device_type::execution_space, \
1878 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1880 const Kokkos::View<const size_t*, \
1881 Kokkos::Device<typename NT::device_type::execution_space, \
1882 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1885 const Kokkos::View<LO const*, \
1886 Kokkos::Device<typename NT::device_type::execution_space, \
1887 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1889 const Kokkos::View<LO const*, \
1890 Kokkos::Device<typename NT::device_type::execution_space, \
1891 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>, \
1895 Teuchos::ArrayRCP<size_t>&, \
1896 Teuchos::ArrayRCP<GO>&, \
1897 Teuchos::ArrayRCP<ST>&, \
1898 const Teuchos::ArrayView<const int>&, \
1899 Teuchos::Array<int>&);
// Instantiation list used when KOKKOS_ENABLE_DEPRECATED_CODE_4 is NOT defined
// (the #ifdef that follows this macro picks between the _ON and _OFF variants).
// For one (ST, LO, GO, NT) combination the macro names, in order:
//   - Details::unpackCrsMatrixAndCombine            (Teuchos::ArrayView inputs),
//   - Details::unpackAndCombineWithOwningPIDsCount  (Teuchos::ArrayView inputs),
//   - Details::unpackCrsMatrixAndCombineNew         (Kokkos::DualView buffers),
//   - Details::unpackAndCombineIntoCrsArrays with Kokkos::View outputs,
//   - Details::unpackAndCombineIntoCrsArrays with Teuchos::ArrayRCP /
//     Teuchos::Array outputs.
// In this variant every communication-buffer Kokkos::View is written with
// exactly two template arguments: the value type and
// Kokkos::Device<execution_space, comm_buffer_memory_space<device_type>>.
// NOTE(review): the line numbers in this listing jump (e.g. 1907 -> 1911), so
// the `template <return-type>` introducers and some scalar parameters are
// presumably on lines not shown here -- confirm against the real header before
// changing any signature.
1901#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT_KOKKOS_DEPRECATED_CODE_4_OFF(ST, LO, GO, NT) \
1903 Details::unpackCrsMatrixAndCombine<ST, LO, GO, NT>( \
1904 const CrsMatrix<ST, LO, GO, NT>&, \
1905 const Teuchos::ArrayView<const char>&, \
1906 const Teuchos::ArrayView<const size_t>&, \
1907 const Teuchos::ArrayView<const LO>&, \
1911 Details::unpackAndCombineWithOwningPIDsCount<ST, LO, GO, NT>( \
1912 const CrsMatrix<ST, LO, GO, NT>&, \
1913 const Teuchos::ArrayView<const LO>&, \
1914 const Teuchos::ArrayView<const char>&, \
1915 const Teuchos::ArrayView<const size_t>&, \
1919 const Teuchos::ArrayView<const LO>&, \
1920 const Teuchos::ArrayView<const LO>&); \
1922 Details::unpackCrsMatrixAndCombineNew<ST, LO, GO, NT>( \
1923 const CrsMatrix<ST, LO, GO, NT>&, \
1924 Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>, \
1925 Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>, \
1926 const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1928 const CombineMode); \
1930 Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT>( \
1931 const CrsMatrix<ST, LO, GO, NT>&, \
1932 const Kokkos::View<LO const*, \
1933 Kokkos::Device<typename NT::device_type::execution_space, \
1934 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1935 const Kokkos::View<const char*, \
1936 Kokkos::Device<typename NT::device_type::execution_space, \
1937 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1938 const Kokkos::View<const size_t*, \
1939 Kokkos::Device<typename NT::device_type::execution_space, \
1940 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1942 const Kokkos::View<LO const*, \
1943 Kokkos::Device<typename NT::device_type::execution_space, \
1944 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1945 const Kokkos::View<LO const*, \
1946 Kokkos::Device<typename NT::device_type::execution_space, \
1947 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1950 Kokkos::View<size_t*, typename NT::device_type>&, \
1951 Kokkos::View<GO*, typename NT::device_type>&, \
1952 Kokkos::View<typename CrsMatrix<ST, LO, GO, NT>::impl_scalar_type*, typename NT::device_type>&, \
1953 const Teuchos::ArrayView<const int>&, \
1954 Kokkos::View<int*, typename NT::device_type>&); \
1956 Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT>( \
1957 const CrsMatrix<ST, LO, GO, NT>&, \
1958 const Kokkos::View<LO const*, \
1959 Kokkos::Device<typename NT::device_type::execution_space, \
1960 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1961 const Kokkos::View<const char*, \
1962 Kokkos::Device<typename NT::device_type::execution_space, \
1963 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1964 const Kokkos::View<const size_t*, \
1965 Kokkos::Device<typename NT::device_type::execution_space, \
1966 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1968 const Kokkos::View<LO const*, \
1969 Kokkos::Device<typename NT::device_type::execution_space, \
1970 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1971 const Kokkos::View<LO const*, \
1972 Kokkos::Device<typename NT::device_type::execution_space, \
1973 Tpetra::Details::DefaultTypes::comm_buffer_memory_space<typename NT::device_type>>>, \
1976 Teuchos::ArrayRCP<size_t>&, \
1977 Teuchos::ArrayRCP<GO>&, \
1978 Teuchos::ArrayRCP<ST>&, \
1979 const Teuchos::ArrayView<const int>&, \
1980 Teuchos::Array<int>&);
// TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT(ST, LO, GO, NT) forwards to
// the ..._KOKKOS_DEPRECATED_CODE_4_ON variant when
// KOKKOS_ENABLE_DEPRECATED_CODE_4 is defined, and to the ..._OFF variant
// otherwise.
// NOTE(review): the #else between the two definitions and the closing #endif
// are presumably on lines missing from this listing (numbering skips 1985) --
// confirm against the real header.
1982#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
1983#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT(ST, LO, GO, NT) \
1984 TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT_KOKKOS_DEPRECATED_CODE_4_ON(ST, LO, GO, NT)
1986#define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT(ST, LO, GO, NT) \
1987 TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT_KOKKOS_DEPRECATED_CODE_4_OFF(ST, LO, GO, NT)
Declaration of the Tpetra::CrsMatrix class.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
size_t compute_total_num_entries(const Kokkos::View< const size_t *, BDT > &num_packets_per_lid, const Kokkos::View< const size_t *, DT > &offsets, const Kokkos::View< const char *, BDT > &imports)
Total number of entries across all rows of the packed matrix.
void unpackAndCombineIntoCrsMatrix(const LocalMatrix &local_matrix, const LocalMap &local_map, const Kokkos::View< const char *, BufferDeviceType > &imports, const Kokkos::View< const size_t *, BufferDeviceType > &num_packets_per_lid, const typename PackTraits< typename LocalMap::local_ordinal_type >::input_array_type import_lids, const Tpetra::CombineMode combine_mode)
Perform the unpack operation for the matrix.
size_t compute_maximum_num_entries(const Kokkos::View< const size_t *, BDT > &num_packets_per_lid, const Kokkos::View< const size_t *, DT > &offsets, const Kokkos::View< const char *, BDT > &imports)
Maximum number of entries in any row of the packed matrix.
bool compute_batch_info(const View1 &batches_per_lid, View2 &batch_info)
Compute the index and batch number associated with each batch.
Struct that holds views of the contents of a CrsMatrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of KokkosSparse::CrsMatrix that represents the part of the sparse matrix on each MPI process.
typename row_matrix_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
"Local" part of Map suitable for Kokkos kernels.
KOKKOS_INLINE_FUNCTION LocalOrdinal getLocalElement(const GlobalOrdinal globalIndex) const
Get the local index corresponding to the given global index. (device only)
LocalOrdinal local_ordinal_type
The type of local indices.
GlobalOrdinal global_ordinal_type
The type of global indices.
DeviceType device_type
The device type.
Kokkos::parallel_reduce functor to determine the number of entries (to unpack) in a KokkosSparse::Crs...
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
Implementation details of Tpetra.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
void unpackCrsMatrixAndCombine(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &importLIDs, size_t constantNumPackets, CombineMode combineMode)
Unpack the imported column indices and values, and combine into matrix.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
CombineMode
Rule for combining data in an Import or Export.
@ REPLACE
Replace existing values with new values.
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
@ INSERT
Insert new values that don't currently exist.
Traits class for packing / unpacking data of type T.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > unpackArray(value_type outBuf[], const char inBuf[], const size_t numEnt)
Unpack numEnt value_type entries from the given input buffer of bytes, to the given output buffer of ...
static KOKKOS_INLINE_FUNCTION size_t unpackValue(T &outVal, const char inBuf[])
Unpack the given value from the given input buffer.
Kokkos::View< value_type *, Kokkos::AnonymousSpace > output_array_type
The type of an output array of value_type.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.
Unpacks and combines a single row of the CrsMatrix.
int error() const
Host function for getting the error.