10#ifndef TPETRA_IMPORT_UTIL2_HPP
11#define TPETRA_IMPORT_UTIL2_HPP
18#include "Tpetra_ConfigDefs.hpp"
19#include "Tpetra_Import.hpp"
20#include "Tpetra_HashTable.hpp"
21#include "Tpetra_Map.hpp"
23#include "Tpetra_Distributor.hpp"
25#include "Tpetra_Vector.hpp"
26#include "Kokkos_DualView.hpp"
27#include "KokkosSparse_SortCrs.hpp"
28#include <Teuchos_Array.hpp>
31#include <Kokkos_Core.hpp>
32#include <Kokkos_UnorderedMap.hpp>
33#include <Kokkos_Sort.hpp>
41struct LocalRemoteCount {
42 size_t numLocalColGIDs;
43 size_t numRemoteColGIDs;
45 KOKKOS_INLINE_FUNCTION
48 , numRemoteColGIDs(0) {}
50 KOKKOS_INLINE_FUNCTION
51 LocalRemoteCount& operator+=(
const LocalRemoteCount& src) {
52 numLocalColGIDs += src.numLocalColGIDs;
53 numRemoteColGIDs += src.numRemoteColGIDs;
63struct reduction_identity<LocalRemoteCount> {
64 KOKKOS_FORCEINLINE_FUNCTION
static LocalRemoteCount sum() {
65 return LocalRemoteCount();
71namespace Import_Util {
75template <
typename Scalar,
typename Ordinal>
77 const Teuchos::ArrayView<Ordinal>& CRS_colind,
78 const Teuchos::ArrayView<Scalar>& CRS_vals);
80template <
typename Ordinal>
82 const Teuchos::ArrayView<Ordinal>& CRS_colind);
84template <
typename rowptr_array_type,
typename colind_array_type,
typename vals_array_type>
86 const colind_array_type& CRS_colind,
87 const vals_array_type& CRS_vals);
89template <
typename rowptr_array_type,
typename colind_array_type>
91 const colind_array_type& CRS_colind);
/// \brief Forward declaration: sort a local CRS matrix object.
///
/// \param lclMatrix [in/out] Local matrix, taken by non-const reference.
///   The definition later in this file forwards it to
///   KokkosSparse::sort_crs_matrix.
93template <
typename local_crs_matrix>
94void sortCrsMatrix(local_crs_matrix& lclMatrix);
/// \brief Forward declaration: sort a local CRS graph object.
///
/// \param lclGraph [in/out] Local graph, taken by non-const reference.
///   The definition later in this file forwards it to
///   KokkosSparse::sort_crs_graph.
96template <
typename local_crs_graph>
97void sortCrsGraph(local_crs_graph& lclGraph);
103template <
typename Scalar,
typename Ordinal>
105 const Teuchos::ArrayView<Ordinal>& CRS_colind,
106 const Teuchos::ArrayView<Scalar>& CRS_vals);
108template <
typename Ordinal>
110 const Teuchos::ArrayView<Ordinal>& CRS_colind);
112template <
class rowptr_view_type,
class colind_view_type,
class vals_view_type>
114 colind_view_type& CRS_colind,
115 vals_view_type& CRS_vals,
116 const ::KokkosSparse::SortAlgorithm option);
133template <
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
135 const Teuchos::ArrayView<const size_t>& rowptr,
136 const Teuchos::ArrayView<LocalOrdinal>& colind_LID,
137 const Teuchos::ArrayView<GlobalOrdinal>& colind_GID,
139 const Teuchos::ArrayView<const int>& owningPIDs,
140 Teuchos::Array<int>& remotePIDs,
142 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
148template <
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
150 const Kokkos::View<const size_t*, typename Node::device_type> rowptr_view,
151 const Kokkos::View<LocalOrdinal*, typename Node::device_type> colind_LID_view,
152 const Kokkos::View<GlobalOrdinal*, typename Node::device_type> colind_GID_view,
154 const Kokkos::View<const int*, typename Node::device_type> owningPIDs_view,
155 Teuchos::Array<int>& remotePIDs,
157 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
172template <
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
174 bool useReverseModeForOwnership,
175 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferForMigratingData,
176 bool useReverseModeForMigration,
187namespace Import_Util {
/// \brief Comparison predicate for (PID, GID) pairs.
///
/// Orders by the first member (the PID); when the two PIDs do not differ,
/// the second member (the GID) decides.
///
/// \tparam PID Type of the process ID stored in \c first.
/// \tparam GlobalOrdinal Type of the global index stored in \c second.
///
/// \param a [in] Left-hand (PID, GID) pair.
/// \param b [in] Right-hand (PID, GID) pair.
///
/// \return \c true if \c a orders strictly before \c b.
template <typename PID, typename GlobalOrdinal>
bool sort_PID_then_GID(const std::pair<PID, GlobalOrdinal>& a,
                       const std::pair<PID, GlobalOrdinal>& b) {
  // Different PIDs: the PID alone decides the order.
  if (a.first != b.first) {
    return a.first < b.first;
  }
  // Same PID: fall back to the GID.
  return a.second < b.second;
}
/// \brief Comparison predicate for (PID, (GID, LID)) triples.
///
/// Orders by the PID; when the two PIDs do not differ, the GID decides.
/// The LID (\c second.second) takes no part in the comparison, so two
/// entries that differ only in their LID compare as equivalent.
///
/// \tparam PID Type of the process ID stored in \c first.
/// \tparam GlobalOrdinal Type of the global index stored in \c second.first.
/// \tparam LocalOrdinal Type of the local index stored in \c second.second.
///
/// \param a [in] Left-hand (PID, (GID, LID)) entry.
/// \param b [in] Right-hand (PID, (GID, LID)) entry.
///
/// \return \c true if \c a orders strictly before \c b.
template <typename PID, typename GlobalOrdinal, typename LocalOrdinal>
bool sort_PID_then_pair_GID_LID(const std::pair<PID, std::pair<GlobalOrdinal, LocalOrdinal>>& a,
                                const std::pair<PID, std::pair<GlobalOrdinal, LocalOrdinal>>& b) {
  if (a.first != b.first) {
    // Different PIDs: the PID alone decides the order.
    return a.first < b.first;
  } else {
    // Same PID: compare the GIDs only; the LID is deliberately ignored.
    return a.second.first < b.second.first;
  }
}
208template <
typename Scalar,
209 typename LocalOrdinal,
210 typename GlobalOrdinal,
212void reverseNeighborDiscovery(
const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& SourceMatrix,
213 const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type& rowptr,
214 const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type& colind,
218 Teuchos::ArrayRCP<int>& type3PIDs,
219 Teuchos::ArrayRCP<LocalOrdinal>& type3LIDs,
220 Teuchos::RCP<
const Teuchos::Comm<int>>& rcomm) {
221#ifdef HAVE_TPETRACORE_MPI
222 using ::Tpetra::Details::Behavior;
223 typedef LocalOrdinal LO;
224 typedef GlobalOrdinal GO;
225 typedef std::pair<GO, GO> pidgidpair_t;
227 const std::string prefix{
" Import_Util2::ReverseND:: "};
228 const std::string label(
"IU2::Neighbor");
231 if (MyImporter.is_null())
return;
233 std::ostringstream errstr;
235 auto const comm = MyDomainMap->getComm();
237 MPI_Comm rawComm = getRawMpiComm(*comm);
238 const int MyPID = rcomm->getRank();
246 Distributor& Distor = MyImporter->getDistributor();
247 const size_t NumRecvs = Distor.getNumReceives();
248 const size_t NumSends = Distor.getNumSends();
249 auto RemoteLIDs = MyImporter->getRemoteLIDs();
250 auto const ProcsFrom = Distor.getProcsFrom();
251 auto const ProcsTo = Distor.getProcsTo();
253 auto LengthsFrom = Distor.getLengthsFrom();
254 auto MyColMap = SourceMatrix.getColMap();
255 const size_t numCols = MyColMap->getLocalNumElements();
256 RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> target = MyImporter->getTargetMap();
260 Teuchos::Array<int> RemotePIDOrder(numCols, -1);
263 for (
size_t i = 0, j = 0; i < NumRecvs; ++i) {
264 for (
size_t k = 0; k < LengthsFrom[i]; ++k) {
265 const int pid = ProcsFrom[i];
267 RemotePIDOrder[RemoteLIDs[j]] = i;
279 Teuchos::Array<int> ReverseSendSizes(NumRecvs, 0);
281 Teuchos::Array<Teuchos::ArrayRCP<pidgidpair_t>> RSB(NumRecvs);
296 Teuchos::Array<std::set<pidgidpair_t>> pidsets(NumRecvs);
299 for (
size_t i = 0; i < NumExportLIDs; i++) {
300 LO lid = ExportLIDs[i];
301 GO exp_pid = ExportPIDs[i];
302 for (
auto j = rowptr[lid]; j < rowptr[lid + 1]; j++) {
303 int pid_order = RemotePIDOrder[colind[j]];
304 if (pid_order != -1) {
305 GO gid = MyColMap->getGlobalElement(colind[j]);
306 auto tpair = pidgidpair_t(exp_pid, gid);
307 pidsets[pid_order].insert(pidsets[pid_order].end(), tpair);
316 for (
auto&& ps : pidsets) {
318 RSB[jj] = Teuchos::arcp(
new pidgidpair_t[s], 0, s,
true);
319 std::copy(ps.begin(), ps.end(), RSB[jj]);
320 ReverseSendSizes[jj] = s;
326 Teuchos::Array<int> ReverseRecvSizes(NumSends, -1);
327 Teuchos::Array<MPI_Request> rawBreq(ProcsFrom.size() + ProcsTo.size(), MPI_REQUEST_NULL);
329 const int mpi_tag_base_ = 3;
332 for (
int i = 0; i < ProcsTo.size(); ++i) {
333 int Rec_Tag = mpi_tag_base_ + ProcsTo[i];
334 int* thisrecv = (
int*)(&ReverseRecvSizes[i]);
335 MPI_Request rawRequest = MPI_REQUEST_NULL;
336 MPI_Irecv(
const_cast<int*
>(thisrecv),
343 rawBreq[mpireq_idx++] = rawRequest;
345 for (
int i = 0; i < ProcsFrom.size(); ++i) {
346 int Send_Tag = mpi_tag_base_ + MyPID;
347 int* mysend = (
int*)(&ReverseSendSizes[i]);
348 MPI_Request rawRequest = MPI_REQUEST_NULL;
356 rawBreq[mpireq_idx++] = rawRequest;
358 Teuchos::Array<MPI_Status> rawBstatus(rawBreq.size());
359#ifdef HAVE_TPETRA_DEBUG
362 MPI_Waitall(rawBreq.size(), rawBreq.getRawPtr(),
363 rawBstatus.getRawPtr());
365#ifdef HAVE_TPETRA_DEBUG
367 errstr << MyPID <<
"sE1 reverseNeighborDiscovery Mpi_Waitall error on send ";
369 std::cerr << errstr.str() << std::flush;
373 int totalexportpairrecsize = 0;
374 for (
size_t i = 0; i < NumSends; ++i) {
375 totalexportpairrecsize += ReverseRecvSizes[i];
376#ifdef HAVE_TPETRA_DEBUG
377 if (ReverseRecvSizes[i] < 0) {
378 errstr << MyPID <<
"E4 reverseNeighborDiscovery: got < 0 for receive size " << ReverseRecvSizes[i] << std::endl;
383 Teuchos::ArrayRCP<pidgidpair_t> AllReverseRecv = Teuchos::arcp(
new pidgidpair_t[totalexportpairrecsize], 0, totalexportpairrecsize,
true);
386 for (
int i = 0; i < ProcsTo.size(); ++i) {
387 int recv_data_size = ReverseRecvSizes[i] * 2;
388 int recvData_MPI_Tag = mpi_tag_base_ * 2 + ProcsTo[i];
389 MPI_Request rawRequest = MPI_REQUEST_NULL;
390 GO* rec_bptr = (GO*)(&AllReverseRecv[offset]);
391 offset += ReverseRecvSizes[i];
394 ::Tpetra::Details::MpiTypeTraits<GO>::getType(rec_bptr[0]),
399 rawBreq[mpireq_idx++] = rawRequest;
401 for (
int ii = 0; ii < ProcsFrom.size(); ++ii) {
402 GO* send_bptr = (GO*)(RSB[ii].getRawPtr());
403 MPI_Request rawSequest = MPI_REQUEST_NULL;
404 int send_data_size = ReverseSendSizes[ii] * 2;
405 int sendData_MPI_Tag = mpi_tag_base_ * 2 + MyPID;
408 ::Tpetra::Details::MpiTypeTraits<GO>::getType(send_bptr[0]),
414 rawBreq[mpireq_idx++] = rawSequest;
416#ifdef HAVE_TPETRA_DEBUG
419 MPI_Waitall(rawBreq.size(),
421 rawBstatus.getRawPtr());
422#ifdef HAVE_TPETRA_DEBUG
424 errstr << MyPID <<
"E3.r reverseNeighborDiscovery Mpi_Waitall error on receive ";
426 std::cerr << errstr.str() << std::flush;
429 std::sort(AllReverseRecv.begin(), AllReverseRecv.end(), Tpetra::Import_Util::sort_PID_then_GID<GlobalOrdinal, GlobalOrdinal>);
431 auto newEndOfPairs = std::unique(AllReverseRecv.begin(), AllReverseRecv.end());
433 if (AllReverseRecv.begin() == newEndOfPairs)
return;
434 int ARRsize = std::distance(AllReverseRecv.begin(), newEndOfPairs);
435 auto rPIDs = Teuchos::arcp(
new int[ARRsize], 0, ARRsize,
true);
436 auto rLIDs = Teuchos::arcp(
new LocalOrdinal[ARRsize], 0, ARRsize,
true);
439 for (
auto itr = AllReverseRecv.begin(); itr != newEndOfPairs; ++itr) {
440 if ((
int)(itr->first) != MyPID) {
441 rPIDs[tsize] = (int)itr->first;
442 LocalOrdinal lid = MyDomainMap->getLocalElement(itr->second);
448 type3PIDs = rPIDs.persistingView(0, tsize);
449 type3LIDs = rLIDs.persistingView(0, tsize);
452 std::cerr << errstr.str() << std::flush;
456 MPI_Abort(MPI_COMM_WORLD, -1);
462template <
typename Scalar,
typename Ordinal>
464 const Teuchos::ArrayView<Ordinal>&
CRS_colind,
465 const Teuchos::ArrayView<Scalar>&
CRS_vals) {
468 auto vals_k = Kokkos::View<Scalar*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(
CRS_vals.data(),
CRS_vals.size());
472template <
typename Ordinal>
473void sortCrsEntries(
const Teuchos::ArrayView<size_t>&
CRS_rowptr,
474 const Teuchos::ArrayView<Ordinal>&
CRS_colind) {
/// \brief Sort CRS entries held in view-like arrays by delegating to
///   KokkosSparse::sort_crs_matrix.
///
/// \param CRS_rowptr [in] Row-offset array; its \c memory_space member type
///   selects the sort algorithm.
/// \param CRS_colind [in/out] Column-index array handed to the sort.
/// \param CRS_vals [in/out] Values array handed to the sort alongside the
///   column indices.
480template <
typename rowptr_array_type,
typename colind_array_type,
typename vals_array_type>
481void sortCrsEntries(
const rowptr_array_type& CRS_rowptr,
482 const colind_array_type& CRS_colind,
483 const vals_array_type& CRS_vals) {
// Start from the library default; host-space row pointers switch to SHELL.
484 KokkosSparse::SortAlgorithm option = KokkosSparse::SortAlgorithm::DEFAULT;
485 static constexpr bool is_cpu = std::is_same_v<typename rowptr_array_type::memory_space, Kokkos::HostSpace>;
486 if constexpr (is_cpu)
487 option = KokkosSparse::SortAlgorithm::SHELL;
// The fourth argument is the largest value representable by the
// column-index type.
488 KokkosSparse::sort_crs_matrix(CRS_rowptr, CRS_colind, CRS_vals,
489 KokkosKernels::ArithTraits<typename colind_array_type::non_const_value_type>::max(),
// NOTE(review): the remaining argument(s) and the end of this call are not
// visible in this listing; presumably `option` is passed here -- confirm.
493template <
typename local_crs_matrix>
494void sortCrsMatrix(local_crs_matrix& lclMatrix) {
495 KokkosSparse::SortAlgorithm option = KokkosSparse::SortAlgorithm::DEFAULT;
496 static constexpr bool is_cpu = std::is_same_v<typename local_crs_matrix::device_type::memory_space, Kokkos::HostSpace>;
497 if constexpr (is_cpu)
498 option = KokkosSparse::SortAlgorithm::SHELL;
499 KokkosSparse::sort_crs_matrix(lclMatrix, option);
502template <
typename local_crs_graph>
503void sortCrsGraph(local_crs_graph& lclGraph) {
504 KokkosSparse::SortAlgorithm option = KokkosSparse::SortAlgorithm::DEFAULT;
505 static constexpr bool is_cpu = std::is_same_v<typename local_crs_graph::device_type::memory_space, Kokkos::HostSpace>;
506 if constexpr (is_cpu)
507 option = KokkosSparse::SortAlgorithm::SHELL;
508 KokkosSparse::sort_crs_graph(lclGraph, KokkosKernels::ArithTraits<typename local_crs_graph::entries_type::non_const_value_type>::max(), option);
511template <
typename rowptr_array_type,
typename colind_array_type>
513 const colind_array_type& CRS_colind) {
514 KokkosSparse::SortAlgorithm option = KokkosSparse::SortAlgorithm::DEFAULT;
515 static constexpr bool is_cpu = std::is_same_v<typename rowptr_array_type::memory_space, Kokkos::HostSpace>;
516 if constexpr (is_cpu)
517 option = KokkosSparse::SortAlgorithm::SHELL;
518 KokkosSparse::sort_crs_graph(CRS_rowptr, CRS_colind,
519 KokkosKernels::ArithTraits<typename colind_array_type::non_const_value_type>::max(),
524template <
typename Scalar,
typename Ordinal>
526 const Teuchos::ArrayView<Ordinal>&
CRS_colind,
527 const Teuchos::ArrayView<Scalar>&
CRS_vals) {
/// \brief Overload of sortAndMergeCrsEntries that takes only row offsets and
///   column indices (no values array).
///
/// \param CRS_rowptr [in/out] Row-offset array.
/// \param CRS_colind [in/out] Column-index array.
595template <
typename Ordinal>
596void sortAndMergeCrsEntries(
const Teuchos::ArrayView<size_t>&
CRS_rowptr,
597 const Teuchos::ArrayView<Ordinal>&
CRS_colind) {
// Default-constructed values view of the default scalar type.
// NOTE(review): the rest of the body is not visible in this listing;
// presumably this placeholder is forwarded to the overload that also takes
// values -- confirm.
598 Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type>
CRS_vals;
/// \brief Sort and merge CRS entries held in views by delegating to
///   KokkosSparse::sort_and_merge_matrix.
///
/// \param CRS_rowptr [in/out] Row-offset view; passed to the call both as a
///   copied handle and as the original reference.
/// \param CRS_colind [in/out] Column-index view; passed the same way.
/// \param CRS_vals [in/out] Values view; passed the same way. Its
///   \c execution_space is the one named in the call's template arguments.
/// \param option [in] Sort algorithm selector.
602template <
class rowptr_view_type,
class colind_view_type,
class vals_view_type>
603void sortAndMergeCrsEntries(rowptr_view_type& CRS_rowptr,
604 colind_view_type& CRS_colind,
605 vals_view_type& CRS_vals,
606 const ::KokkosSparse::SortAlgorithm option) {
607 using execution_space =
typename vals_view_type::execution_space;
// Copies of the incoming view objects; these are passed first in the call
// below, followed by the original references (presumably inputs followed by
// outputs -- confirm against the KokkosSparse signature).
609 auto CRS_rowptr_in = CRS_rowptr;
610 auto CRS_colind_in = CRS_colind;
611 auto CRS_vals_in = CRS_vals;
613 KokkosSparse::sort_and_merge_matrix<execution_space, rowptr_view_type,
614 colind_view_type, vals_view_type>(CRS_rowptr_in, CRS_colind_in, CRS_vals_in,
615 CRS_rowptr, CRS_colind, CRS_vals,
// Largest value representable by the column-index type.
616 KokkosKernels::ArithTraits<typename colind_view_type::non_const_value_type>::max(),
// NOTE(review): the remaining argument(s) and the end of this call are not
// visible in this listing; presumably `option` is passed here -- confirm.
620template <
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
621void lowCommunicationMakeColMapAndReindexSerial(
const Teuchos::ArrayView<const size_t>& rowptr,
622 const Teuchos::ArrayView<LocalOrdinal>& colind_LID,
623 const Teuchos::ArrayView<GlobalOrdinal>& colind_GID,
625 const Teuchos::ArrayView<const int>& owningPIDs,
626 Teuchos::Array<int>& remotePIDs,
628 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null) {
630 typedef LocalOrdinal LO;
631 typedef GlobalOrdinal GO;
634 const char prefix[] =
"lowCommunicationMakeColMapAndReindexSerial: ";
638 const map_type& domainMap = *domainMapRCP;
643 const size_t numDomainElements = domainMap.getLocalNumElements();
644 Teuchos::Array<bool> LocalGIDs;
645 if (numDomainElements > 0) {
646 LocalGIDs.resize(numDomainElements,
false);
657 const size_t numMyRows = rowptr.size() - 1;
658 const int hashsize = std::max(
static_cast<int>(numMyRows), 100);
661 Teuchos::Array<GO> RemoteGIDList;
662 RemoteGIDList.reserve(hashsize);
663 Teuchos::Array<int> PIDList;
664 PIDList.reserve(hashsize);
675 size_t NumLocalColGIDs = 0;
676 LO NumRemoteColGIDs = 0;
677 for (
size_t i = 0; i < numMyRows; ++i) {
678 for (
size_t j = rowptr[i]; j < rowptr[i + 1]; ++j) {
679 const GO GID = colind_GID[j];
681 const LO LID = domainMap.getLocalElement(GID);
683 const bool alreadyFound = LocalGIDs[LID];
685 LocalGIDs[LID] =
true;
690 const LO hash_value = RemoteGIDs.get(GID);
691 if (hash_value == -1) {
692 const int PID = owningPIDs[j];
693 TEUCHOS_TEST_FOR_EXCEPTION(
694 PID == -1, std::invalid_argument, prefix <<
"Cannot figure out if "
696 colind_LID[j] =
static_cast<LO
>(numDomainElements + NumRemoteColGIDs);
697 RemoteGIDs.add(GID, NumRemoteColGIDs);
698 RemoteGIDList.push_back(GID);
699 PIDList.push_back(PID);
702 colind_LID[j] =
static_cast<LO
>(numDomainElements + hash_value);
710 if (domainMap.getComm()->getSize() == 1) {
713 TEUCHOS_TEST_FOR_EXCEPTION(
714 NumRemoteColGIDs != 0, std::runtime_error, prefix <<
"There is only one "
715 "process in the domain Map's communicator, which means that there are no "
716 "\"remote\" indices. Nevertheless, some column indices are not in the "
718 if (
static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
722 colMap = domainMapRCP;
728 const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
729 GST numMyColsGST =
static_cast<GST
>(numMyCols);
731 auto req = Details::iallreduce(numMyColsGST, numGlobalCols, Teuchos::REDUCE_SUM, *domainMap.getComm());
735 Teuchos::Array<GO> ColIndices;
736 GO* RemoteColIndices = NULL;
738 ColIndices.resize(numMyCols);
739 if (NumLocalColGIDs !=
static_cast<size_t>(numMyCols)) {
740 RemoteColIndices = &ColIndices[NumLocalColGIDs];
744 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
745 RemoteColIndices[i] = RemoteGIDList[i];
749 Teuchos::Array<LO> RemotePermuteIDs(NumRemoteColGIDs);
750 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
751 RemotePermuteIDs[i] = i;
758 ColIndices.begin() + NumLocalColGIDs,
759 RemotePermuteIDs.begin());
765 remotePIDs = PIDList;
774 LO StartCurrent = 0, StartNext = 1;
775 while (StartNext < NumRemoteColGIDs) {
776 if (PIDList[StartNext] == PIDList[StartNext - 1]) {
779 Tpetra::sort2(ColIndices.begin() + NumLocalColGIDs + StartCurrent,
780 ColIndices.begin() + NumLocalColGIDs + StartNext,
781 RemotePermuteIDs.begin() + StartCurrent);
782 StartCurrent = StartNext;
786 Tpetra::sort2(ColIndices.begin() + NumLocalColGIDs + StartCurrent,
787 ColIndices.begin() + NumLocalColGIDs + StartNext,
788 RemotePermuteIDs.begin() + StartCurrent);
791 Teuchos::Array<LO> ReverseRemotePermuteIDs(NumRemoteColGIDs);
792 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
793 ReverseRemotePermuteIDs[RemotePermuteIDs[i]] = i;
797 bool use_local_permute =
false;
798 Teuchos::Array<LO> LocalPermuteIDs(numDomainElements);
810 Teuchos::ArrayView<const GO> domainGlobalElements = domainMap.getLocalElementList();
811 if (
static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
812 if (NumLocalColGIDs > 0) {
814 std::copy(domainGlobalElements.begin(), domainGlobalElements.end(),
818 LO NumLocalAgain = 0;
819 use_local_permute =
true;
820 for (
size_t i = 0; i < numDomainElements; ++i) {
822 LocalPermuteIDs[i] = NumLocalAgain;
823 ColIndices[NumLocalAgain++] = domainGlobalElements[i];
826 TEUCHOS_TEST_FOR_EXCEPTION(
827 static_cast<size_t>(NumLocalAgain) != NumLocalColGIDs,
828 std::runtime_error, prefix <<
"Local ID count test failed.");
832 for (
size_t i = 0; i < numMyRows; ++i) {
833 for (
size_t j = rowptr[i]; j < rowptr[i + 1]; ++j) {
834 const LO ID = colind_LID[j];
835 if (
static_cast<size_t>(ID) < numDomainElements) {
836 if (use_local_permute) {
837 colind_LID[j] = LocalPermuteIDs[colind_LID[j]];
843 colind_LID[j] = NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j] - numDomainElements];
850 colMap = rcp(
new map_type(numGlobalCols, ColIndices, domainMap.getIndexBase(),
855template <
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
857 const Teuchos::ArrayView<const size_t>&
rowptr,
858 const Teuchos::ArrayView<LocalOrdinal>&
colind_LID,
859 const Teuchos::ArrayView<GlobalOrdinal>&
colind_GID,
861 const Teuchos::ArrayView<const int>&
owningPIDs,
862 Teuchos::Array<int>& remotePIDs,
864 const Teuchos::RCP<Teuchos::ParameterList>&
params) {
865 using DT =
typename Node::device_type;
866 using execution_space =
typename DT::execution_space;
867 execution_space
exec;
886template <
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
888 const Kokkos::View<const size_t*, typename Node::device_type>
rowptr_view,
889 const Kokkos::View<LocalOrdinal*, typename Node::device_type>
colind_LID_view,
890 const Kokkos::View<GlobalOrdinal*, typename Node::device_type>
colind_GID_view,
892 const Kokkos::View<const int*, typename Node::device_type>
owningPIDs_view,
893 Teuchos::Array<int>& remotePIDs,
895 const Teuchos::RCP<Teuchos::ParameterList>&
params) {
901 const char prefix[] =
"lowCommunicationMakeColMapAndReindex: ";
904 typedef typename Node::device_type DT;
905 using execution_space =
typename DT::execution_space;
906 using memory_space =
typename DT::memory_space;
907 execution_space
exec;
908 using team_policy = Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
909 typedef typename map_type::local_map_type local_map_type;
929 const LO
LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
936 Kokkos::parallel_reduce(
938 const int i =
member.league_rank();
942 Kokkos::parallel_reduce(
951 LocalGIDs_view[LID] = true;
952 innerUpdate.numLocalColGIDs++;
958 innerUpdate.numRemoteColGIDs++;
960 Kokkos::abort(
"Cannot figure out if ID is owned.\n");
981 if (domainMap.getComm()->getSize() == 1) {
986 "process in the domain Map's communicator, which means that there are no "
987 "\"remote\" indices. Nevertheless, some column indices are not in the "
996 auto localColMap = colMap->getLocalMap();
997 Kokkos::parallel_for(
1005 Kokkos::View<GO*, DT> RemoteColMapIndices_unsorted(Kokkos::ViewAllocateWithoutInitializing(
"RemoteColMapIndices_unsorted"), NumRemoteColGIDs);
1006 Kokkos::View<int*, DT> RemotePIDs_unsorted(Kokkos::ViewAllocateWithoutInitializing(
"RemotePIDs_unsorted"), NumRemoteColGIDs);
1013 size_t numEnteredRemotes;
1014 Kokkos::parallel_scan(
1015 Kokkos::RangePolicy<execution_space>(0, GIDs_map.capacity()), KOKKOS_LAMBDA(
const int i,
size_t& update,
const bool final) {
1016 if (GIDs_map.valid_at(i) && GIDs_map.value_at(i) != -1) {
1018 RemoteColMapIndices_unsorted(update) = GIDs_map.key_at(i);
1019 RemotePIDs_unsorted(update) = GIDs_map.value_at(i);
1025 TEUCHOS_ASSERT(numEnteredRemotes == NumRemoteColGIDs);
1029 const size_t numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
1030 GST numMyColsGST =
static_cast<GST
>(numMyCols);
1032 auto req = Details::iallreduce(numMyColsGST, numGlobalCols, Teuchos::REDUCE_SUM, *domainMap.getComm());
1036 Kokkos::View<GO*, DT> ColMapIndices(Kokkos::ViewAllocateWithoutInitializing(
"ColMapIndices"), numMyCols);
1039 if (NumRemoteColGIDs > 0) {
1041 auto RemoteColMapIndices_sorted = Kokkos::subview(ColMapIndices, Kokkos::make_pair(NumLocalColGIDs, numMyCols));
1044 Kokkos::View<size_t*, memory_space> index(Kokkos::ViewAllocateWithoutInitializing(
"index"), NumRemoteColGIDs);
1045 Kokkos::parallel_for(
1046 Kokkos::RangePolicy<execution_space>(0, NumRemoteColGIDs), KOKKOS_LAMBDA(
const size_t i) {
1050 exec, index, KOKKOS_LAMBDA(
const size_t i,
const size_t j) {
1051 auto pid_i = RemotePIDs_unsorted(i);
1052 auto pid_j = RemotePIDs_unsorted(j);
1054 return RemoteColMapIndices_unsorted(i) < RemoteColMapIndices_unsorted(j);
1056 return pid_i < pid_j;
1059 Kokkos::View<int*, memory_space> RemotePIDs_sorted(Kokkos::ViewAllocateWithoutInitializing(
"RemotePIDs_new"), NumRemoteColGIDs);
1060 Kokkos::parallel_for(
1061 Kokkos::RangePolicy<execution_space>(0, NumRemoteColGIDs), KOKKOS_LAMBDA(
const size_t i) {
1062 RemoteColMapIndices_sorted(i) = RemoteColMapIndices_unsorted(index(i));
1063 RemotePIDs_sorted(i) = RemotePIDs_unsorted(index(i));
1068 Teuchos::Array<int> PIDList(NumRemoteColGIDs);
1069 Kokkos::View<int*, Kokkos::HostSpace> RemotePIDs_sorted_host(PIDList.data(), NumRemoteColGIDs);
1070 Kokkos::deep_copy(exec, RemotePIDs_sorted_host, RemotePIDs_sorted);
1073 remotePIDs = PIDList;
1088 if (
static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
1089 if (NumLocalColGIDs > 0) {
1091 auto LocalColMapIndices = Kokkos::subview(ColMapIndices, Kokkos::make_pair((
size_t)0, numDomainElements));
1092 Kokkos::deep_copy(LocalColMapIndices, domainMap.getMyGlobalIndicesDevice());
1096 LO NumLocalAgain = 0;
1097 Kokkos::parallel_scan(
1098 Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(
const int i, LO& update,
const bool final) {
1099 if (LocalGIDs_view(i)) {
1101 ColMapIndices(update) = domainMap_local.getGlobalElement(i);
1108 TEUCHOS_TEST_FOR_EXCEPTION(
1109 static_cast<size_t>(NumLocalAgain) != NumLocalColGIDs,
1110 std::runtime_error, prefix <<
"Local ID count test failed.");
1115 colMap = rcp(
new map_type(numGlobalCols, ColMapIndices, domainMap.getIndexBase(),
1116 domainMap.getComm(), params));
1119 auto localColMap = colMap->getLocalMap();
1120 Kokkos::parallel_for(
1121 Kokkos::RangePolicy<execution_space>(0, colind_GID_view.size()), KOKKOS_LAMBDA(
const int i) {
1122 colind_LID_view(i) = localColMap.getLocalElement(colind_GID_view(i));
1139template <
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1155#ifdef HAVE_TPETRA_DEBUG
1166 Teuchos::ArrayRCP<int> pids =
temp.getDataNonConst();
1167 Teuchos::ArrayView<int>
v_pids = pids();
1169 TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error,
"Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
1173 TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error,
"Tpetra::Import_Util::getTwoTransferOwnershipVector this option not yet implemented");
1175 TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error,
"Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
Declaration of the Tpetra::CrsMatrix class.
Add specializations of Teuchos::Details::MpiTypeTraits for Kokkos::complex<float> and Kokkos::complex...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer< LocalOrdinal, GlobalOrdinal, Node > &transferThatDefinesOwnership, bool useReverseModeForOwnership, const ::Tpetra::Details::Transfer< LocalOrdinal, GlobalOrdinal, Node > &transferForMigratingData, bool useReverseModeForMigration, Tpetra::Vector< int, LocalOrdinal, GlobalOrdinal, Node > &owningPIDs)
Generates a list of owning PIDs based on two transfers (aka import/export objects). Let: OwningMap = u...
void sortAndMergeCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< Ordinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals)
Sort and merge the entries of the (raw CSR) matrix by column index within each row.
void lowCommunicationMakeColMapAndReindex(const Teuchos::ArrayView< const size_t > &rowptr, const Teuchos::ArrayView< LocalOrdinal > &colind_LID, const Teuchos::ArrayView< GlobalOrdinal > &colind_GID, const Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMapRCP, const Teuchos::ArrayView< const int > &owningPIDs, Teuchos::Array< int > &remotePIDs, Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &colMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
lowCommunicationMakeColMapAndReindex
void sortCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< Ordinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals)
Sort the entries of the (raw CSR) matrix by column index within each row.
void getPids(const Tpetra::Import< LocalOrdinal, GlobalOrdinal, Node > &Importer, Teuchos::Array< int > &pids, bool use_minus_one_for_local)
Like getPidGidPairs, but just gets the PIDs, ordered by the column Map.
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
Common base class of Import and Export.
Teuchos::ArrayView< const LO > getExportLIDs() const
List of entries in the source Map that will be sent to other processes.
size_t getNumExportIDs() const
Number of entries that must be sent by the calling process to other processes.
Teuchos::ArrayView< const int > getExportPIDs() const
List of processes to which entries will be sent.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2, const bool stableSort=false)
Sort the first array, and apply the resulting permutation to the second array.
void sort3(const IT1 &first1, const IT1 &last1, const IT2 &first2, const IT3 &first3, const bool stableSort=false)
Sort the first array, and apply the same permutation to the second and third arrays.
@ REPLACE
Replace existing values with new values.