#ifndef TPETRA_IMPORT_UTIL2_HPP
#define TPETRA_IMPORT_UTIL2_HPP

#include "Tpetra_ConfigDefs.hpp"
#include "Tpetra_Import.hpp"
#include "Tpetra_HashTable.hpp"
#include "Tpetra_Map.hpp"
#include "Tpetra_Distributor.hpp"
#include "Tpetra_Vector.hpp"
#include "Kokkos_DualView.hpp"
#include "KokkosSparse_SortCrs.hpp"
#include <Teuchos_Array.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <unordered_map>
#include <Kokkos_Core.hpp>
#include <Kokkos_Sort.hpp>
namespace Tpetra {
namespace Import_Util {
 
/// \brief Sort the entries of the (raw CSR) matrix by column index within each row.
template <typename Scalar, typename Ordinal>
void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
                    const Teuchos::ArrayView<Ordinal>& CRS_colind,
                    const Teuchos::ArrayView<Scalar>& CRS_vals);
 
template <typename Ordinal>
void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
                    const Teuchos::ArrayView<Ordinal>& CRS_colind);
 
template <typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
                    const colind_array_type& CRS_colind,
                    const vals_array_type& CRS_vals);
 
template <typename rowptr_array_type, typename colind_array_type>
void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
                    const colind_array_type& CRS_colind);
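// Example (illustrative sketch, not part of the header): sorting a small raw
// CRS matrix in place with the ArrayView overload declared above.  The data
// below are made-up sample values.
//
//   std::vector<size_t> rowptr = {0, 2, 4};           // two rows
//   std::vector<int>    colind = {3, 1, 2, 0};        // unsorted within rows
//   std::vector<double> vals   = {30.0, 10.0, 20.0, 0.0};
//   Tpetra::Import_Util::sortCrsEntries<double, int>(
//       Teuchos::ArrayView<size_t>(rowptr.data(), rowptr.size()),
//       Teuchos::ArrayView<int>(colind.data(), colind.size()),
//       Teuchos::ArrayView<double>(vals.data(), vals.size()));
//   // Row 0 becomes columns {1, 3} with values {10.0, 30.0};
//   // row 1 becomes columns {0, 2} with values {0.0, 20.0}.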
 
/// \brief Sort and merge the entries of the (raw CSR) matrix by column index within each row.
template <typename Scalar, typename Ordinal>
void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
                            const Teuchos::ArrayView<Ordinal>& CRS_colind,
                            const Teuchos::ArrayView<Scalar>& CRS_vals);
 
template <typename Ordinal>
void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
                            const Teuchos::ArrayView<Ordinal>& CRS_colind);
 
template <class rowptr_view_type, class colind_view_type, class vals_view_type>
void sortAndMergeCrsEntries(rowptr_view_type& CRS_rowptr,
                            colind_view_type& CRS_colind,
                            vals_view_type& CRS_vals);
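// Example (illustrative sketch, not part of the header): sortAndMergeCrsEntries
// additionally sums entries that share a column index within a row and
// compacts the arrays.  The sample data below are made up.
//
//   std::vector<size_t> rowptr = {0, 3};              // one row, three entries
//   std::vector<int>    colind = {2, 0, 2};           // column 2 appears twice
//   std::vector<double> vals   = {1.0, 5.0, 4.0};
//   Tpetra::Import_Util::sortAndMergeCrsEntries<double, int>(
//       Teuchos::ArrayView<size_t>(rowptr.data(), rowptr.size()),
//       Teuchos::ArrayView<int>(colind.data(), colind.size()),
//       Teuchos::ArrayView<double>(vals.data(), vals.size()));
//   // The row now holds columns {0, 2} with values {5.0, 5.0}, and rowptr
//   // is updated to reflect the merged (shorter) row.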
 
template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
void lowCommunicationMakeColMapAndReindex(
    const Teuchos::ArrayView<const size_t>& rowptr,
    const Teuchos::ArrayView<LocalOrdinal>& colind_LID,
    const Teuchos::ArrayView<GlobalOrdinal>& colind_GID,
    const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
    const Teuchos::ArrayView<const int>& owningPIDs,
    Teuchos::Array<int>& remotePIDs,
    Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap);
 
template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
void lowCommunicationMakeColMapAndReindex(
    const Kokkos::View<size_t*, typename Node::device_type> rowptr_view,
    const Kokkos::View<LocalOrdinal*, typename Node::device_type> colind_LID_view,
    const Kokkos::View<GlobalOrdinal*, typename Node::device_type> colind_GID_view,
    const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
    const Teuchos::ArrayView<const int>& owningPIDs,
    Teuchos::Array<int>& remotePIDs,
    Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap);
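// Example (illustrative sketch under assumed typedefs LO/GO/NT and an
// already-filled CRS structure): building a column Map without extra
// communication, given the owning process of every column index.
//
//   Teuchos::Array<int> remotePIDs;
//   Teuchos::RCP<const Tpetra::Map<LO, GO, NT>> colMap;
//   Tpetra::Import_Util::lowCommunicationMakeColMapAndReindex<LO, GO, NT>(
//       rowptr,       // ArrayView<const size_t>, length numLocalRows+1
//       colind_LID,   // ArrayView<LO>, overwritten with column-Map LIDs
//       colind_GID,   // ArrayView<GO>, global column indices
//       domainMap,    // RCP<const Map>, the domain Map
//       owningPIDs,   // ArrayView<const int>, owner of each column index
//       remotePIDs,   // out: owning PIDs of the remote (non-domain) columns
//       colMap);      // out: the new column Map
//   // colind_LID now holds local indices relative to colMap.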
 
/// \brief Generates a list of owning PIDs based on two transfer (aka Import/Export) objects.
template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
void getTwoTransferOwnershipVector(
    const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
    bool useReverseModeForOwnership,
    const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferForMigratingData,
    bool useReverseModeForMigration,
    Tpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node>& owningPIDs);

}  // namespace Import_Util
}  // namespace Tpetra
 
namespace Tpetra {
namespace Import_Util {
 
template <typename PID, typename GlobalOrdinal>
bool sort_PID_then_GID(const std::pair<PID, GlobalOrdinal>& a,
                       const std::pair<PID, GlobalOrdinal>& b) {
  if (a.first != b.first)
    return (a.first < b.first);
  return (a.second < b.second);
}
 
template <typename PID,
          typename GlobalOrdinal,
          typename LocalOrdinal>
bool sort_PID_then_pair_GID_LID(const std::pair<PID, std::pair<GlobalOrdinal, LocalOrdinal>>& a,
                                const std::pair<PID, std::pair<GlobalOrdinal, LocalOrdinal>>& b) {
  if (a.first != b.first)
    return a.first < b.first;
  else
    return (a.second.first < b.second.first);
}
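// Example (illustrative sketch): the comparators above are meant to be passed
// to std::sort so that (PID, GID) pairs group by owning process first and by
// global index second.  Sample data are made up.
//
//   std::vector<std::pair<int, long long>> pairs =
//       {{2, 7}, {0, 9}, {2, 3}, {0, 1}};
//   std::sort(pairs.begin(), pairs.end(),
//             Tpetra::Import_Util::sort_PID_then_GID<int, long long>);
//   // pairs is now {{0, 1}, {0, 9}, {2, 3}, {2, 7}}.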
 
template <typename Scalar,
          typename LocalOrdinal,
          typename GlobalOrdinal,
          typename Node>
void reverseNeighborDiscovery(const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& SourceMatrix,
                              const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type& rowptr,
                              const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type& colind,
                              const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& MyDomainMap,
                              const Teuchos::RCP<const Tpetra::Import<LocalOrdinal, GlobalOrdinal, Node>>& MyImporter,
                              Teuchos::ArrayRCP<int>& type3PIDs,
                              Teuchos::ArrayRCP<LocalOrdinal>& type3LIDs,
                              Teuchos::RCP<const Teuchos::Comm<int>>& rcomm) {
 
#ifdef HAVE_TPETRACORE_MPI
  using Teuchos::TimeMonitor;
  using ::Tpetra::Details::Behavior;
  using Teuchos::RCP;
  typedef LocalOrdinal LO;
  typedef GlobalOrdinal GO;
  typedef std::pair<GO, GO> pidgidpair_t;

  const std::string prefix{" Import_Util2::ReverseND:: "};
  const std::string label("IU2::Neighbor");

  // If there is no importer, there is nothing to do.
  if (MyImporter.is_null()) return;

  std::ostringstream errstr;
  bool error = false;

  auto const comm = MyDomainMap->getComm();

  MPI_Comm rawComm = getRawMpiComm(*comm);
  const int MyPID  = rcomm->getRank();

  // Things related to the messages this process sends in forward mode.
  Distributor& Distor   = MyImporter->getDistributor();
  const size_t NumRecvs = Distor.getNumReceives();
  const size_t NumSends = Distor.getNumSends();
  auto RemoteLIDs       = MyImporter->getRemoteLIDs();
  auto const ProcsFrom  = Distor.getProcsFrom();
  auto const ProcsTo    = Distor.getProcsTo();

  auto LengthsFrom     = Distor.getLengthsFrom();
  auto MyColMap        = SourceMatrix.getColMap();
  const size_t numCols = MyColMap->getLocalNumElements();
  RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> target = MyImporter->getTargetMap();
 
  // For each column, record the position in ProcsFrom of the process that
  // sends it (or -1 if it is not a remote column).
  Teuchos::Array<int> RemotePIDOrder(numCols, -1);

  for (size_t i = 0, j = 0; i < NumRecvs; ++i) {
    for (size_t k = 0; k < LengthsFrom[i]; ++k) {
      const int pid = ProcsFrom[i];
      if (pid != MyPID)
        RemotePIDOrder[RemoteLIDs[j]] = i;
      j++;
    }
  }

  // Reverse-send sizes, counted in (PID, GID) pairs, one per receive partner.
  Teuchos::Array<int> ReverseSendSizes(NumRecvs, 0);
  // RSB: one reverse-send buffer of (PID, GID) pairs per receive partner.
  Teuchos::Array<Teuchos::ArrayRCP<pidgidpair_t>> RSB(NumRecvs);
 
  {
#ifdef HAVE_TPETRA_MMM_TIMINGS
    TimeMonitor set_all(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSB")));
#endif
    // The exports of MyImporter tell us which locally owned rows other
    // processes receive.
    const size_t NumExportLIDs = MyImporter->getNumExportIDs();
    auto ExportLIDs            = MyImporter->getExportLIDs();
    auto ExportPIDs            = MyImporter->getExportPIDs();

    // One set of unique (PID, GID) pairs per receive partner.
    Teuchos::Array<std::set<pidgidpair_t>> pidsets(NumRecvs);
    {
#ifdef HAVE_TPETRA_MMM_TIMINGS
      TimeMonitor set_insert(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSBinsert")));
#endif
      for (size_t i = 0; i < NumExportLIDs; i++) {
        LO lid     = ExportLIDs[i];
        GO exp_pid = ExportPIDs[i];
        for (auto j = rowptr[lid]; j < rowptr[lid + 1]; j++) {
          int pid_order = RemotePIDOrder[colind[j]];
          if (pid_order != -1) {
            GO gid     = MyColMap->getGlobalElement(colind[j]);
            auto tpair = pidgidpair_t(exp_pid, gid);
            pidsets[pid_order].insert(pidsets[pid_order].end(), tpair);
          }
        }
      }
    }
    {
#ifdef HAVE_TPETRA_MMM_TIMINGS
      TimeMonitor set_cpy(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSBcpy")));
#endif
      int jj = 0;
      for (auto&& ps : pidsets) {
        auto s  = ps.size();
        RSB[jj] = Teuchos::arcp(new pidgidpair_t[s], 0, s, true);
        std::copy(ps.begin(), ps.end(), RSB[jj]);
        ReverseSendSizes[jj] = s;
        ++jj;
      }
    }
  }
 
  // Exchange the reverse-send sizes: post a receive from every process this
  // process normally sends to, and a send to every process it normally
  // receives from.
  Teuchos::Array<int> ReverseRecvSizes(NumSends, -1);
  Teuchos::Array<MPI_Request> rawBreq(ProcsFrom.size() + ProcsTo.size(), MPI_REQUEST_NULL);

  const int mpi_tag_base_ = 3;
  int mpireq_idx          = 0;

  for (int i = 0; i < ProcsTo.size(); ++i) {
    int Rec_Tag            = mpi_tag_base_ + ProcsTo[i];
    int* thisrecv          = (int*)(&ReverseRecvSizes[i]);
    MPI_Request rawRequest = MPI_REQUEST_NULL;
    MPI_Irecv(const_cast<int*>(thisrecv),
              1,
              MPI_INT,
              ProcsTo[i],
              Rec_Tag,
              rawComm,
              &rawRequest);
    rawBreq[mpireq_idx++] = rawRequest;
  }
  for (int i = 0; i < ProcsFrom.size(); ++i) {
    int Send_Tag           = mpi_tag_base_ + MyPID;
    int* mysend            = (int*)(&ReverseSendSizes[i]);
    MPI_Request rawRequest = MPI_REQUEST_NULL;
    MPI_Isend(mysend,
              1,
              MPI_INT,
              ProcsFrom[i],
              Send_Tag,
              rawComm,
              &rawRequest);
    rawBreq[mpireq_idx++] = rawRequest;
  }
  Teuchos::Array<MPI_Status> rawBstatus(rawBreq.size());
#ifdef HAVE_TPETRA_DEBUG
  const int mpi_error_send =
#endif
      MPI_Waitall(rawBreq.size(), rawBreq.getRawPtr(),
                  rawBstatus.getRawPtr());
#ifdef HAVE_TPETRA_DEBUG
  if (mpi_error_send != MPI_SUCCESS) {
    error = true;
    errstr << MyPID << "sE1 reverseNeighborDiscovery Mpi_Waitall error on send ";
    std::cerr << errstr.str() << std::flush;
  }
#endif
 
  // Total number of (PID, GID) pairs this process will receive.
  int totalexportpairrecsize = 0;
  for (size_t i = 0; i < NumSends; ++i) {
    totalexportpairrecsize += ReverseRecvSizes[i];
#ifdef HAVE_TPETRA_DEBUG
    if (ReverseRecvSizes[i] < 0) {
      errstr << MyPID << "E4 reverseNeighborDiscovery: got < 0 for receive size " << ReverseRecvSizes[i] << std::endl;
      error = true;
    }
#endif
  }
  Teuchos::ArrayRCP<pidgidpair_t> AllReverseRecv = Teuchos::arcp(new pidgidpair_t[totalexportpairrecsize], 0, totalexportpairrecsize, true);
 
  // Now exchange the actual (PID, GID) pair data.
  int offset = 0;
  mpireq_idx = 0;
  for (int i = 0; i < ProcsTo.size(); ++i) {
    int recv_data_size     = ReverseRecvSizes[i] * 2;
    int recvData_MPI_Tag   = mpi_tag_base_ * 2 + ProcsTo[i];
    MPI_Request rawRequest = MPI_REQUEST_NULL;
    GO* rec_bptr           = (GO*)(&AllReverseRecv[offset]);
    offset += ReverseRecvSizes[i];
    MPI_Irecv(rec_bptr,
              recv_data_size,
              ::Tpetra::Details::MpiTypeTraits<GO>::getType(rec_bptr[0]),
              ProcsTo[i],
              recvData_MPI_Tag,
              rawComm,
              &rawRequest);
    rawBreq[mpireq_idx++] = rawRequest;
  }
  for (int ii = 0; ii < ProcsFrom.size(); ++ii) {
    GO* send_bptr          = (GO*)(RSB[ii].getRawPtr());
    MPI_Request rawSequest = MPI_REQUEST_NULL;
    int send_data_size     = ReverseSendSizes[ii] * 2;  // 2 GOs per pair
    int sendData_MPI_Tag   = mpi_tag_base_ * 2 + MyPID;
    MPI_Isend(send_bptr,
              send_data_size,
              ::Tpetra::Details::MpiTypeTraits<GO>::getType(send_bptr[0]),
              ProcsFrom[ii],
              sendData_MPI_Tag,
              rawComm,
              &rawSequest);
    rawBreq[mpireq_idx++] = rawSequest;
  }
 
#ifdef HAVE_TPETRA_DEBUG
  const int mpi_error_recv =
#endif
      MPI_Waitall(rawBreq.size(),
                  rawBreq.getRawPtr(),
                  rawBstatus.getRawPtr());
#ifdef HAVE_TPETRA_DEBUG
  if (mpi_error_recv != MPI_SUCCESS) {
    error = true;
    errstr << MyPID << "E3.r reverseNeighborDiscovery Mpi_Waitall error on receive ";
    std::cerr << errstr.str() << std::flush;
  }
#endif
 
  // Sort the received (PID, GID) pairs by PID, then GID, and strip duplicates.
  std::sort(AllReverseRecv.begin(), AllReverseRecv.end(), Tpetra::Import_Util::sort_PID_then_GID<GlobalOrdinal, GlobalOrdinal>);

  auto newEndOfPairs = std::unique(AllReverseRecv.begin(), AllReverseRecv.end());
  if (AllReverseRecv.begin() == newEndOfPairs) return;

  // Build the type-3 PID and LID lists, skipping pairs that refer to this process.
  int ARRsize = std::distance(AllReverseRecv.begin(), newEndOfPairs);
  auto rPIDs  = Teuchos::arcp(new int[ARRsize], 0, ARRsize, true);
  auto rLIDs  = Teuchos::arcp(new LocalOrdinal[ARRsize], 0, ARRsize, true);

  int tsize = 0;
  for (auto itr = AllReverseRecv.begin(); itr != newEndOfPairs; ++itr) {
    if ((int)(itr->first) != MyPID) {
      rPIDs[tsize]     = (int)itr->first;
      LocalOrdinal lid = MyDomainMap->getLocalElement(itr->second);
      rLIDs[tsize]     = lid;
      tsize++;
    }
  }

  type3PIDs = rPIDs.persistingView(0, tsize);
  type3LIDs = rLIDs.persistingView(0, tsize);

  if (error) {
    std::cerr << errstr.str() << std::flush;
    MPI_Abort(MPI_COMM_WORLD, -1);
  }
#endif  // HAVE_TPETRACORE_MPI
}
 
template <typename Scalar, typename Ordinal>
void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
                    const Teuchos::ArrayView<Ordinal>& CRS_colind,
                    const Teuchos::ArrayView<Scalar>& CRS_vals) {
  // For each row, sort the column indices (and apply the same permutation to
  // the values) in ascending order, using a per-row Shell sort with the 3x+1
  // gap sequence.  Rows whose start offset lies at or past the end of the
  // arrays are skipped:
  //
  //   if (start >= nnz) continue;
  //   while (m < n) m = m * 3 + 1;
  //
  // The row loop mirrors Impl::SortCrsEntries::operator() below.
}
 
 
template <typename Ordinal>
void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
                    const Teuchos::ArrayView<Ordinal>& CRS_colind) {
  // Use an empty values array, so only the column indices get sorted.
  Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type> CRS_vals;
  sortCrsEntries(CRS_rowptr, CRS_colind, CRS_vals);
}
 
namespace Impl {

template <class RowOffsetsType, class ColumnIndicesType, class ValuesType>
class SortCrsEntries {
 private:
  typedef typename ColumnIndicesType::non_const_value_type ordinal_type;
  typedef typename ValuesType::non_const_value_type scalar_type;

 public:
  SortCrsEntries(const RowOffsetsType& ptr,
                 const ColumnIndicesType& ind,
                 const ValuesType& val)
    : ptr_(ptr), ind_(ind), val_(val) {
    static_assert(std::is_signed<ordinal_type>::value,
                  "column index -- that is, the type of each entry of ind "
                  "-- must be signed in order for this functor to work.");
  }

  // Sort the entries of row i by column index, applying the same permutation
  // to the values (if any), using Shell sort with the 3x+1 gap sequence.
  KOKKOS_FUNCTION void operator()(const size_t i) const {
    const size_t nnz                = ind_.extent(0);
    const size_t start              = ptr_(i);
    const bool permute_values_array = val_.extent(0) > 0;

    if (start < nnz) {
      const size_t NumEntries = ptr_(i + 1) - start;

      const ordinal_type n = static_cast<ordinal_type>(NumEntries);
      ordinal_type m       = 1;
      while (m < n) m = m * 3 + 1;
      m /= 3;

      while (m > 0) {
        ordinal_type max = n - m;
        for (ordinal_type j = 0; j < max; j++) {
          for (ordinal_type k = j; k >= 0; k -= m) {
            const size_t sk = start + k;
            if (ind_(sk + m) >= ind_(sk)) {
              break;
            }
            if (permute_values_array) {
              const scalar_type dtemp = val_(sk + m);
              val_(sk + m)            = val_(sk);
              val_(sk)                = dtemp;
            }
            const ordinal_type itemp = ind_(sk + m);
            ind_(sk + m)             = ind_(sk);
            ind_(sk)                 = itemp;
          }
        }
        m = m / 3;
      }
    }
  }

  static void sortCrsEntries(const RowOffsetsType& ptr,
                             const ColumnIndicesType& ind,
                             const ValuesType& val) {
    // If the row offsets array is empty, the matrix has no rows, so there is
    // nothing to sort.
    if (ptr.extent(0) == 0) {
      return;
    }
    const size_t NumRows = ptr.extent(0) - 1;

    typedef size_t index_type;
    typedef typename ValuesType::execution_space execution_space;
    // Specify RangePolicy explicitly, in order to use the correct execution space.
    typedef Kokkos::RangePolicy<execution_space, index_type> range_type;
    Kokkos::parallel_for("sortCrsEntries", range_type(0, NumRows),
                         SortCrsEntries(ptr, ind, val));
  }

 private:
  RowOffsetsType ptr_;
  ColumnIndicesType ind_;
  ValuesType val_;
};

}  // namespace Impl
 
template <typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
                    const colind_array_type& CRS_colind,
                    const vals_array_type& CRS_vals) {
  Impl::SortCrsEntries<rowptr_array_type, colind_array_type,
                       vals_array_type>::sortCrsEntries(CRS_rowptr, CRS_colind, CRS_vals);
}
 
template <typename rowptr_array_type, typename colind_array_type>
void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
                    const colind_array_type& CRS_colind) {
  // Use an empty values view, so only the column indices get sorted.
  typedef typename colind_array_type::execution_space execution_space;
  typedef Tpetra::Details::DefaultTypes::scalar_type scalar_type;
  typedef typename Kokkos::View<scalar_type*, execution_space> scalar_view_type;
  scalar_view_type CRS_vals;
  sortCrsEntries<rowptr_array_type, colind_array_type,
                 scalar_view_type>(CRS_rowptr, CRS_colind, CRS_vals);
}
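// Example (illustrative sketch): the Kokkos::View overloads above sort device
// data through Impl::SortCrsEntries.  The view names and the execution space
// are assumptions for the example only.
//
//   using device_type = Kokkos::DefaultExecutionSpace::device_type;
//   Kokkos::View<size_t*, device_type> rowptr("rowptr", numRows + 1);
//   Kokkos::View<int*, device_type>    colind("colind", numEntries);
//   Kokkos::View<double*, device_type> vals("vals", numEntries);
//   // ... fill the views ...
//   Tpetra::Import_Util::sortCrsEntries(rowptr, colind, vals);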
 
template <typename Scalar, typename Ordinal>
void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
                            const Teuchos::ArrayView<Ordinal>& CRS_colind,
                            const Teuchos::ArrayView<Scalar>& CRS_vals) {
  // Within each row, sort the entries by column index, then merge entries
  // that share a column index by summing their values, compacting
  // CRS_rowptr, CRS_colind, and CRS_vals in place.
}
 
template <typename Ordinal>
void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
                            const Teuchos::ArrayView<Ordinal>& CRS_colind) {
  // Use an empty values array, so only the column indices get sorted and merged.
  Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type> CRS_vals;
  sortAndMergeCrsEntries(CRS_rowptr, CRS_colind, CRS_vals);
}
 
template <class rowptr_view_type, class colind_view_type, class vals_view_type>
void sortAndMergeCrsEntries(rowptr_view_type& CRS_rowptr,
                            colind_view_type& CRS_colind,
                            vals_view_type& CRS_vals) {
  using execution_space = typename vals_view_type::execution_space;

  auto CRS_rowptr_in = CRS_rowptr;
  auto CRS_colind_in = CRS_colind;
  auto CRS_vals_in   = CRS_vals;

  KokkosSparse::sort_and_merge_matrix<execution_space, rowptr_view_type,
                                      colind_view_type, vals_view_type>(CRS_rowptr_in, CRS_colind_in, CRS_vals_in,
                                                                        CRS_rowptr, CRS_colind, CRS_vals);
}
 
template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
void lowCommunicationMakeColMapAndReindexSerial(
    const Teuchos::ArrayView<const size_t>& rowptr,
    const Teuchos::ArrayView<LocalOrdinal>& colind_LID,
    const Teuchos::ArrayView<GlobalOrdinal>& colind_GID,
    const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
    const Teuchos::ArrayView<const int>& owningPIDs,
    Teuchos::Array<int>& remotePIDs,
    Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap) {
  using Teuchos::rcp;
  typedef LocalOrdinal LO;
  typedef GlobalOrdinal GO;
  typedef Tpetra::global_size_t GST;
  typedef Tpetra::Map<LO, GO, Node> map_type;
  const char prefix[] = "lowCommunicationMakeColMapAndReindexSerial: ";

  // The domain Map tells us which column indices are locally owned.
  const map_type& domainMap = *domainMapRCP;
 
  // Scan all column indices and sort into two groups:
  // Local:  those whose GID matches a GID of the domain Map on this process.
  // Remote: all others.
  const size_t numDomainElements = domainMap.getLocalNumElements();
  Teuchos::Array<bool> LocalGIDs;
  if (numDomainElements > 0) {
    LocalGIDs.resize(numDomainElements, false);  // Assume nothing is local
  }

  // Ideally the hash table would be sized by the number of remote GIDs, but
  // that would require two passes over the column indices, so size it by the
  // larger of the row count and 100.
  const size_t numMyRows = rowptr.size() - 1;
  const int hashsize     = std::max(static_cast<int>(numMyRows), 100);

  Tpetra::Details::HashTable<GO, LO> RemoteGIDs(hashsize);
  Teuchos::Array<GO> RemoteGIDList;
  RemoteGIDList.reserve(hashsize);
  Teuchos::Array<int> PIDList;
  PIDList.reserve(hashsize);
 
  // For each column GID, determine whether it is local (in the domain Map)
  // or remote, and remember each remote GID together with its owning PID.
  size_t NumLocalColGIDs = 0;
  LO NumRemoteColGIDs    = 0;
  for (size_t i = 0; i < numMyRows; ++i) {
    for (size_t j = rowptr[i]; j < rowptr[i + 1]; ++j) {
      const GO GID = colind_GID[j];
      // Check if GID matches a GID of the domain Map.
      const LO LID = domainMap.getLocalElement(GID);
      if (LID != -1) {
        const bool alreadyFound = LocalGIDs[LID];
        if (!alreadyFound) {
          LocalGIDs[LID] = true;  // There is a column in the graph associated with this domain Map GID
          NumLocalColGIDs++;
        }
        colind_LID[j] = LID;
      } else {
        const LO hash_value = RemoteGIDs.get(GID);
        if (hash_value == -1) {  // This GID is not yet in the hash table
          const int PID = owningPIDs[j];
          TEUCHOS_TEST_FOR_EXCEPTION(
              PID == -1, std::invalid_argument, prefix << "Cannot figure out if "
              "ID is owned.");
          colind_LID[j] = static_cast<LO>(numDomainElements + NumRemoteColGIDs);
          RemoteGIDs.add(GID, NumRemoteColGIDs);
          RemoteGIDList.push_back(GID);
          PIDList.push_back(PID);
          NumRemoteColGIDs++;
        } else {
          colind_LID[j] = static_cast<LO>(numDomainElements + hash_value);
        }
      }
    }
  }
 
  // Possible short-circuit: all column indices are in the domain Map, so the
  // domain Map can serve as the column Map.
  if (domainMap.getComm()->getSize() == 1) {
    // Sanity check: with only one process there can be no remote GIDs.
    TEUCHOS_TEST_FOR_EXCEPTION(
        NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
        "process in the domain Map's communicator, which means that there are no "
        "\"remote\" indices.  Nevertheless, some column indices are not in the "
        "domain Map.");
    if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
      colMap = domainMapRCP;
      return;
    }
  }

  // Now build the array of column GIDs: local GIDs first, then remote GIDs
  // ordered by owning process ID.
  const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
  Teuchos::Array<GO> ColIndices;
  GO* RemoteColIndices = NULL;
  if (numMyCols > 0) {
    ColIndices.resize(numMyCols);
    if (NumLocalColGIDs != static_cast<size_t>(numMyCols)) {
      RemoteColIndices = &ColIndices[NumLocalColGIDs];  // Points to the remote part of ColIndices
    }
  }
 
  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
    RemoteColIndices[i] = RemoteGIDList[i];
  }

  // Build the permutation that sorts the remote GIDs by owning PID.
  Teuchos::Array<LO> RemotePermuteIDs(NumRemoteColGIDs);
  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
    RemotePermuteIDs[i] = i;
  }

  // Sort the remote GIDs by their owning PID, applying the same permutation
  // to RemotePermuteIDs so the reindexing below can undo it.
  Tpetra::sort3(PIDList.begin(), PIDList.end(),
                ColIndices.begin() + NumLocalColGIDs,
                RemotePermuteIDs.begin());

  // Stash the remote PIDs.  Once remotePIDs is changed, the caller will need
  // to rebuild its Distributor from the receives.
  remotePIDs = PIDList;
 
  // Sort the remote GIDs within each block owned by the same PID, so the
  // column Map ordering is deterministic.
  LO StartCurrent = 0, StartNext = 1;
  while (StartNext < NumRemoteColGIDs) {
    if (PIDList[StartNext] == PIDList[StartNext - 1]) {
      StartNext++;
    } else {
      Tpetra::sort2(ColIndices.begin() + NumLocalColGIDs + StartCurrent,
                    ColIndices.begin() + NumLocalColGIDs + StartNext,
                    RemotePermuteIDs.begin() + StartCurrent);
      StartCurrent = StartNext;
      StartNext++;
    }
  }
  Tpetra::sort2(ColIndices.begin() + NumLocalColGIDs + StartCurrent,
                ColIndices.begin() + NumLocalColGIDs + StartNext,
                RemotePermuteIDs.begin() + StartCurrent);
 
  // Reverse the permutation to get the information we actually need.
  Teuchos::Array<LO> ReverseRemotePermuteIDs(NumRemoteColGIDs);
  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
    ReverseRemotePermuteIDs[RemotePermuteIDs[i]] = i;
  }

  // Build the permutation for the local GIDs, if needed.
  bool use_local_permute = false;
  Teuchos::Array<LO> LocalPermuteIDs(numDomainElements);

  // Fill the front end of ColIndices with the local GIDs.  If every domain
  // Map GID is also a column GID, simply copy the domain Map's GID list;
  // otherwise scan the domain Map GIDs and keep those flagged in LocalGIDs,
  // preserving the domain Map's ordering.
  Teuchos::ArrayView<const GO> domainGlobalElements = domainMap.getLocalElementList();
  if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
    if (NumLocalColGIDs > 0) {
      std::copy(domainGlobalElements.begin(), domainGlobalElements.end(),
                ColIndices.begin());
    }
  } else {
    LO NumLocalAgain  = 0;
    use_local_permute = true;
    for (size_t i = 0; i < numDomainElements; ++i) {
      if (LocalGIDs[i]) {
        LocalPermuteIDs[i]          = NumLocalAgain;
        ColIndices[NumLocalAgain++] = domainGlobalElements[i];
      }
    }
    TEUCHOS_TEST_FOR_EXCEPTION(
        static_cast<size_t>(NumLocalAgain) != NumLocalColGIDs,
        std::runtime_error, prefix << "Local ID count test failed.");
  }
 
  // Make the column Map.
  const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid();
  colMap              = rcp(new map_type(minus_one, ColIndices, domainMap.getIndexBase(),
                                         domainMap.getComm()));

  // Low-cost reindex of the matrix.
  for (size_t i = 0; i < numMyRows; ++i) {
    for (size_t j = rowptr[i]; j < rowptr[i + 1]; ++j) {
      const LO ID = colind_LID[j];
      if (static_cast<size_t>(ID) < numDomainElements) {
        if (use_local_permute) {
          colind_LID[j] = LocalPermuteIDs[colind_LID[j]];
        }
        // If use_local_permute is false, the local indices already follow
        // the domain Map's ordering, so leave them alone.
      } else {
        colind_LID[j] = NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j] - numDomainElements];
      }
    }
  }
}
 
template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
void lowCommunicationMakeColMapAndReindex(
    const Teuchos::ArrayView<const size_t>& rowptr,
    const Teuchos::ArrayView<LocalOrdinal>& colind_LID,
    const Teuchos::ArrayView<GlobalOrdinal>& colind_GID,
    const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
    const Teuchos::ArrayView<const int>& owningPIDs,
    Teuchos::Array<int>& remotePIDs,
    Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap) {
  using Teuchos::rcp;
  typedef LocalOrdinal LO;
  typedef GlobalOrdinal GO;
  typedef Tpetra::global_size_t GST;
  typedef Tpetra::Map<LO, GO, Node> map_type;
  const char prefix[] = "lowCommunicationMakeColMapAndReindex: ";

  typedef typename Node::device_type DT;
  using execution_space = typename DT::execution_space;
  execution_space exec;
  using team_policy = Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
  typedef typename map_type::local_map_type local_map_type;

  // The domain Map tells us which column indices are locally owned.
  const map_type& domainMap = *domainMapRCP;
 
  // Scan all column indices and sort into two groups:
  // Local:  those whose GID matches a GID of the domain Map on this process.
  // Remote: all others, collected together with their owning PIDs.
  Kokkos::parallel_reduce(
        const int i = member.league_rank();
        Kokkos::parallel_reduce(
                  LocalGIDs_view[LID] = true;
                if (outcome.success() && PID == -1) {
                  Kokkos::abort("Cannot figure out if ID is owned.\n");
                }

  // Compact the (remote GID, owning PID) pairs into dense arrays.
  Kokkos::parallel_scan(
          RemoteGIDList_view[update] = RemoteGIDs_view_map.key_at(i);
          PIDList_view[update]       = RemoteGIDs_view_map.value_at(i);

  // Possible short-circuit: all column indices are in the domain Map.
  if (domainMap.getComm()->getSize() == 1) {
    TEUCHOS_TEST_FOR_EXCEPTION(
        NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
        "process in the domain Map's communicator, which means that there are no "
        "\"remote\" indices.  Nevertheless, some column indices are not in the "
        "domain Map.");
      auto localColMap = colMap->getLocalMap();
      Kokkos::parallel_for(

  // Copy the remote GIDs to the back of the column GID list, sorted by
  // owning PID, and stash the owning PIDs in remotePIDs.
      Kokkos::parallel_for(
    Kokkos::parallel_reduce(
    exec.fence("fence before setting PIDList");

  // Fill the front of the column GID list with the local GIDs.
      Kokkos::parallel_for(
    Kokkos::parallel_scan(
    TEUCHOS_TEST_FOR_EXCEPTION(
        static_cast<size_t>(NumLocalAgain) != NumLocalColGIDs,
        std::runtime_error, prefix << "Local ID count test failed.");

  // Make the column Map and reindex the matrix.
  const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid();
  colMap = rcp(new map_type(minus_one, ColIndices_view, domainMap.getIndexBase(),
                            domainMap.getComm()));

  auto localColMap = colMap->getLocalMap();
  Kokkos::parallel_for(
}
 
template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
void lowCommunicationMakeColMapAndReindex(
    const Kokkos::View<size_t*, typename Node::device_type> rowptr_view,
    const Kokkos::View<LocalOrdinal*, typename Node::device_type> colind_LID_view,
    const Kokkos::View<GlobalOrdinal*, typename Node::device_type> colind_GID_view,
    const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
    const Teuchos::ArrayView<const int>& owningPIDs,
    Teuchos::Array<int>& remotePIDs,
    Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap) {
  using Teuchos::rcp;
  typedef LocalOrdinal LO;
  typedef GlobalOrdinal GO;
  typedef Tpetra::global_size_t GST;
  typedef Tpetra::Map<LO, GO, Node> map_type;
  const char prefix[] = "lowCommunicationMakeColMapAndReindex: ";

  typedef typename Node::device_type DT;
  using execution_space = typename DT::execution_space;
  execution_space exec;
  using team_policy = Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
  typedef typename map_type::local_map_type local_map_type;

  // The domain Map tells us which column indices are locally owned.
  const map_type& domainMap            = *domainMapRCP;
  const local_map_type domainMap_local = domainMap.getLocalMap();
  const size_t numDomainElements       = domainMap.getLocalNumElements();
  size_t NumLocalColGIDs               = 0;
 
  // Scan all column indices and sort into two groups:
  // Local:  those whose GID matches a GID of the domain Map on this process
  //         (flagged in LocalGIDs_view and counted into NumLocalColGIDs).
  // Remote: all others, collected together with their owning PIDs in
  //         RemoteGIDs_view_map (a Kokkos::UnorderedMap).
  Kokkos::parallel_reduce(
        const int i = member.league_rank();
        Kokkos::parallel_reduce(
                  LocalGIDs_view[LID] = true;
                const int PID = owningPIDs_view[j];
                auto outcome  = RemoteGIDs_view_map.insert(GID, PID);
                if (outcome.success() && PID == -1) {
                  Kokkos::abort("Cannot figure out if ID is owned.\n");
                }
            NumLocalColGIDs_temp);
        if (member.team_rank() == 0) update += NumLocalColGIDs_temp;
      NumLocalColGIDs);
 
  // Each distinct remote GID appears exactly once in RemoteGIDs_view_map,
  // together with its owning PID.
  LO NumRemoteColGIDs = RemoteGIDs_view_map.size();

  Kokkos::View<int*, DT> PIDList_view("PIDList_d", NumRemoteColGIDs);

  Kokkos::View<GO*, DT> RemoteGIDList_view("RemoteGIDList", NumRemoteColGIDs);
  auto RemoteGIDList_host = Kokkos::create_mirror_view(RemoteGIDList_view);

  // Compact the unordered map's (GID, PID) entries into the dense arrays.
  Kokkos::parallel_scan(
      Kokkos::RangePolicy<execution_space>(0, RemoteGIDs_view_map.capacity()), KOKKOS_LAMBDA(const int i, GO& update, const bool final) {
        if (final && RemoteGIDs_view_map.valid_at(i)) {
          RemoteGIDList_view[update] = RemoteGIDs_view_map.key_at(i);
          PIDList_view[update]       = RemoteGIDs_view_map.value_at(i);
        }
        if (RemoteGIDs_view_map.valid_at(i)) {
          update += 1;
        }
      });
 
  // Possible short-circuit: all column indices are in the domain Map, so the
  // domain Map can serve as the column Map.
  if (domainMap.getComm()->getSize() == 1) {
    // Sanity check: with only one process there can be no remote GIDs.
    TEUCHOS_TEST_FOR_EXCEPTION(
        NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
        "process in the domain Map's communicator, which means that there are no "
        "\"remote\" indices.  Nevertheless, some column indices are not in the "
        "domain Map.");
    if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
      colMap = domainMapRCP;
      // Fill out colind_LID_view with the corresponding local indices.
      auto localColMap = colMap->getLocalMap();
      Kokkos::parallel_for(
          Kokkos::RangePolicy<execution_space>(0, colind_GID_view.size()), KOKKOS_LAMBDA(const int i) {
            colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
          });
      return;
    }
  }
 
  // Now build the array of column GIDs: local GIDs first, then remote GIDs
  // ordered by owning process ID.
  const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
  Kokkos::View<GO*, DT> ColIndices_view("ColIndices", numMyCols);

  // Copy the remote GIDs into the back end of ColIndices_view and sort them
  // by owning PID.
  if (NumRemoteColGIDs > 0) {
    if (NumLocalColGIDs != static_cast<size_t>(numMyCols)) {
      Kokkos::parallel_for(
          Kokkos::RangePolicy<execution_space>(0, NumRemoteColGIDs), KOKKOS_LAMBDA(const int i) {
            ColIndices_view[NumLocalColGIDs + i] = RemoteGIDList_view[i];
          });
    }

    // Find the largest PID, to size the bins for Kokkos::BinSort.
    int PID_max = 0;
    Kokkos::parallel_reduce(
        Kokkos::RangePolicy<execution_space>(0, PIDList_view.size()), KOKKOS_LAMBDA(const int i, int& max) {
          if (max < PIDList_view[i]) max = PIDList_view[i];
        },
        Kokkos::Max<int>(PID_max));

    // Bin-sort the remote GIDs (the back end of ColIndices_view) by owning PID.
    using KeyViewTypePID = decltype(PIDList_view);
    using BinSortOpPID   = Kokkos::BinOp1D<KeyViewTypePID>;

    auto ColIndices_subview = Kokkos::subview(ColIndices_view, Kokkos::make_pair(NumLocalColGIDs, ColIndices_view.size()));

    BinSortOpPID binOp2(PID_max + 1, 0, PID_max);

    Kokkos::BinSort<KeyViewTypePID, BinSortOpPID> bin_sort2(PIDList_view, 0, PIDList_view.size(), binOp2, false);
    bin_sort2.create_permute_vector(exec);
    bin_sort2.sort(exec, PIDList_view);
    bin_sort2.sort(exec, ColIndices_subview);

    // Stash the remote PIDs on the host.  Once remotePIDs is changed, the
    // caller will need to rebuild its Distributor from the receives.
    Teuchos::Array<int> PIDList(NumRemoteColGIDs);
    Kokkos::View<int*, Kokkos::HostSpace> PIDList_host(PIDList.data(), PIDList.size());
    Kokkos::deep_copy(exec, PIDList_host, PIDList_view);
    exec.fence("fence before setting PIDList");

    remotePIDs = PIDList;
 
    // Sort the remote GIDs within each block owned by the same PID.
    LO StartCurrent = 0, StartNext = 1;
    while (StartNext < NumRemoteColGIDs) {
      if (PIDList_host[StartNext] == PIDList_host[StartNext - 1]) {
        StartNext++;
      } else {
        Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
        StartCurrent = StartNext;
        StartNext++;
      }
    }
    Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
  }
 
  // Fill the front end of ColIndices_view with the local GIDs.  If every
  // domain Map GID is also a column GID, simply copy the domain Map's GIDs;
  // otherwise scan the domain Map GIDs and keep those flagged in
  // LocalGIDs_view, preserving the domain Map's ordering.
  if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
    if (NumLocalColGIDs > 0) {
      Kokkos::parallel_for(
          Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i) {
            ColIndices_view[i] = domainMap_local.getGlobalElement(i);
          });
    }
  } else {
    LO NumLocalAgain = 0;
    Kokkos::parallel_scan(
        Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i, LO& update, const bool final) {
          if (final && LocalGIDs_view[i]) {
            ColIndices_view[update] = domainMap_local.getGlobalElement(i);
          }
          if (LocalGIDs_view[i]) {
            update += 1;
          }
        },
        NumLocalAgain);

    TEUCHOS_TEST_FOR_EXCEPTION(
        static_cast<size_t>(NumLocalAgain) != NumLocalColGIDs,
        std::runtime_error, prefix << "Local ID count test failed.");
  }
 
  // Make the column Map.
  const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid();

  colMap = rcp(new map_type(minus_one, ColIndices_view, domainMap.getIndexBase(),
                            domainMap.getComm()));

  // Low-cost reindex of the matrix.
  auto localColMap = colMap->getLocalMap();
  Kokkos::parallel_for(
      Kokkos::RangePolicy<execution_space>(0, colind_GID_view.size()), KOKKOS_LAMBDA(const int i) {
        colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
      });
}
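// Example (illustrative sketch under assumed typedefs LO/GO/NT): the
// Kokkos::View overload takes the CRS arrays as device views; owningPIDs and
// remotePIDs stay on the host.
//
//   using device_type = typename NT::device_type;
//   Kokkos::View<size_t*, device_type> rowptr_d(/* ... */);
//   Kokkos::View<LO*, device_type>     colind_LID_d(/* ... */);
//   Kokkos::View<GO*, device_type>     colind_GID_d(/* ... */);
//   Teuchos::Array<int> remotePIDs;
//   Teuchos::RCP<const Tpetra::Map<LO, GO, NT>> colMap;
//   Tpetra::Import_Util::lowCommunicationMakeColMapAndReindex<LO, GO, NT>(
//       rowptr_d, colind_LID_d, colind_GID_d,
//       domainMap, owningPIDs, remotePIDs, colMap);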
 
// Generates a list of owning PIDs based on two transfer (aka Import/Export) objects.
template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
void getTwoTransferOwnershipVector(
    const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
    bool useReverseModeForOwnership,
    const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferForMigratingData,
    bool useReverseModeForMigration,
    Tpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node>& owningPIDs) {
#ifdef HAVE_TPETRA_DEBUG
  Teuchos::ArrayRCP<int> pids = temp.getDataNonConst();

  TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");

  TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector this option not yet implemented");

  TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
#endif
}

}  // namespace Import_Util
}  // namespace Tpetra

#endif  // TPETRA_IMPORT_UTIL2_HPP
 
 