Tpetra parallel linear algebra (Version of the Day)
Tpetra_Import_Util2.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_IMPORT_UTIL2_HPP
11#define TPETRA_IMPORT_UTIL2_HPP
12
17
18#include "Tpetra_ConfigDefs.hpp"
19#include "Tpetra_Import.hpp"
20#include "Tpetra_HashTable.hpp"
21#include "Tpetra_Map.hpp"
22#include "Tpetra_Util.hpp"
23#include "Tpetra_Distributor.hpp"
25#include "Tpetra_Vector.hpp"
26#include "Kokkos_DualView.hpp"
27#include "KokkosSparse_SortCrs.hpp"
28#include <Teuchos_Array.hpp>
31#include <Kokkos_Core.hpp>
32#include <Kokkos_UnorderedMap.hpp>
33#include <Kokkos_Sort.hpp>
34#include <utility>
35#include <set>
36
38
39namespace {
40
41struct LocalRemoteCount {
42 size_t numLocalColGIDs;
43 size_t numRemoteColGIDs;
44
45 KOKKOS_INLINE_FUNCTION
46 LocalRemoteCount()
47 : numLocalColGIDs(0)
48 , numRemoteColGIDs(0) {}
49
50 KOKKOS_INLINE_FUNCTION
51 LocalRemoteCount& operator+=(const LocalRemoteCount& src) {
52 numLocalColGIDs += src.numLocalColGIDs;
53 numRemoteColGIDs += src.numRemoteColGIDs;
54 return *this;
55 }
56};
57
58} // namespace
59
60namespace Kokkos {
61
62template <>
63struct reduction_identity<LocalRemoteCount> {
64 KOKKOS_FORCEINLINE_FUNCTION static LocalRemoteCount sum() {
65 return LocalRemoteCount();
66 }
67};
68} // namespace Kokkos
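// The reduction_identity specialization above is what lets LocalRemoteCount be
// used directly as the value type of a Kokkos sum reduction (see the team-level
// parallel_reduce in lowCommunicationMakeColMapAndReindex below). A minimal
// standalone sketch (hypothetical code, not part of this header):
//
//   LocalRemoteCount counts;
//   Kokkos::parallel_reduce(
//       Kokkos::RangePolicy<>(0, n),
//       KOKKOS_LAMBDA(const int i, LocalRemoteCount& update) {
//         if (isLocal(i)) update.numLocalColGIDs++;   // isLocal, n: placeholders
//         else            update.numRemoteColGIDs++;
//       },
//       counts);  // the implicit Sum reducer needs reduction_identity<T>::sum() and operator+=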
69
70namespace Tpetra {
71namespace Import_Util {
72
75template <typename Scalar, typename Ordinal>
76void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
77 const Teuchos::ArrayView<Ordinal>& CRS_colind,
78 const Teuchos::ArrayView<Scalar>& CRS_vals);
79
80template <typename Ordinal>
81void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
82 const Teuchos::ArrayView<Ordinal>& CRS_colind);
83
84template <typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
85void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
86 const colind_array_type& CRS_colind,
87 const vals_array_type& CRS_vals);
88
89template <typename rowptr_array_type, typename colind_array_type>
90void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
91 const colind_array_type& CRS_colind);
92
93template <typename local_crs_matrix>
94void sortCrsMatrix(local_crs_matrix& lclMatrix);
95
96template <typename local_crs_graph>
97void sortCrsGraph(local_crs_graph& lclGraph);
98
103template <typename Scalar, typename Ordinal>
104void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
105 const Teuchos::ArrayView<Ordinal>& CRS_colind,
106 const Teuchos::ArrayView<Scalar>& CRS_vals);
107
108template <typename Ordinal>
109void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
110 const Teuchos::ArrayView<Ordinal>& CRS_colind);
111
112template <class rowptr_view_type, class colind_view_type, class vals_view_type>
113void sortAndMergeCrsEntries(rowptr_view_type& CRS_rowptr,
114 colind_view_type& CRS_colind,
115 vals_view_type& CRS_vals,
116 const ::KokkosSparse::SortAlgorithm option);
117
133template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
134void lowCommunicationMakeColMapAndReindex(
135 const Teuchos::ArrayView<const size_t>& rowptr,
136 const Teuchos::ArrayView<LocalOrdinal>& colind_LID,
137 const Teuchos::ArrayView<GlobalOrdinal>& colind_GID,
138 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
139 const Teuchos::ArrayView<const int>& owningPIDs,
140 Teuchos::Array<int>& remotePIDs,
141 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap,
142 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
143
148template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
149void lowCommunicationMakeColMapAndReindex(
150 const Kokkos::View<const size_t*, typename Node::device_type> rowptr_view,
151 const Kokkos::View<LocalOrdinal*, typename Node::device_type> colind_LID_view,
152 const Kokkos::View<GlobalOrdinal*, typename Node::device_type> colind_GID_view,
153 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
154 const Kokkos::View<const int*, typename Node::device_type> owningPIDs_view,
155 Teuchos::Array<int>& remotePIDs,
156 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap,
157 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
158
172template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
173void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
174 bool useReverseModeForOwnership,
175 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferForMigratingData,
176 bool useReverseModeForMigration,
177 Tpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node>& owningPIDs);
178
179} // namespace Import_Util
180} // namespace Tpetra
181
182//
183// Implementations
184//
185
186namespace Tpetra {
187namespace Import_Util {
188
189template <typename PID, typename GlobalOrdinal>
190bool sort_PID_then_GID(const std::pair<PID, GlobalOrdinal>& a,
191 const std::pair<PID, GlobalOrdinal>& b) {
192 if (a.first != b.first)
193 return (a.first < b.first);
194 return (a.second < b.second);
195}
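// Example: used as a comparator for std::sort, sort_PID_then_GID orders
// {(2,5), (1,9), (1,3)} as {(1,3), (1,9), (2,5)} (ascending PID, then ascending GID).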
196
197template <typename PID,
198 typename GlobalOrdinal,
199 typename LocalOrdinal>
200bool sort_PID_then_pair_GID_LID(const std::pair<PID, std::pair<GlobalOrdinal, LocalOrdinal>>& a,
201 const std::pair<PID, std::pair<GlobalOrdinal, LocalOrdinal>>& b) {
202 if (a.first != b.first)
203 return a.first < b.first;
204 else
205 return (a.second.first < b.second.first);
206}
207
208template <typename Scalar,
209 typename LocalOrdinal,
210 typename GlobalOrdinal,
211 typename Node>
212void reverseNeighborDiscovery(const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& SourceMatrix,
213 const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type& rowptr,
214 const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type& colind,
215 const Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& RowTransfer,
216 Teuchos::RCP<const Tpetra::Import<LocalOrdinal, GlobalOrdinal, Node>> MyImporter,
217 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> MyDomainMap,
218 Teuchos::ArrayRCP<int>& type3PIDs,
219 Teuchos::ArrayRCP<LocalOrdinal>& type3LIDs,
220 Teuchos::RCP<const Teuchos::Comm<int>>& rcomm) {
221#ifdef HAVE_TPETRACORE_MPI
222 using ::Tpetra::Details::Behavior;
223 typedef LocalOrdinal LO;
224 typedef GlobalOrdinal GO;
225 typedef std::pair<GO, GO> pidgidpair_t;
226 using Teuchos::RCP;
227 const std::string prefix{" Import_Util2::ReverseND:: "};
228 const std::string label("IU2::Neighbor");
229
230 // There can be no neighbor discovery if you don't have an importer
231 if (MyImporter.is_null()) return;
232
233 std::ostringstream errstr;
234 bool error = false;
235 auto const comm = MyDomainMap->getComm();
236
237 MPI_Comm rawComm = getRawMpiComm(*comm);
238 const int MyPID = rcomm->getRank();
239
240 // Things related to messages I am sending in forward mode (RowTransfer)
241 // *** Note: this will be incorrect for transferAndFillComplete if it is in reverse mode. FIXME cbl.
242 auto ExportPIDs = RowTransfer.getExportPIDs();
243 auto ExportLIDs = RowTransfer.getExportLIDs();
244 auto NumExportLIDs = RowTransfer.getNumExportIDs();
245
246 Distributor& Distor = MyImporter->getDistributor();
247 const size_t NumRecvs = Distor.getNumReceives();
248 const size_t NumSends = Distor.getNumSends();
249 auto RemoteLIDs = MyImporter->getRemoteLIDs();
250 auto const ProcsFrom = Distor.getProcsFrom();
251 auto const ProcsTo = Distor.getProcsTo();
252
253 auto LengthsFrom = Distor.getLengthsFrom();
254 auto MyColMap = SourceMatrix.getColMap();
255 const size_t numCols = MyColMap->getLocalNumElements();
256 RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> target = MyImporter->getTargetMap();
257
258 // Get the owning PIDs in a special way,
259 // s.t. ProcsFrom[RemotePIDOrder[RemoteLIDs[j]]] is the proc that owns RemoteLIDs[j].
260 Teuchos::Array<int> RemotePIDOrder(numCols, -1);
261
262 // For each remote LID, record the index into ProcsFrom of the process that owns it.
263 for (size_t i = 0, j = 0; i < NumRecvs; ++i) {
264 for (size_t k = 0; k < LengthsFrom[i]; ++k) {
265 const int pid = ProcsFrom[i];
266 if (pid != MyPID) {
267 RemotePIDOrder[RemoteLIDs[j]] = i;
268 }
269 j++;
270 }
271 }
272
273 // Step One: Start tacking the (PID,GID) pairs onto the std::sets
274 //
275 // For each index in ProcsFrom, we will insert into a set of (PID,
276 // GID) pairs, in order to build a list of such pairs for each of
277 // those processes. Since this is building a reverse, we will send
278 // to these processes.
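 // After this packing step, RSB[k] holds the (export PID, column GID) pairs that
 // this rank will later send to ProcsFrom[k], deduplicated and ordered by PID then
 // GID (the std::set ordering on the pair), and ReverseSendSizes[k] is their count.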
279 Teuchos::Array<int> ReverseSendSizes(NumRecvs, 0);
280 // use raw C arrays wrapped in ArrayRCP to avoid Teuchos::Array value-initializing all of the reserved memory
281 Teuchos::Array<Teuchos::ArrayRCP<pidgidpair_t>> RSB(NumRecvs);
282
283 {
284 Tpetra::Details::ProfilingRegion set_all("Import_Util2::ReverseND::isMMallSetRSB");
285
286 // 25 Jul 2018: CBL
287 // todo:std::unordered_set (hash table),
288 // with an adequate prereservation ("bucket count").
289 // An unordered_set has to have a custom hasher for the pid/gid pair.
290 // However, when pidsets is copied to RSB, it will be in key
291 // order _not_ in pid,gid order. (unlike std::set).
292 // Implement this with a reserve, and time BOTH building pidsets
293 // _and_ the sort after the receive. Even if unordered_set saves
294 // time, if it causes the sort to be longer, it's not a win.
295
296 Teuchos::Array<std::set<pidgidpair_t>> pidsets(NumRecvs);
297 {
298 Tpetra::Details::ProfilingRegion set_insert("Import_Util2::ReverseND::isMMallSetRSBinsert");
299 for (size_t i = 0; i < NumExportLIDs; i++) {
300 LO lid = ExportLIDs[i];
301 GO exp_pid = ExportPIDs[i];
302 for (auto j = rowptr[lid]; j < rowptr[lid + 1]; j++) {
303 int pid_order = RemotePIDOrder[colind[j]];
304 if (pid_order != -1) {
305 GO gid = MyColMap->getGlobalElement(colind[j]); // Epetra SM.GCID46 =>sm->graph-> {colmap(colind)}
306 auto tpair = pidgidpair_t(exp_pid, gid);
307 pidsets[pid_order].insert(pidsets[pid_order].end(), tpair);
308 }
309 }
310 }
311 }
312
313 {
314 Tpetra::Details::ProfilingRegion set_cpy("Import_Util2::ReverseND::isMMallSetRSBcpy");
315 int jj = 0;
316 for (auto&& ps : pidsets) {
317 auto s = ps.size();
318 RSB[jj] = Teuchos::arcp(new pidgidpair_t[s], 0, s, true);
319 std::copy(ps.begin(), ps.end(), RSB[jj]);
320 ReverseSendSizes[jj] = s;
321 ++jj;
322 }
323 }
324 } // end of set based packing.
325
326 Teuchos::Array<int> ReverseRecvSizes(NumSends, -1);
327 Teuchos::Array<MPI_Request> rawBreq(ProcsFrom.size() + ProcsTo.size(), MPI_REQUEST_NULL);
328 // 25 Jul 2018: MPI_TAG_UB is the largest tag value; could be < 32768.
329 const int mpi_tag_base_ = 3;
330
331 int mpireq_idx = 0;
332 for (int i = 0; i < ProcsTo.size(); ++i) {
333 int Rec_Tag = mpi_tag_base_ + ProcsTo[i];
334 int* thisrecv = (int*)(&ReverseRecvSizes[i]);
335 MPI_Request rawRequest = MPI_REQUEST_NULL;
336 MPI_Irecv(const_cast<int*>(thisrecv),
337 1,
338 MPI_INT,
339 ProcsTo[i],
340 Rec_Tag,
341 rawComm,
342 &rawRequest);
343 rawBreq[mpireq_idx++] = rawRequest;
344 }
345 for (int i = 0; i < ProcsFrom.size(); ++i) {
346 int Send_Tag = mpi_tag_base_ + MyPID;
347 int* mysend = (int*)(&ReverseSendSizes[i]);
348 MPI_Request rawRequest = MPI_REQUEST_NULL;
349 MPI_Isend(mysend,
350 1,
351 MPI_INT,
352 ProcsFrom[i],
353 Send_Tag,
354 rawComm,
355 &rawRequest);
356 rawBreq[mpireq_idx++] = rawRequest;
357 }
358 Teuchos::Array<MPI_Status> rawBstatus(rawBreq.size());
359#ifdef HAVE_TPETRA_DEBUG
360 const int err1 =
361#endif
362 MPI_Waitall(rawBreq.size(), rawBreq.getRawPtr(),
363 rawBstatus.getRawPtr());
364
365#ifdef HAVE_TPETRA_DEBUG
366 if (err1) {
367 errstr << MyPID << "sE1 reverseNeighborDiscovery Mpi_Waitall error on send ";
368 error = true;
369 std::cerr << errstr.str() << std::flush;
370 }
371#endif
372
373 int totalexportpairrecsize = 0;
374 for (size_t i = 0; i < NumSends; ++i) {
375 totalexportpairrecsize += ReverseRecvSizes[i];
376#ifdef HAVE_TPETRA_DEBUG
377 if (ReverseRecvSizes[i] < 0) {
378 errstr << MyPID << "E4 reverseNeighborDiscovery: got < 0 for receive size " << ReverseRecvSizes[i] << std::endl;
379 error = true;
380 }
381#endif
382 }
383 Teuchos::ArrayRCP<pidgidpair_t> AllReverseRecv = Teuchos::arcp(new pidgidpair_t[totalexportpairrecsize], 0, totalexportpairrecsize, true);
384 int offset = 0;
385 mpireq_idx = 0;
386 for (int i = 0; i < ProcsTo.size(); ++i) {
387 int recv_data_size = ReverseRecvSizes[i] * 2;
388 int recvData_MPI_Tag = mpi_tag_base_ * 2 + ProcsTo[i];
389 MPI_Request rawRequest = MPI_REQUEST_NULL;
390 GO* rec_bptr = (GO*)(&AllReverseRecv[offset]);
391 offset += ReverseRecvSizes[i];
392 MPI_Irecv(rec_bptr,
393 recv_data_size,
394 ::Tpetra::Details::MpiTypeTraits<GO>::getType(rec_bptr[0]),
395 ProcsTo[i],
396 recvData_MPI_Tag,
397 rawComm,
398 &rawRequest);
399 rawBreq[mpireq_idx++] = rawRequest;
400 }
401 for (int ii = 0; ii < ProcsFrom.size(); ++ii) {
402 GO* send_bptr = (GO*)(RSB[ii].getRawPtr());
403 MPI_Request rawSequest = MPI_REQUEST_NULL;
404 int send_data_size = ReverseSendSizes[ii] * 2; // 2 == count of pair
405 int sendData_MPI_Tag = mpi_tag_base_ * 2 + MyPID;
406 MPI_Isend(send_bptr,
407 send_data_size,
408 ::Tpetra::Details::MpiTypeTraits<GO>::getType(send_bptr[0]),
409 ProcsFrom[ii],
410 sendData_MPI_Tag,
411 rawComm,
412 &rawSequest);
413
414 rawBreq[mpireq_idx++] = rawSequest;
415 }
416#ifdef HAVE_TPETRA_DEBUG
417 const int err =
418#endif
419 MPI_Waitall(rawBreq.size(),
420 rawBreq.getRawPtr(),
421 rawBstatus.getRawPtr());
422#ifdef HAVE_TPETRA_DEBUG
423 if (err) {
424 errstr << MyPID << "E3.r reverseNeighborDiscovery Mpi_Waitall error on receive ";
425 error = true;
426 std::cerr << errstr.str() << std::flush;
427 }
428#endif
429 std::sort(AllReverseRecv.begin(), AllReverseRecv.end(), Tpetra::Import_Util::sort_PID_then_GID<GlobalOrdinal, GlobalOrdinal>);
430
431 auto newEndOfPairs = std::unique(AllReverseRecv.begin(), AllReverseRecv.end());
432 // don't resize to remove non-unique, just use the end-of-unique iterator
433 if (AllReverseRecv.begin() == newEndOfPairs) return;
434 int ARRsize = std::distance(AllReverseRecv.begin(), newEndOfPairs);
435 auto rPIDs = Teuchos::arcp(new int[ARRsize], 0, ARRsize, true);
436 auto rLIDs = Teuchos::arcp(new LocalOrdinal[ARRsize], 0, ARRsize, true);
437
438 int tsize = 0;
439 for (auto itr = AllReverseRecv.begin(); itr != newEndOfPairs; ++itr) {
440 if ((int)(itr->first) != MyPID) {
441 rPIDs[tsize] = (int)itr->first;
442 LocalOrdinal lid = MyDomainMap->getLocalElement(itr->second);
443 rLIDs[tsize] = lid;
444 tsize++;
445 }
446 }
447
448 type3PIDs = rPIDs.persistingView(0, tsize);
449 type3LIDs = rLIDs.persistingView(0, tsize);
450
451 if (error) {
452 std::cerr << errstr.str() << std::flush;
453 comm->barrier();
454 comm->barrier();
455 comm->barrier();
456 MPI_Abort(MPI_COMM_WORLD, -1);
457 }
458#endif
459}
460
461// Note: This should get merged with the other Tpetra sort routines eventually.
462template <typename Scalar, typename Ordinal>
463void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
464 const Teuchos::ArrayView<Ordinal>& CRS_colind,
465 const Teuchos::ArrayView<Scalar>& CRS_vals) {
466 auto rowptr_k = Kokkos::View<size_t*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(CRS_rowptr.data(), CRS_rowptr.size());
467 auto colind_k = Kokkos::View<Ordinal*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(CRS_colind.data(), CRS_colind.size());
468 auto vals_k = Kokkos::View<Scalar*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(CRS_vals.data(), CRS_vals.size());
469 sortCrsEntries(rowptr_k, colind_k, vals_k);
470}
471
472template <typename Ordinal>
473void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
474 const Teuchos::ArrayView<Ordinal>& CRS_colind) {
475 auto rowptr_k = Kokkos::View<size_t*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(CRS_rowptr.data(), CRS_rowptr.size());
476 auto colind_k = Kokkos::View<Ordinal*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(CRS_colind.data(), CRS_colind.size());
477 sortCrsEntries(rowptr_k, colind_k);
478}
479
480template <typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
481void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
482 const colind_array_type& CRS_colind,
483 const vals_array_type& CRS_vals) {
484 KokkosSparse::SortAlgorithm option = KokkosSparse::SortAlgorithm::DEFAULT;
485 static constexpr bool is_cpu = std::is_same_v<typename rowptr_array_type::memory_space, Kokkos::HostSpace>;
486 if constexpr (is_cpu)
487 option = KokkosSparse::SortAlgorithm::SHELL;
488 KokkosSparse::sort_crs_matrix(CRS_rowptr, CRS_colind, CRS_vals,
489 KokkosKernels::ArithTraits<typename colind_array_type::non_const_value_type>::max(),
490 option);
491}
492
493template <typename local_crs_matrix>
494void sortCrsMatrix(local_crs_matrix& lclMatrix) {
495 KokkosSparse::SortAlgorithm option = KokkosSparse::SortAlgorithm::DEFAULT;
496 static constexpr bool is_cpu = std::is_same_v<typename local_crs_matrix::device_type::memory_space, Kokkos::HostSpace>;
497 if constexpr (is_cpu)
498 option = KokkosSparse::SortAlgorithm::SHELL;
499 KokkosSparse::sort_crs_matrix(lclMatrix, option);
500}
501
502template <typename local_crs_graph>
503void sortCrsGraph(local_crs_graph& lclGraph) {
504 KokkosSparse::SortAlgorithm option = KokkosSparse::SortAlgorithm::DEFAULT;
505 static constexpr bool is_cpu = std::is_same_v<typename local_crs_graph::device_type::memory_space, Kokkos::HostSpace>;
506 if constexpr (is_cpu)
507 option = KokkosSparse::SortAlgorithm::SHELL;
508 KokkosSparse::sort_crs_graph(lclGraph, KokkosKernels::ArithTraits<typename local_crs_graph::entries_type::non_const_value_type>::max(), option);
509}
510
511template <typename rowptr_array_type, typename colind_array_type>
512void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
513 const colind_array_type& CRS_colind) {
514 KokkosSparse::SortAlgorithm option = KokkosSparse::SortAlgorithm::DEFAULT;
515 static constexpr bool is_cpu = std::is_same_v<typename rowptr_array_type::memory_space, Kokkos::HostSpace>;
516 if constexpr (is_cpu)
517 option = KokkosSparse::SortAlgorithm::SHELL;
518 KokkosSparse::sort_crs_graph(CRS_rowptr, CRS_colind,
519 KokkosKernels::ArithTraits<typename colind_array_type::non_const_value_type>::max(),
520 option);
521}
522
523// Note: This should get merged with the other Tpetra sort routines eventually.
524template <typename Scalar, typename Ordinal>
525void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
526 const Teuchos::ArrayView<Ordinal>& CRS_colind,
527 const Teuchos::ArrayView<Scalar>& CRS_vals) {
528 // For each row, sort column entries from smallest to largest,
529 // merging column ids that are identical by adding their values. Use shell
530 // sort. Stable sort so it is fast if indices are already sorted.
531 // Code copied from Epetra_CrsMatrix::SortEntries()
532
533 if (CRS_rowptr.size() == 0) {
534 return; // no rows, so nothing to sort
535 }
536 const size_t NumRows = CRS_rowptr.size() - 1;
537 const size_t nnz = CRS_colind.size();
538 size_t new_curr = CRS_rowptr[0];
539 size_t old_curr = CRS_rowptr[0];
540
541 const bool permute_values_array = CRS_vals.size() > 0;
542
543 for (size_t i = 0; i < NumRows; i++) {
544 const size_t old_rowptr_i = CRS_rowptr[i];
545 CRS_rowptr[i] = old_curr;
546 if (old_rowptr_i >= nnz) continue;
547
548 size_t NumEntries = CRS_rowptr[i + 1] - old_rowptr_i;
549 Teuchos::ArrayRCP<Scalar> locValues;
550 if (permute_values_array)
551 locValues = Teuchos::arcp<Scalar>(&CRS_vals[old_rowptr_i], 0, NumEntries, false);
552 Teuchos::ArrayRCP<Ordinal> locIndices(&CRS_colind[old_rowptr_i], 0, NumEntries, false);
553
554 // Sort phase
555 Ordinal n = NumEntries;
556 Ordinal m = n / 2;
557
558 while (m > 0) {
559 Ordinal max = n - m;
560 for (Ordinal j = 0; j < max; j++) {
561 for (Ordinal k = j; k >= 0; k -= m) {
562 if (locIndices[k + m] >= locIndices[k])
563 break;
564 if (permute_values_array) {
565 Scalar dtemp = locValues[k + m];
566 locValues[k + m] = locValues[k];
567 locValues[k] = dtemp;
568 }
569 Ordinal itemp = locIndices[k + m];
570 locIndices[k + m] = locIndices[k];
571 locIndices[k] = itemp;
572 }
573 }
574 m = m / 2;
575 }
576
577 // Merge & shrink
578 for (size_t j = old_rowptr_i; j < CRS_rowptr[i + 1]; j++) {
579 if (j > old_rowptr_i && CRS_colind[j] == CRS_colind[new_curr - 1]) {
580 if (permute_values_array) CRS_vals[new_curr - 1] += CRS_vals[j];
581 } else if (new_curr == j) {
582 new_curr++;
583 } else {
584 CRS_colind[new_curr] = CRS_colind[j];
585 if (permute_values_array) CRS_vals[new_curr] = CRS_vals[j];
586 new_curr++;
587 }
588 }
589 old_curr = new_curr;
590 }
591
592 CRS_rowptr[NumRows] = new_curr;
593}
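// Worked example of the sort-and-merge above, for a single row:
//   input : rowptr = {0, 3}, colind = {3, 1, 3}, vals = {2.0, 5.0, 4.0}
//   output: rowptr = {0, 2}, colind = {1, 3},    vals = {5.0, 6.0}
// The duplicate column index 3 is merged by summing its values (2.0 + 4.0).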
594
595template <typename Ordinal>
596void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
597 const Teuchos::ArrayView<Ordinal>& CRS_colind) {
598 Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type> CRS_vals;
599 sortAndMergeCrsEntries(CRS_rowptr, CRS_colind, CRS_vals);
600}
601
602template <class rowptr_view_type, class colind_view_type, class vals_view_type>
603void sortAndMergeCrsEntries(rowptr_view_type& CRS_rowptr,
604 colind_view_type& CRS_colind,
605 vals_view_type& CRS_vals,
606 const ::KokkosSparse::SortAlgorithm option) {
607 using execution_space = typename vals_view_type::execution_space;
608
609 auto CRS_rowptr_in = CRS_rowptr;
610 auto CRS_colind_in = CRS_colind;
611 auto CRS_vals_in = CRS_vals;
612
613 KokkosSparse::sort_and_merge_matrix<execution_space, rowptr_view_type,
614 colind_view_type, vals_view_type>(CRS_rowptr_in, CRS_colind_in, CRS_vals_in,
615 CRS_rowptr, CRS_colind, CRS_vals,
616 KokkosKernels::ArithTraits<typename colind_view_type::non_const_value_type>::max(),
617 option);
618}
619
620template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
621void lowCommunicationMakeColMapAndReindexSerial(const Teuchos::ArrayView<const size_t>& rowptr,
622 const Teuchos::ArrayView<LocalOrdinal>& colind_LID,
623 const Teuchos::ArrayView<GlobalOrdinal>& colind_GID,
624 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
625 const Teuchos::ArrayView<const int>& owningPIDs,
626 Teuchos::Array<int>& remotePIDs,
627 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap,
628 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null) {
629 using Teuchos::rcp;
630 typedef LocalOrdinal LO;
631 typedef GlobalOrdinal GO;
632 typedef Tpetra::global_size_t GST;
633 typedef Tpetra::Map<LO, GO, Node> map_type;
634 const char prefix[] = "lowCommunicationMakeColMapAndReindexSerial: ";
635
636 // The domainMap is an RCP because there is a shortcut for a
637 // (common) special case to return the columnMap = domainMap.
638 const map_type& domainMap = *domainMapRCP;
639
640 // Scan all column indices and sort into two groups:
641 // Local: those whose GID matches a GID of the domain map on this processor and
642 // Remote: All others.
643 const size_t numDomainElements = domainMap.getLocalNumElements();
644 Teuchos::Array<bool> LocalGIDs;
645 if (numDomainElements > 0) {
646 LocalGIDs.resize(numDomainElements, false); // Assume domain GIDs are not local
647 }
648
649 // In principle it is good to have RemoteGIDs and RemoteGIDList be as
650 // long as the number of remote GIDs on this processor, but this
651 // would require two passes through the column IDs, so we make it
652 // the max of 100 and the number of block rows.
653 //
654 // FIXME (mfh 11 Feb 2015) Tpetra::Details::HashTable can hold at
655 // most INT_MAX entries, but it's possible to have more rows than
656 // that (if size_t is 64 bits and int is 32 bits).
657 const size_t numMyRows = rowptr.size() - 1;
658 const int hashsize = std::max(static_cast<int>(numMyRows), 100);
659
660 Tpetra::Details::HashTable<GO, LO> RemoteGIDs(hashsize);
661 Teuchos::Array<GO> RemoteGIDList;
662 RemoteGIDList.reserve(hashsize);
663 Teuchos::Array<int> PIDList;
664 PIDList.reserve(hashsize);
665
666 // Here we start using the *LocalOrdinal* colind_LID array. This is
667 // safe even if both columnIndices arrays are actually the same
668 // (because LocalOrdinal==GO). For *local* GIDs, set
669 // colind_LID to their LID in the domainMap. For *remote*
670 // GIDs, we set colind_LID with (numDomainElements+NumRemoteColGIDs)
671 // before the increment of the remote count. These numberings will
672 // be separate because no local LID is greater than
673 // numDomainElements.
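 // Example with numDomainElements == 4: a column GID that has LID 2 in the domain
 // Map gets colind_LID[j] = 2, while the first, second, ... distinct remote column
 // GIDs encountered get colind_LID[j] = 4, 5, ... (these are reindexed again in the
 // low-cost reindex pass further below).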
674
675 size_t NumLocalColGIDs = 0;
676 LO NumRemoteColGIDs = 0;
677 for (size_t i = 0; i < numMyRows; ++i) {
678 for (size_t j = rowptr[i]; j < rowptr[i + 1]; ++j) {
679 const GO GID = colind_GID[j];
680 // Check if GID matches a row GID
681 const LO LID = domainMap.getLocalElement(GID);
682 if (LID != -1) {
683 const bool alreadyFound = LocalGIDs[LID];
684 if (!alreadyFound) {
685 LocalGIDs[LID] = true; // There is a column in the graph associated with this domain map GID
686 NumLocalColGIDs++;
687 }
688 colind_LID[j] = LID;
689 } else {
690 const LO hash_value = RemoteGIDs.get(GID);
691 if (hash_value == -1) { // This means its a new remote GID
692 const int PID = owningPIDs[j];
693 TEUCHOS_TEST_FOR_EXCEPTION(
694 PID == -1, std::invalid_argument, prefix << "Cannot figure out if "
695 "PID is owned.");
696 colind_LID[j] = static_cast<LO>(numDomainElements + NumRemoteColGIDs);
697 RemoteGIDs.add(GID, NumRemoteColGIDs);
698 RemoteGIDList.push_back(GID);
699 PIDList.push_back(PID);
700 NumRemoteColGIDs++;
701 } else {
702 colind_LID[j] = static_cast<LO>(numDomainElements + hash_value);
703 }
704 }
705 }
706 }
707
708 // Possible short-circuit: If all domain map GIDs are present as
709 // column indices, then set ColMap=domainMap and quit.
710 if (domainMap.getComm()->getSize() == 1) {
711 // Sanity check: When there is only one process, there can be no
712 // remoteGIDs.
713 TEUCHOS_TEST_FOR_EXCEPTION(
714 NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
715 "process in the domain Map's communicator, which means that there are no "
716 "\"remote\" indices. Nevertheless, some column indices are not in the "
717 "domain Map.");
718 if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
719 // In this case, we just use the domainMap's indices, which is,
720 // not coincidentally, what we clobbered colind with up above
721 // anyway. No further reindexing is needed.
722 colMap = domainMapRCP;
723 return;
724 }
725 }
726
727 // Launch reduction to get global num entries in the column map
728 const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
729 GST numMyColsGST = static_cast<GST>(numMyCols);
730 GST numGlobalCols;
731 auto req = Details::iallreduce(numMyColsGST, numGlobalCols, Teuchos::REDUCE_SUM, *domainMap.getComm());
732
733 // Now build the array containing column GIDs
734 // Build back end, containing remote GIDs, first
735 Teuchos::Array<GO> ColIndices;
736 GO* RemoteColIndices = NULL;
737 if (numMyCols > 0) {
738 ColIndices.resize(numMyCols);
739 if (NumLocalColGIDs != static_cast<size_t>(numMyCols)) {
740 RemoteColIndices = &ColIndices[NumLocalColGIDs]; // Points to back half of ColIndices
741 }
742 }
743
744 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
745 RemoteColIndices[i] = RemoteGIDList[i];
746 }
747
748 // Build permute array for *remote* reindexing.
749 Teuchos::Array<LO> RemotePermuteIDs(NumRemoteColGIDs);
750 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
751 RemotePermuteIDs[i] = i;
752 }
753
754 // Sort External column indices so that all columns coming from a
755 // given remote processor are contiguous. This is a sort with two
756 // auxiliary arrays: RemoteColIndices and RemotePermuteIDs.
757 Tpetra::sort3(PIDList.begin(), PIDList.end(),
758 ColIndices.begin() + NumLocalColGIDs,
759 RemotePermuteIDs.begin());
760
761 // Stash the RemotePIDs.
762 //
763 // Note: If Teuchos::Array had a shrink_to_fit like std::vector,
764 // we'd call it here.
765 remotePIDs = PIDList;
766
767 // Sort external column indices so that columns from a given remote
768 // processor are not only contiguous but also in ascending
769 // order. NOTE: I don't know if the number of externals associated
770 // with a given remote processor is known at this point ... so I
771 // count them here.
772
773 // NTS: Only sort the RemoteColIndices this time...
774 LO StartCurrent = 0, StartNext = 1;
775 while (StartNext < NumRemoteColGIDs) {
776 if (PIDList[StartNext] == PIDList[StartNext - 1]) {
777 StartNext++;
778 } else {
779 Tpetra::sort2(ColIndices.begin() + NumLocalColGIDs + StartCurrent,
780 ColIndices.begin() + NumLocalColGIDs + StartNext,
781 RemotePermuteIDs.begin() + StartCurrent);
782 StartCurrent = StartNext;
783 StartNext++;
784 }
785 }
786 Tpetra::sort2(ColIndices.begin() + NumLocalColGIDs + StartCurrent,
787 ColIndices.begin() + NumLocalColGIDs + StartNext,
788 RemotePermuteIDs.begin() + StartCurrent);
789
790 // Reverse the permutation to get the information we actually care about
791 Teuchos::Array<LO> ReverseRemotePermuteIDs(NumRemoteColGIDs);
792 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
793 ReverseRemotePermuteIDs[RemotePermuteIDs[i]] = i;
794 }
795
796 // Build permute array for *local* reindexing.
797 bool use_local_permute = false;
798 Teuchos::Array<LO> LocalPermuteIDs(numDomainElements);
799
800 // Now fill front end. Two cases:
801 //
802 // (1) If the number of Local column GIDs is the same as the number
803 // of Local domain GIDs, we can simply read the domain GIDs into
804 // the front part of ColIndices, otherwise
805 //
806 // (2) We step through the GIDs of the domainMap, checking to see if
807 // each domain GID is a column GID. we want to do this to
808 // maintain a consistent ordering of GIDs between the columns
809 // and the domain.
810 Teuchos::ArrayView<const GO> domainGlobalElements = domainMap.getLocalElementList();
811 if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
812 if (NumLocalColGIDs > 0) {
813 // Load Global Indices into first numMyCols elements column GID list
814 std::copy(domainGlobalElements.begin(), domainGlobalElements.end(),
815 ColIndices.begin());
816 }
817 } else {
818 LO NumLocalAgain = 0;
819 use_local_permute = true;
820 for (size_t i = 0; i < numDomainElements; ++i) {
821 if (LocalGIDs[i]) {
822 LocalPermuteIDs[i] = NumLocalAgain;
823 ColIndices[NumLocalAgain++] = domainGlobalElements[i];
824 }
825 }
826 TEUCHOS_TEST_FOR_EXCEPTION(
827 static_cast<size_t>(NumLocalAgain) != NumLocalColGIDs,
828 std::runtime_error, prefix << "Local ID count test failed.");
829 }
830
831 // Low-cost reindex of the matrix
832 for (size_t i = 0; i < numMyRows; ++i) {
833 for (size_t j = rowptr[i]; j < rowptr[i + 1]; ++j) {
834 const LO ID = colind_LID[j];
835 if (static_cast<size_t>(ID) < numDomainElements) {
836 if (use_local_permute) {
837 colind_LID[j] = LocalPermuteIDs[colind_LID[j]];
838 }
839 // In the case where use_local_permute==false, we just copy
840 // the DomainMap's ordering, which it so happens is what we
841 // put in colind_LID to begin with.
842 } else {
843 colind_LID[j] = NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j] - numDomainElements];
844 }
845 }
846 }
847
848 // Make column Map
849 req->wait();
850 colMap = rcp(new map_type(numGlobalCols, ColIndices, domainMap.getIndexBase(),
851 domainMap.getComm(),
852 params));
853}
854
855template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
856void lowCommunicationMakeColMapAndReindex(
857 const Teuchos::ArrayView<const size_t>& rowptr,
858 const Teuchos::ArrayView<LocalOrdinal>& colind_LID,
859 const Teuchos::ArrayView<GlobalOrdinal>& colind_GID,
860 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
861 const Teuchos::ArrayView<const int>& owningPIDs,
862 Teuchos::Array<int>& remotePIDs,
863 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap,
864 const Teuchos::RCP<Teuchos::ParameterList>& params) {
865 using DT = typename Node::device_type;
866 using execution_space = typename DT::execution_space;
867 execution_space exec;
868 DT device;
869
870 // Create device mirror and host mirror views from function parameters
871 // When we pass in views instead of Teuchos::ArrayViews, we can avoid copying views
872 auto colind_LID_view = Details::create_mirror_view_from_raw_host_array(device, colind_LID.getRawPtr(), colind_LID.size(), true, "colind_LID");
873 auto rowptr_view = Details::create_mirror_view_from_raw_host_array(device, rowptr.getRawPtr(), rowptr.size(), true, "rowptr");
874 auto colind_GID_view = Details::create_mirror_view_from_raw_host_array(device, colind_GID.getRawPtr(), colind_GID.size(), true, "colind_GID");
875 auto owningPIDs_view = Details::create_mirror_view_from_raw_host_array(device, owningPIDs.getRawPtr(), owningPIDs.size(), true, "owningPIDs");
876
877 typename decltype(colind_LID_view)::host_mirror_type colind_LID_host(colind_LID.getRawPtr(), colind_LID.size());
878
879 lowCommunicationMakeColMapAndReindex<LocalOrdinal, GlobalOrdinal, Node>(rowptr_view, colind_LID_view, colind_GID_view, domainMapRCP, owningPIDs_view, remotePIDs, colMap, params);
880
881 // For now, we copy back into colind_LID_host (which also overwrites the colind_LID Teuchos array)
882 // When colind_LID becomes a Kokkos View we can delete this
883 Kokkos::deep_copy(exec, colind_LID_host, colind_LID_view);
884}
885
886template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
887void lowCommunicationMakeColMapAndReindex(
888 const Kokkos::View<const size_t*, typename Node::device_type> rowptr_view,
889 const Kokkos::View<LocalOrdinal*, typename Node::device_type> colind_LID_view,
890 const Kokkos::View<GlobalOrdinal*, typename Node::device_type> colind_GID_view,
891 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
892 const Kokkos::View<const int*, typename Node::device_type> owningPIDs_view,
893 Teuchos::Array<int>& remotePIDs,
894 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap,
895 const Teuchos::RCP<Teuchos::ParameterList>& params) {
896 using Teuchos::rcp;
897 typedef LocalOrdinal LO;
898 typedef GlobalOrdinal GO;
899 typedef Tpetra::global_size_t GST;
900 typedef Tpetra::Map<LO, GO, Node> map_type;
901 const char prefix[] = "lowCommunicationMakeColMapAndReindex: ";
902 Tpetra::Details::ProfilingRegion regOuter("lowCommunicationMakeColMapAndReindex");
903
904 typedef typename Node::device_type DT;
905 using execution_space = typename DT::execution_space;
906 using memory_space = typename DT::memory_space;
907 execution_space exec;
908 using team_policy = Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
909 typedef typename map_type::local_map_type local_map_type;
910
911 // The domainMap is an RCP because there is a shortcut for a
912 // (common) special case to return the columnMap = domainMap.
913 const map_type& domainMap = *domainMapRCP;
914
915 const size_t numMyRows = rowptr_view.size() - 1;
916 local_map_type domainMap_local = domainMap.getLocalMap();
917
918 const size_t numDomainElements = domainMap.getLocalNumElements();
919 Kokkos::View<bool*, DT> LocalGIDs_view("LocalGIDs", numDomainElements);
920
921 // Initial capacity of the GID hash map: ~20% headroom, with a floor of 5000 entries
922 // (the map is enlarged below if inserts fail).
923 auto map_capacity = std::max(uint32_t(1.2 * colind_LID_view.size()),
 uint32_t(5000));
924 Kokkos::UnorderedMap<GO, int, DT> GIDs_map(map_capacity);
925 size_t NumLocalColGIDs;
926 size_t NumRemoteColGIDs;
927 while (true) {
928 Tpetra::Details::ProfilingRegion reg("lowCommunicationMakeColMapAndReindex::fill_map");
929 const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
930
931 // Scan all column indices and sort into two groups:
932 // Local: those whose GID matches a GID of the domain map on this processor and
933 // Remote: All others.
934 // Kokkos::parallel_reduce sums up NumLocalColGIDs, while we use the size of the remote GIDs map to find NumRemoteColGIDs
935 LocalRemoteCount counts;
936 Kokkos::parallel_reduce(
937 team_policy(numMyRows, Kokkos::AUTO), KOKKOS_LAMBDA(const typename team_policy::member_type& member, LocalRemoteCount& update) {
938 const int i = member.league_rank();
939 LocalRemoteCount NumLocalColGIDs_temp;
940 size_t rowptr_start = rowptr_view[i];
941 size_t rowptr_end = rowptr_view[i + 1];
942 Kokkos::parallel_reduce(
943 Kokkos::TeamThreadRange(member, rowptr_start, rowptr_end), [&](const size_t j, LocalRemoteCount& innerUpdate) {
944 const GO GID = colind_GID_view[j];
945 // Check if GID matches a row GID in local domain map
946 const LO LID = domainMap_local.getLocalElement(GID);
947 if (LID != LINVALID) {
948 auto outcome = GIDs_map.insert(GID, -1);
949 // Fresh insert
950 if (outcome.success()) {
951 LocalGIDs_view[LID] = true;
952 innerUpdate.numLocalColGIDs++;
953 }
954 } else {
955 const int PID = owningPIDs_view[j];
956 auto outcome = GIDs_map.insert(GID, PID);
957 if (outcome.success()) {
958 innerUpdate.numRemoteColGIDs++;
959 if (PID == -1) {
960 Kokkos::abort("Cannot figure out if ID is owned.\n");
961 }
962 }
963 }
964 },
965 NumLocalColGIDs_temp);
966 if (member.team_rank() == 0) update += NumLocalColGIDs_temp;
967 },
968 counts);
969 if (!GIDs_map.failed_insert()) {
970 NumLocalColGIDs = counts.numLocalColGIDs;
971 NumRemoteColGIDs = counts.numRemoteColGIDs;
972 break;
973 } else {
974 map_capacity *= 1.5;
975 GIDs_map = Kokkos::UnorderedMap<GO, int, DT>(map_capacity);
976 }
977 }
978
979 // Possible short-circuit: If all domain map GIDs are present as
980 // column indices, then set ColMap=domainMap and quit.
981 if (domainMap.getComm()->getSize() == 1) {
982 // Sanity check: When there is only one process, there can be no
983 // remoteGIDs.
984 TEUCHOS_TEST_FOR_EXCEPTION(
985 NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
986 "process in the domain Map's communicator, which means that there are no "
987 "\"remote\" indices. Nevertheless, some column indices are not in the "
988 "domain Map.");
989 if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
990 // In this case, we just use the domainMap's indices, which is,
991 // not coincidentally, what we clobbered colind with up above
992 // anyway. No further reindexing is needed.
993 colMap = domainMapRCP;
994
995 // Fill out local colMap (which should only contain local GIDs)
996 auto localColMap = colMap->getLocalMap();
997 Kokkos::parallel_for(
998 Kokkos::RangePolicy<execution_space>(0, colind_GID_view.size()), KOKKOS_LAMBDA(const int i) {
999 colind_LID_view(i) = localColMap.getLocalElement(colind_GID_view(i));
1000 });
1001 return;
1002 }
1003 }
1004
1005 Kokkos::View<GO*, DT> RemoteColMapIndices_unsorted(Kokkos::ViewAllocateWithoutInitializing("RemoteColMapIndices_unsorted"), NumRemoteColGIDs);
1006 Kokkos::View<int*, DT> RemotePIDs_unsorted(Kokkos::ViewAllocateWithoutInitializing("RemotePIDs_unsorted"), NumRemoteColGIDs);
1007
1008 // For each slot of GIDs_map that holds a *remote* GID, use "update" to count the number of remote GIDs "before" it.
1009 // This maps each remote entry of the hash table to a unique index in RemoteColMapIndices_unsorted / RemotePIDs_unsorted, with no gaps or overwrites.
1010
1011 {
1012 Tpetra::Details::ProfilingRegion reg("lowCommunicationMakeColMapAndReindex::process_map");
1013 size_t numEnteredRemotes;
1014 Kokkos::parallel_scan(
1015 Kokkos::RangePolicy<execution_space>(0, GIDs_map.capacity()), KOKKOS_LAMBDA(const int i, size_t& update, const bool final) {
1016 if (GIDs_map.valid_at(i) && GIDs_map.value_at(i) != -1) {
1017 if (final) {
1018 RemoteColMapIndices_unsorted(update) = GIDs_map.key_at(i);
1019 RemotePIDs_unsorted(update) = GIDs_map.value_at(i);
1020 }
1021 ++update;
1022 }
1023 },
1024 numEnteredRemotes);
1025 TEUCHOS_ASSERT(numEnteredRemotes == NumRemoteColGIDs);
1026 }
1027
1028 // Launch reduction to get global num entries in the column map
1029 const size_t numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
1030 GST numMyColsGST = static_cast<GST>(numMyCols);
1031 GST numGlobalCols;
1032 auto req = Details::iallreduce(numMyColsGST, numGlobalCols, Teuchos::REDUCE_SUM, *domainMap.getComm());
1033
1034 // Now build the array containing column GIDs
1035 // Build back end, containing remote GIDs, first
1036 Kokkos::View<GO*, DT> ColMapIndices(Kokkos::ViewAllocateWithoutInitializing("ColMapIndices"), numMyCols);
1037
1038 // We don't need to load the back end of ColMapIndices or sort if there are no remote GIDs
1039 if (NumRemoteColGIDs > 0) {
1040 Tpetra::Details::ProfilingRegion reg("lowCommunicationMakeColMapAndReindex::sort");
1041 auto RemoteColMapIndices_sorted = Kokkos::subview(ColMapIndices, Kokkos::make_pair(NumLocalColGIDs, numMyCols));
1042
1043 // Create a permutation that sorts first by PID and then by GID.
1044 Kokkos::View<size_t*, memory_space> index(Kokkos::ViewAllocateWithoutInitializing("index"), NumRemoteColGIDs);
1045 Kokkos::parallel_for(
1046 Kokkos::RangePolicy<execution_space>(0, NumRemoteColGIDs), KOKKOS_LAMBDA(const size_t i) {
1047 index(i) = i;
1048 });
1049 Kokkos::sort(
1050 exec, index, KOKKOS_LAMBDA(const size_t i, const size_t j) {
1051 auto pid_i = RemotePIDs_unsorted(i);
1052 auto pid_j = RemotePIDs_unsorted(j);
1053 if (pid_i == pid_j)
1054 return RemoteColMapIndices_unsorted(i) < RemoteColMapIndices_unsorted(j);
1055 else
1056 return pid_i < pid_j;
1057 });
1058 // Apply the permutation to remote GIDs and PIDs.
1059 Kokkos::View<int*, memory_space> RemotePIDs_sorted(Kokkos::ViewAllocateWithoutInitializing("RemotePIDs_new"), NumRemoteColGIDs);
1060 Kokkos::parallel_for(
1061 Kokkos::RangePolicy<execution_space>(0, NumRemoteColGIDs), KOKKOS_LAMBDA(const size_t i) {
1062 RemoteColMapIndices_sorted(i) = RemoteColMapIndices_unsorted(index(i));
1063 RemotePIDs_sorted(i) = RemotePIDs_unsorted(index(i));
1064 });
1065
1066 // Deep copy back from device to host
1067 // Stash the RemotePIDs. Once remotePIDs is changed to become a Kokkos view, we can remove this and copy directly.
1068 Teuchos::Array<int> PIDList(NumRemoteColGIDs);
1069 Kokkos::View<int*, Kokkos::HostSpace> RemotePIDs_sorted_host(PIDList.data(), NumRemoteColGIDs);
1070 Kokkos::deep_copy(exec, RemotePIDs_sorted_host, RemotePIDs_sorted);
1071 exec.fence();
1072
1073 remotePIDs = PIDList;
1074 }
1075
1076 // Build permute array for *local* reindexing.
1077
1078 // Now fill front end. Two cases:
1079 //
1080 // (1) If the number of Local column GIDs is the same as the number
1081 // of Local domain GIDs, we can simply read the domain GIDs into
1082 // the front part of ColIndices, otherwise
1083 //
1084 // (2) We step through the GIDs of the domainMap, checking to see if
1085 // each domain GID is a column GID. we want to do this to
1086 // maintain a consistent ordering of GIDs between the columns
1087 // and the domain.
1088 if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
1089 if (NumLocalColGIDs > 0) {
1090 // Load Global Indices into first numMyCols elements column GID list
1091 auto LocalColMapIndices = Kokkos::subview(ColMapIndices, Kokkos::make_pair((size_t)0, numDomainElements));
1092 Kokkos::deep_copy(LocalColMapIndices, domainMap.getMyGlobalIndicesDevice());
1093 }
1094 } else {
1095 // This part isn't actually tested in the unit tests
1096 LO NumLocalAgain = 0;
1097 Kokkos::parallel_scan(
1098 Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i, LO& update, const bool final) {
1099 if (LocalGIDs_view(i)) {
1100 if (final) {
1101 ColMapIndices(update) = domainMap_local.getGlobalElement(i);
1102 }
1103 update++;
1104 }
1105 },
1106 NumLocalAgain);
1107
1108 TEUCHOS_TEST_FOR_EXCEPTION(
1109 static_cast<size_t>(NumLocalAgain) != NumLocalColGIDs,
1110 std::runtime_error, prefix << "Local ID count test failed.");
1111 }
1112
1113 // Make column Map
1114 req->wait();
1115 colMap = rcp(new map_type(numGlobalCols, ColMapIndices, domainMap.getIndexBase(),
1116 domainMap.getComm(), params));
1117
1118 // Fill out colind_LID using local map
1119 auto localColMap = colMap->getLocalMap();
1120 Kokkos::parallel_for(
1121 Kokkos::RangePolicy<execution_space>(0, colind_GID_view.size()), KOKKOS_LAMBDA(const int i) {
1122 colind_LID_view(i) = localColMap.getLocalElement(colind_GID_view(i));
1123 });
1124}
1125
1126// Generates a list of owning PIDs based on two transfers (aka Import/Export objects)
1127// Let:
1128// OwningMap = useReverseModeForOwnership ? transferThatDefinesOwnership.getTargetMap() : transferThatDefinesOwnership.getSourceMap();
1129// MapAo = useReverseModeForOwnership ? transferThatDefinesOwnership.getSourceMap() : transferThatDefinesOwnership.getTargetMap();
1130// MapAm = useReverseModeForMigration ? transferThatDefinesMigration.getTargetMap() : transferThatDefinesMigration.getSourceMap();
1131// VectorMap = useReverseModeForMigration ? transferThatDefinesMigration.getSourceMap() : transferThatDefinesMigration.getTargetMap();
1132// Precondition:
1133// 1) MapAo->isSameAs(*MapAm) - map compatibility between transfers
1134// 2) VectorMap->isSameAs(*owningPIDs->getMap()) - map compatibility between transfer & vector
1135// 3) OwningMap->isOneToOne() - owning map is 1-to-1
1136// --- Precondition 3 is only checked in DEBUG mode ---
1137// Postcondition:
1138// owningPIDs[VectorMap->getLocalElement(GID i)] = j iff (OwningMap->isLocalElement(GID i) on rank j)
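// Example: if GID 42 is owned by rank 3 (OwningMap->isLocalElement(42) is true only
// on rank 3), then after this call owningPIDs[VectorMap->getLocalElement(42)] == 3
// on every rank whose VectorMap contains GID 42.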
1139template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
1140void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
1141 bool useReverseModeForOwnership,
1142 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesMigration,
1143 bool useReverseModeForMigration,
1144 Tpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node>& owningPIDs) {
1145 typedef Tpetra::Import<LocalOrdinal, GlobalOrdinal, Node> import_type;
1146 typedef Tpetra::Export<LocalOrdinal, GlobalOrdinal, Node> export_type;
1147
1148 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> OwningMap = useReverseModeForOwnership ? transferThatDefinesOwnership.getTargetMap() : transferThatDefinesOwnership.getSourceMap();
1149 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> MapAo = useReverseModeForOwnership ? transferThatDefinesOwnership.getSourceMap() : transferThatDefinesOwnership.getTargetMap();
1150 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> MapAm = useReverseModeForMigration ? transferThatDefinesMigration.getTargetMap() : transferThatDefinesMigration.getSourceMap();
1151 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> VectorMap = useReverseModeForMigration ? transferThatDefinesMigration.getSourceMap() : transferThatDefinesMigration.getTargetMap();
1152
1153 TEUCHOS_TEST_FOR_EXCEPTION(!MapAo->isSameAs(*MapAm), std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch between transfers");
1154 TEUCHOS_TEST_FOR_EXCEPTION(!VectorMap->isSameAs(*owningPIDs.getMap()), std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch transfer and vector");
1155#ifdef HAVE_TPETRA_DEBUG
1156 TEUCHOS_TEST_FOR_EXCEPTION(!OwningMap->isOneToOne(), std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
1157#endif
1158
1159 int rank = OwningMap->getComm()->getRank();
1160 // Generate "A" vector and fill it with owning information. We can read this from transferThatDefinesOwnership w/o communication
1161 // Note: Due to the 1-to-1 requirement, several of these options throw
1162 Tpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node> temp(MapAo);
1163 const import_type* ownAsImport = dynamic_cast<const import_type*>(&transferThatDefinesOwnership);
1164 const export_type* ownAsExport = dynamic_cast<const export_type*>(&transferThatDefinesOwnership);
1165
1166 Teuchos::ArrayRCP<int> pids = temp.getDataNonConst();
1167 Teuchos::ArrayView<int> v_pids = pids();
1168 if (ownAsImport && useReverseModeForOwnership) {
1169 TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
1170 } else if (ownAsImport && !useReverseModeForOwnership) {
1171 getPids(*ownAsImport, v_pids, false);
1172 } else if (ownAsExport && useReverseModeForOwnership) {
1173 TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector this option not yet implemented");
1174 } else {
1175 TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
1176 }
1177
1178 const import_type* xferAsImport = dynamic_cast<const import_type*>(&transferThatDefinesMigration);
1179 const export_type* xferAsExport = dynamic_cast<const export_type*>(&transferThatDefinesMigration);
1180 TEUCHOS_TEST_FOR_EXCEPTION(!xferAsImport && !xferAsExport, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector transfer undefined");
1181
1182 // Migrate from "A" vector to output vector
1183 owningPIDs.putScalar(rank);
1184 if (xferAsImport && useReverseModeForMigration)
1185 owningPIDs.doExport(temp, *xferAsImport, Tpetra::REPLACE);
1186 else if (xferAsImport && !useReverseModeForMigration)
1187 owningPIDs.doImport(temp, *xferAsImport, Tpetra::REPLACE);
1188 else if (xferAsExport && useReverseModeForMigration)
1189 owningPIDs.doImport(temp, *xferAsExport, Tpetra::REPLACE);
1190 else
1191 owningPIDs.doExport(temp, *xferAsExport, Tpetra::REPLACE);
1192}
1193
1194} // namespace Import_Util
1195} // namespace Tpetra
1196
1197#endif // TPETRA_IMPORT_UTIL_HPP