// Tpetra_Import_Util2.hpp
// NOTE: this excerpt was recovered from a Doxygen-generated source listing;
// viewer chrome ("Loading...", "Searching...", "No Matches") removed.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_IMPORT_UTIL2_HPP
11#define TPETRA_IMPORT_UTIL2_HPP
12
17
18#include "Tpetra_ConfigDefs.hpp"
19#include "Tpetra_Import.hpp"
20#include "Tpetra_HashTable.hpp"
21#include "Tpetra_Map.hpp"
22#include "Tpetra_Util.hpp"
23#include "Tpetra_Distributor.hpp"
25#include "Tpetra_Vector.hpp"
26#include "Kokkos_DualView.hpp"
27#include "KokkosSparse_SortCrs.hpp"
28#include <Teuchos_Array.hpp>
31#include <Kokkos_Core.hpp>
32#include <Kokkos_UnorderedMap.hpp>
33#include <Kokkos_Sort.hpp>
34#include <utility>
35#include <set>
36
38
39namespace {
40
41struct LocalRemoteCount {
42 size_t numLocalColGIDs;
43 size_t numRemoteColGIDs;
44
45 KOKKOS_INLINE_FUNCTION
46 LocalRemoteCount()
47 : numLocalColGIDs(0)
48 , numRemoteColGIDs(0) {}
49
50 KOKKOS_INLINE_FUNCTION
51 LocalRemoteCount& operator+=(const LocalRemoteCount& src) {
52 numLocalColGIDs += src.numLocalColGIDs;
53 numRemoteColGIDs += src.numRemoteColGIDs;
54 return *this;
55 }
56};
57
58} // namespace
59
60namespace Kokkos {
61
62template <>
63struct reduction_identity<LocalRemoteCount> {
64 KOKKOS_FORCEINLINE_FUNCTION static LocalRemoteCount sum() {
65 return LocalRemoteCount();
66 }
67};
68} // namespace Kokkos
69
70namespace Tpetra {
71namespace Import_Util {
72
75template <typename Scalar, typename Ordinal>
76void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
77 const Teuchos::ArrayView<Ordinal>& CRS_colind,
78 const Teuchos::ArrayView<Scalar>& CRS_vals);
79
80template <typename Ordinal>
81void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
82 const Teuchos::ArrayView<Ordinal>& CRS_colind);
83
84template <typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
85void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
86 const colind_array_type& CRS_colind,
87 const vals_array_type& CRS_vals);
88
89template <typename rowptr_array_type, typename colind_array_type>
90void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
91 const colind_array_type& CRS_colind);
92
97template <typename Scalar, typename Ordinal>
98void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
99 const Teuchos::ArrayView<Ordinal>& CRS_colind,
100 const Teuchos::ArrayView<Scalar>& CRS_vals);
101
102template <typename Ordinal>
103void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
104 const Teuchos::ArrayView<Ordinal>& CRS_colind);
105
106template <class rowptr_view_type, class colind_view_type, class vals_view_type>
107void sortAndMergeCrsEntries(const rowptr_view_type& CRS_rowptr,
108 const colind_view_type& CRS_colind,
109 const vals_view_type& CRS_vals);
110
126template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
128 const Teuchos::ArrayView<const size_t>& rowptr,
129 const Teuchos::ArrayView<LocalOrdinal>& colind_LID,
130 const Teuchos::ArrayView<GlobalOrdinal>& colind_GID,
131 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
132 const Teuchos::ArrayView<const int>& owningPIDs,
133 Teuchos::Array<int>& remotePIDs,
134 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap);
135
140template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
142 const Kokkos::View<const size_t*, typename Node::device_type> rowptr_view,
143 const Kokkos::View<LocalOrdinal*, typename Node::device_type> colind_LID_view,
144 const Kokkos::View<GlobalOrdinal*, typename Node::device_type> colind_GID_view,
145 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
146 const Kokkos::View<const int*, typename Node::device_type> owningPIDs_view,
147 Teuchos::Array<int>& remotePIDs,
148 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap);
149
163template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
164void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
165 bool useReverseModeForOwnership,
166 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferForMigratingData,
167 bool useReverseModeForMigration,
169
170} // namespace Import_Util
171} // namespace Tpetra
172
173//
174// Implementations
175//
176
177namespace Tpetra {
178namespace Import_Util {
179
// Strict-weak ordering on (PID, GID) pairs: compare by PID first and use
// the GID only to break ties.  Suitable for std::sort / std::unique.
template <typename PID, typename GlobalOrdinal>
bool sort_PID_then_GID(const std::pair<PID, GlobalOrdinal>& a,
                       const std::pair<PID, GlobalOrdinal>& b) {
  return (a.first == b.first) ? (a.second < b.second)
                              : (a.first < b.first);
}
187
// Strict-weak ordering on (PID, (GID, LID)) entries: order by PID, then by
// GID.  The LID component is deliberately ignored in the comparison.
template <typename PID,
          typename GlobalOrdinal,
          typename LocalOrdinal>
bool sort_PID_then_pair_GID_LID(const std::pair<PID, std::pair<GlobalOrdinal, LocalOrdinal>>& a,
                                const std::pair<PID, std::pair<GlobalOrdinal, LocalOrdinal>>& b) {
  if (a.first == b.first)
    return a.second.first < b.second.first;
  return a.first < b.first;
}
198
// Reverse neighbor discovery: determine, from the matrix structure and the
// forward importer, which (PID, GID) pairs must flow in the *reverse*
// direction, producing the "type 3" PID/LID lists consumed by
// transferAndFillComplete-style matrix-matrix code.  The whole body is
// MPI-only; without HAVE_TPETRACORE_MPI this function is a no-op.
template <typename Scalar,
          typename LocalOrdinal,
          typename GlobalOrdinal,
          typename Node>
void reverseNeighborDiscovery(const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& SourceMatrix,
                              const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type& rowptr,
                              const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type& colind,
                              // NOTE(review): a RowTransfer parameter declaration (the body below uses
                              // RowTransfer) was dropped from this excerpt -- confirm against the
                              // unabridged header.
                              Teuchos::RCP<const Tpetra::Import<LocalOrdinal, GlobalOrdinal, Node>> MyImporter,
                              Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> MyDomainMap,
                              Teuchos::ArrayRCP<int>& type3PIDs,
                              Teuchos::ArrayRCP<LocalOrdinal>& type3LIDs,
                              Teuchos::RCP<const Teuchos::Comm<int>>& rcomm) {
#ifdef HAVE_TPETRACORE_MPI
  using ::Tpetra::Details::Behavior;
  typedef LocalOrdinal LO;
  typedef GlobalOrdinal GO;
  // Both halves of the pair are GO so the buffer can be shipped as a flat
  // array of GO over MPI (2 GOs per pair; see the *2 counts below).
  typedef std::pair<GO, GO> pidgidpair_t;
  using Teuchos::RCP;
  const std::string prefix{" Import_Util2::ReverseND:: "};
  const std::string label("IU2::Neighbor");

  // There can be no neighbor discovery if you don't have an importer
  if (MyImporter.is_null()) return;

  std::ostringstream errstr;
  bool error = false;
  auto const comm = MyDomainMap->getComm();

  MPI_Comm rawComm = getRawMpiComm(*comm);
  const int MyPID = rcomm->getRank();

  // Things related to messages I am sending in forward mode (RowTransfer)
  // *** Note: this will be incorrect for transferAndFillComplete if it is in reverse mode. FIXME cbl.
  auto ExportPIDs = RowTransfer.getExportPIDs();
  auto ExportLIDs = RowTransfer.getExportLIDs();
  auto NumExportLIDs = RowTransfer.getNumExportIDs();

  Distributor& Distor = MyImporter->getDistributor();
  const size_t NumRecvs = Distor.getNumReceives();
  const size_t NumSends = Distor.getNumSends();
  auto RemoteLIDs = MyImporter->getRemoteLIDs();
  auto const ProcsFrom = Distor.getProcsFrom();
  auto const ProcsTo = Distor.getProcsTo();

  auto LengthsFrom = Distor.getLengthsFrom();
  auto MyColMap = SourceMatrix.getColMap();
  const size_t numCols = MyColMap->getLocalNumElements();
  RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> target = MyImporter->getTargetMap();

  // Get the owning pids in a special way,
  // s.t. ProcsFrom[RemotePIDs[i]] is the proc that owns RemoteLIDs[j]....
  Teuchos::Array<int> RemotePIDOrder(numCols, -1);

  // For each remote ID, record index into ProcsFrom, who owns it.
  for (size_t i = 0, j = 0; i < NumRecvs; ++i) {
    for (size_t k = 0; k < LengthsFrom[i]; ++k) {
      const int pid = ProcsFrom[i];
      if (pid != MyPID) {
        RemotePIDOrder[RemoteLIDs[j]] = i;
      }
      j++;
    }
  }

  // Step One: Start tacking the (GID,PID) pairs on the std sets
  //
  // For each index in ProcsFrom, we will insert into a set of (PID,
  // GID) pairs, in order to build a list of such pairs for each of
  // those processes.  Since this is building a reverse, we will send
  // to these processes.
  Teuchos::Array<int> ReverseSendSizes(NumRecvs, 0);
  // do this as C array to avoid Teuchos::Array value initialization of all reserved memory
  Teuchos::Array<Teuchos::ArrayRCP<pidgidpair_t>> RSB(NumRecvs);

  {
    Tpetra::Details::ProfilingRegion set_all("Import_Util2::ReverseND::isMMallSetRSB");

    // 25 Jul 2018: CBL
    // todo:std::unordered_set (hash table),
    // with an adequate prereservation ("bucket count").
    // An onordered_set has to have a custom hasher for pid/gid pair
    // However, when pidsets is copied to RSB, it will be in key
    // order _not_ in pid,gid order. (unlike std::set).
    // Impliment this with a reserve, and time BOTH building pidsets
    // _and_ the sort after the receive. Even if unordered_set saves
    // time, if it causes the sort to be longer, it's not a win.

    Teuchos::Array<std::set<pidgidpair_t>> pidsets(NumRecvs);
    {
      Tpetra::Details::ProfilingRegion set_insert("Import_Util2::ReverseND::isMMallSetRSBinsert");
      // For every row I export, record (export PID, column GID) for each
      // remotely-owned column in that row, bucketed by the owning process.
      for (size_t i = 0; i < NumExportLIDs; i++) {
        LO lid = ExportLIDs[i];
        GO exp_pid = ExportPIDs[i];
        for (auto j = rowptr[lid]; j < rowptr[lid + 1]; j++) {
          int pid_order = RemotePIDOrder[colind[j]];
          if (pid_order != -1) {
            GO gid = MyColMap->getGlobalElement(colind[j]);  // Epetra SM.GCID46 =>sm->graph-> {colmap(colind)}
            auto tpair = pidgidpair_t(exp_pid, gid);
            // Hinted insert at end(); std::set still dedups and keeps order.
            pidsets[pid_order].insert(pidsets[pid_order].end(), tpair);
          }
        }
      }
    }

    {
      Tpetra::Details::ProfilingRegion set_cpy("Import_Util2::ReverseND::isMMallSetRSBcpy");
      // Flatten each per-process set into a contiguous send buffer and
      // record its size for the size-exchange below.
      int jj = 0;
      for (auto&& ps : pidsets) {
        auto s = ps.size();
        RSB[jj] = Teuchos::arcp(new pidgidpair_t[s], 0, s, true);
        std::copy(ps.begin(), ps.end(), RSB[jj]);
        ReverseSendSizes[jj] = s;
        ++jj;
      }
    }
  }  // end of set based packing.

  // Phase 1 of the exchange: swap message *sizes* with each neighbor.
  // Reverse direction: we receive from our forward-send procs (ProcsTo)
  // and send to our forward-receive procs (ProcsFrom).
  Teuchos::Array<int> ReverseRecvSizes(NumSends, -1);
  Teuchos::Array<MPI_Request> rawBreq(ProcsFrom.size() + ProcsTo.size(), MPI_REQUEST_NULL);
  // 25 Jul 2018: MPI_TAG_UB is the largest tag value; could be < 32768.
  const int mpi_tag_base_ = 3;

  int mpireq_idx = 0;
  for (int i = 0; i < ProcsTo.size(); ++i) {
    int Rec_Tag = mpi_tag_base_ + ProcsTo[i];
    int* thisrecv = (int*)(&ReverseRecvSizes[i]);
    MPI_Request rawRequest = MPI_REQUEST_NULL;
    MPI_Irecv(const_cast<int*>(thisrecv),
              1,
              MPI_INT,
              ProcsTo[i],
              Rec_Tag,
              rawComm,
              &rawRequest);
    rawBreq[mpireq_idx++] = rawRequest;
  }
  for (int i = 0; i < ProcsFrom.size(); ++i) {
    int Send_Tag = mpi_tag_base_ + MyPID;
    int* mysend = (int*)(&ReverseSendSizes[i]);
    MPI_Request rawRequest = MPI_REQUEST_NULL;
    MPI_Isend(mysend,
              1,
              MPI_INT,
              ProcsFrom[i],
              Send_Tag,
              rawComm,
              &rawRequest);
    rawBreq[mpireq_idx++] = rawRequest;
  }
  Teuchos::Array<MPI_Status> rawBstatus(rawBreq.size());
#ifdef HAVE_TPETRA_DEBUG
  const int err1 =
#endif
      MPI_Waitall(rawBreq.size(), rawBreq.getRawPtr(),
                  rawBstatus.getRawPtr());

#ifdef HAVE_TPETRA_DEBUG
  if (err1) {
    errstr << MyPID << "sE1 reverseNeighborDiscovery Mpi_Waitall error on send ";
    error = true;
    std::cerr << errstr.str() << std::flush;
  }
#endif

  // Total number of pairs we will receive, summed over all neighbors.
  int totalexportpairrecsize = 0;
  for (size_t i = 0; i < NumSends; ++i) {
    totalexportpairrecsize += ReverseRecvSizes[i];
#ifdef HAVE_TPETRA_DEBUG
    if (ReverseRecvSizes[i] < 0) {
      errstr << MyPID << "E4 reverseNeighborDiscovery: got < 0 for receive size " << ReverseRecvSizes[i] << std::endl;
      error = true;
    }
#endif
  }
  // Phase 2 of the exchange: swap the actual (PID, GID) pair payloads.
  // Each pair travels as 2 GOs; tags are offset (base*2) so phase-2
  // messages cannot match phase-1 receives.
  Teuchos::ArrayRCP<pidgidpair_t> AllReverseRecv = Teuchos::arcp(new pidgidpair_t[totalexportpairrecsize], 0, totalexportpairrecsize, true);
  int offset = 0;
  mpireq_idx = 0;
  for (int i = 0; i < ProcsTo.size(); ++i) {
    int recv_data_size = ReverseRecvSizes[i] * 2;
    int recvData_MPI_Tag = mpi_tag_base_ * 2 + ProcsTo[i];
    MPI_Request rawRequest = MPI_REQUEST_NULL;
    GO* rec_bptr = (GO*)(&AllReverseRecv[offset]);
    offset += ReverseRecvSizes[i];
    MPI_Irecv(rec_bptr,
              recv_data_size,
              ::Tpetra::Details::MpiTypeTraits<GO>::getType(rec_bptr[0]),
              ProcsTo[i],
              recvData_MPI_Tag,
              rawComm,
              &rawRequest);
    rawBreq[mpireq_idx++] = rawRequest;
  }
  for (int ii = 0; ii < ProcsFrom.size(); ++ii) {
    GO* send_bptr = (GO*)(RSB[ii].getRawPtr());
    MPI_Request rawSequest = MPI_REQUEST_NULL;
    int send_data_size = ReverseSendSizes[ii] * 2;  // 2 == count of pair
    int sendData_MPI_Tag = mpi_tag_base_ * 2 + MyPID;
    MPI_Isend(send_bptr,
              send_data_size,
              ::Tpetra::Details::MpiTypeTraits<GO>::getType(send_bptr[0]),
              ProcsFrom[ii],
              sendData_MPI_Tag,
              rawComm,
              &rawSequest);

    rawBreq[mpireq_idx++] = rawSequest;
  }
#ifdef HAVE_TPETRA_DEBUG
  const int err =
#endif
      MPI_Waitall(rawBreq.size(),
                  rawBreq.getRawPtr(),
                  rawBstatus.getRawPtr());
#ifdef HAVE_TPETRA_DEBUG
  if (err) {
    errstr << MyPID << "E3.r reverseNeighborDiscovery Mpi_Waitall error on receive ";
    error = true;
    std::cerr << errstr.str() << std::flush;
  }
#endif
  // Sort all received pairs by (PID, GID) so duplicates are adjacent ...
  std::sort(AllReverseRecv.begin(), AllReverseRecv.end(), Tpetra::Import_Util::sort_PID_then_GID<GlobalOrdinal, GlobalOrdinal>);

  // ... and deduplicate them.
  auto newEndOfPairs = std::unique(AllReverseRecv.begin(), AllReverseRecv.end());
  // don't resize to remove non-unique, just use the end-of-unique iterator
  if (AllReverseRecv.begin() == newEndOfPairs) return;
  int ARRsize = std::distance(AllReverseRecv.begin(), newEndOfPairs);
  auto rPIDs = Teuchos::arcp(new int[ARRsize], 0, ARRsize, true);
  auto rLIDs = Teuchos::arcp(new LocalOrdinal[ARRsize], 0, ARRsize, true);

  // Keep only pairs naming other processes; translate GIDs to domain-map LIDs.
  int tsize = 0;
  for (auto itr = AllReverseRecv.begin(); itr != newEndOfPairs; ++itr) {
    if ((int)(itr->first) != MyPID) {
      rPIDs[tsize] = (int)itr->first;
      LocalOrdinal lid = MyDomainMap->getLocalElement(itr->second);
      rLIDs[tsize] = lid;
      tsize++;
    }
  }

  // Outputs: trimmed views of the kept entries only.
  type3PIDs = rPIDs.persistingView(0, tsize);
  type3LIDs = rLIDs.persistingView(0, tsize);

  if (error) {
    std::cerr << errstr.str() << std::flush;
    comm->barrier();
    comm->barrier();
    comm->barrier();
    MPI_Abort(MPI_COMM_WORLD, -1);
  }
#endif
}
451
452// Note: This should get merged with the other Tpetra sort routines eventually.
453template <typename Scalar, typename Ordinal>
454void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
455 const Teuchos::ArrayView<Ordinal>& CRS_colind,
456 const Teuchos::ArrayView<Scalar>& CRS_vals) {
457 auto rowptr_k = Kokkos::View<size_t*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(CRS_rowptr.data(), CRS_rowptr.size());
458 auto colind_k = Kokkos::View<Ordinal*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(CRS_colind.data(), CRS_colind.size());
459 auto vals_k = Kokkos::View<Scalar*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(CRS_vals.data(), CRS_vals.size());
461}
462
463template <typename Ordinal>
464void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
465 const Teuchos::ArrayView<Ordinal>& CRS_colind) {
466 auto rowptr_k = Kokkos::View<size_t*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(CRS_rowptr.data(), CRS_rowptr.size());
467 auto colind_k = Kokkos::View<Ordinal*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(CRS_colind.data(), CRS_colind.size());
468 sortCrsEntries(rowptr_k, colind_k);
469}
470
// Kokkos-View overload: sort each row's column indices, carrying the values
// along, via the kokkos-kernels CRS sort.  Runs in the views' execution space.
template <typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
                    const colind_array_type& CRS_colind,
                    const vals_array_type& CRS_vals) {
  KokkosSparse::sort_crs_matrix(CRS_rowptr, CRS_colind, CRS_vals);
}
477
// Kokkos-View overload, graph only: sort each row's column indices via the
// kokkos-kernels CRS-graph sort (no values to permute).
template <typename rowptr_array_type, typename colind_array_type>
void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
                    const colind_array_type& CRS_colind) {
  KokkosSparse::sort_crs_graph(CRS_rowptr, CRS_colind);
}
483
484// Note: This should get merged with the other Tpetra sort routines eventually.
485template <typename Scalar, typename Ordinal>
486void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
487 const Teuchos::ArrayView<Ordinal>& CRS_colind,
488 const Teuchos::ArrayView<Scalar>& CRS_vals) {
489 // For each row, sort column entries from smallest to largest,
490 // merging column ids that are identify by adding values. Use shell
491 // sort. Stable sort so it is fast if indices are already sorted.
492 // Code copied from Epetra_CrsMatrix::SortEntries()
493
494 if (CRS_rowptr.size() == 0) {
495 return; // no rows, so nothing to sort
496 }
497 const size_t NumRows = CRS_rowptr.size() - 1;
498 const size_t nnz = CRS_colind.size();
499 size_t new_curr = CRS_rowptr[0];
500 size_t old_curr = CRS_rowptr[0];
501
502 const bool permute_values_array = CRS_vals.size() > 0;
503
504 for (size_t i = 0; i < NumRows; i++) {
505 const size_t old_rowptr_i = CRS_rowptr[i];
507 if (old_rowptr_i >= nnz) continue;
508
509 size_t NumEntries = CRS_rowptr[i + 1] - old_rowptr_i;
510 Teuchos::ArrayRCP<Scalar> locValues;
512 locValues = Teuchos::arcp<Scalar>(&CRS_vals[old_rowptr_i], 0, NumEntries, false);
513 Teuchos::ArrayRCP<Ordinal> locIndices(&CRS_colind[old_rowptr_i], 0, NumEntries, false);
514
515 // Sort phase
517 Ordinal m = n / 2;
518
519 while (m > 0) {
520 Ordinal max = n - m;
521 for (Ordinal j = 0; j < max; j++) {
522 for (Ordinal k = j; k >= 0; k -= m) {
523 if (locIndices[k + m] >= locIndices[k])
524 break;
526 Scalar dtemp = locValues[k + m];
527 locValues[k + m] = locValues[k];
528 locValues[k] = dtemp;
529 }
531 locIndices[k + m] = locIndices[k];
532 locIndices[k] = itemp;
533 }
534 }
535 m = m / 2;
536 }
537
538 // Merge & shrink
539 for (size_t j = old_rowptr_i; j < CRS_rowptr[i + 1]; j++) {
540 if (j > old_rowptr_i && CRS_colind[j] == CRS_colind[new_curr - 1]) {
542 } else if (new_curr == j) {
543 new_curr++;
544 } else {
547 new_curr++;
548 }
549 }
551 }
552
554}
555
556template <typename Ordinal>
557void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
558 const Teuchos::ArrayView<Ordinal>& CRS_colind) {
559 Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type> CRS_vals;
561}
562
563template <class rowptr_view_type, class colind_view_type, class vals_view_type>
564void sortAndMergeCrsEntries(rowptr_view_type& CRS_rowptr,
565 colind_view_type& CRS_colind,
566 vals_view_type& CRS_vals) {
567 using execution_space = typename vals_view_type::execution_space;
568
569 auto CRS_rowptr_in = CRS_rowptr;
570 auto CRS_colind_in = CRS_colind;
571 auto CRS_vals_in = CRS_vals;
572
573 KokkosSparse::sort_and_merge_matrix<execution_space, rowptr_view_type,
574 colind_view_type, vals_view_type>(CRS_rowptr_in, CRS_colind_in, CRS_vals_in,
575 CRS_rowptr, CRS_colind, CRS_vals);
576}
577
// Serial (host) construction of a column Map from CRS column GIDs plus
// per-entry owning-PID info, reindexing colind_LID in place to the new
// column Map's local ordering.  Local (domain-map-owned) columns come
// first, in domain-map order; remote columns follow, grouped by owning
// process and sorted by GID within each group.  remotePIDs receives the
// sorted owning PIDs of the remote columns.
template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
void lowCommunicationMakeColMapAndReindexSerial(const Teuchos::ArrayView<const size_t>& rowptr,
                                                const Teuchos::ArrayView<LocalOrdinal>& colind_LID,
                                                const Teuchos::ArrayView<GlobalOrdinal>& colind_GID,
                                                const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
                                                const Teuchos::ArrayView<const int>& owningPIDs,
                                                Teuchos::Array<int>& remotePIDs,
                                                Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap) {
  using Teuchos::rcp;
  typedef LocalOrdinal LO;
  typedef GlobalOrdinal GO;
  typedef Tpetra::global_size_t GST;
  typedef Tpetra::Map<LO, GO, Node> map_type;
  const char prefix[] = "lowCommunicationMakeColMapAndReindexSerial: ";

  // The domainMap is an RCP because there is a shortcut for a
  // (common) special case to return the columnMap = domainMap.
  const map_type& domainMap = *domainMapRCP;

  // Scan all column indices and sort into two groups:
  // Local:  those whose GID matches a GID of the domain map on this processor and
  // Remote: All others.
  const size_t numDomainElements = domainMap.getLocalNumElements();
  Teuchos::Array<bool> LocalGIDs;
  if (numDomainElements > 0) {
    LocalGIDs.resize(numDomainElements, false);  // Assume domain GIDs are not local
  }

  // In principle it is good to have RemoteGIDs and RemotGIDList be as
  // long as the number of remote GIDs on this processor, but this
  // would require two passes through the column IDs, so we make it
  // the max of 100 and the number of block rows.
  //
  // FIXME (mfh 11 Feb 2015) Tpetra::Details::HashTable can hold at
  // most INT_MAX entries, but it's possible to have more rows than
  // that (if size_t is 64 bits and int is 32 bits).
  const size_t numMyRows = rowptr.size() - 1;
  const int hashsize = std::max(static_cast<int>(numMyRows), 100);

  Tpetra::Details::HashTable<GO, LO> RemoteGIDs(hashsize);
  Teuchos::Array<GO> RemoteGIDList;
  RemoteGIDList.reserve(hashsize);
  Teuchos::Array<int> PIDList;
  PIDList.reserve(hashsize);

  // Here we start using the *LocalOrdinal* colind_LID array.  This is
  // safe even if both columnIndices arrays are actually the same
  // (because LocalOrdinal==GO).  For *local* GID's set
  // colind_LID with with their LID in the domainMap.  For *remote*
  // GIDs, we set colind_LID with (numDomainElements+NumRemoteColGIDs)
  // before the increment of the remote count.  These numberings will
  // be separate because no local LID is greater than
  // numDomainElements.

  size_t NumLocalColGIDs = 0;
  LO NumRemoteColGIDs = 0;
  for (size_t i = 0; i < numMyRows; ++i) {
    for (size_t j = rowptr[i]; j < rowptr[i + 1]; ++j) {
      const GO GID = colind_GID[j];
      // Check if GID matches a row GID
      const LO LID = domainMap.getLocalElement(GID);
      if (LID != -1) {
        const bool alreadyFound = LocalGIDs[LID];
        if (!alreadyFound) {
          LocalGIDs[LID] = true;  // There is a column in the graph associated with this domain map GID
          NumLocalColGIDs++;
        }
        colind_LID[j] = LID;
      } else {
        const LO hash_value = RemoteGIDs.get(GID);
        if (hash_value == -1) {  // This means its a new remote GID
          const int PID = owningPIDs[j];
          TEUCHOS_TEST_FOR_EXCEPTION(
              PID == -1, std::invalid_argument, prefix << "Cannot figure out if "
                                                          "PID is owned.");
          colind_LID[j] = static_cast<LO>(numDomainElements + NumRemoteColGIDs);
          RemoteGIDs.add(GID, NumRemoteColGIDs);
          RemoteGIDList.push_back(GID);
          PIDList.push_back(PID);
          NumRemoteColGIDs++;
        } else {
          colind_LID[j] = static_cast<LO>(numDomainElements + hash_value);
        }
      }
    }
  }

  // Possible short-circuit: If all domain map GIDs are present as
  // column indices, then set ColMap=domainMap and quit.
  if (domainMap.getComm()->getSize() == 1) {
    // Sanity check: When there is only one process, there can be no
    // remoteGIDs.
    TEUCHOS_TEST_FOR_EXCEPTION(
        NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
                                                             "process in the domain Map's communicator, which means that there are no "
                                                             "\"remote\" indices.  Nevertheless, some column indices are not in the "
                                                             "domain Map.");
    if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
      // In this case, we just use the domainMap's indices, which is,
      // not coincidently, what we clobbered colind with up above
      // anyway.  No further reindexing is needed.
      colMap = domainMapRCP;
      return;
    }
  }

  // Now build the array containing column GIDs
  // Build back end, containing remote GIDs, first
  const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
  Teuchos::Array<GO> ColIndices;
  GO* RemoteColIndices = NULL;
  if (numMyCols > 0) {
    ColIndices.resize(numMyCols);
    if (NumLocalColGIDs != static_cast<size_t>(numMyCols)) {
      RemoteColIndices = &ColIndices[NumLocalColGIDs];  // Points to back half of ColIndices
    }
  }

  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
    RemoteColIndices[i] = RemoteGIDList[i];
  }

  // Build permute array for *remote* reindexing.
  Teuchos::Array<LO> RemotePermuteIDs(NumRemoteColGIDs);
  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
    RemotePermuteIDs[i] = i;
  }

  // Sort External column indices so that all columns coming from a
  // given remote processor are contiguous.  This is a sort with two
  // auxilary arrays: RemoteColIndices and RemotePermuteIDs.
  Tpetra::sort3(PIDList.begin(), PIDList.end(),
                ColIndices.begin() + NumLocalColGIDs,
                RemotePermuteIDs.begin());

  // Stash the RemotePIDs.
  //
  // Note: If Teuchos::Array had a shrink_to_fit like std::vector,
  // we'd call it here.
  remotePIDs = PIDList;

  // Sort external column indices so that columns from a given remote
  // processor are not only contiguous but also in ascending
  // order.  NOTE: I don't know if the number of externals associated
  // with a given remote processor is known at this point ... so I
  // count them here.

  // NTS: Only sort the RemoteColIndices this time...
  LO StartCurrent = 0, StartNext = 1;
  while (StartNext < NumRemoteColGIDs) {
    if (PIDList[StartNext] == PIDList[StartNext - 1]) {
      StartNext++;
    } else {
      Tpetra::sort2(ColIndices.begin() + NumLocalColGIDs + StartCurrent,
                    ColIndices.begin() + NumLocalColGIDs + StartNext,
                    RemotePermuteIDs.begin() + StartCurrent);
      StartCurrent = StartNext;
      StartNext++;
    }
  }
  // Final flush: sort the last PID's run of remote GIDs.
  // NOTE(review): when NumRemoteColGIDs == 0 this forms an iterator one past
  // ColIndices.end() (StartNext == 1) -- presumably harmless in practice but
  // worth confirming against the unabridged implementation.
  Tpetra::sort2(ColIndices.begin() + NumLocalColGIDs + StartCurrent,
                ColIndices.begin() + NumLocalColGIDs + StartNext,
                RemotePermuteIDs.begin() + StartCurrent);

  // Reverse the permutation to get the information we actually care about
  Teuchos::Array<LO> ReverseRemotePermuteIDs(NumRemoteColGIDs);
  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
    ReverseRemotePermuteIDs[RemotePermuteIDs[i]] = i;
  }

  // Build permute array for *local* reindexing.
  bool use_local_permute = false;
  Teuchos::Array<LO> LocalPermuteIDs(numDomainElements);

  // Now fill front end.  Two cases:
  //
  // (1) If the number of Local column GIDs is the same as the number
  //     of Local domain GIDs, we can simply read the domain GIDs into
  //     the front part of ColIndices, otherwise
  //
  // (2) We step through the GIDs of the domainMap, checking to see if
  //     each domain GID is a column GID.  we want to do this to
  //     maintain a consistent ordering of GIDs between the columns
  //     and the domain.
  Teuchos::ArrayView<const GO> domainGlobalElements = domainMap.getLocalElementList();
  if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
    if (NumLocalColGIDs > 0) {
      // Load Global Indices into first numMyCols elements column GID list
      std::copy(domainGlobalElements.begin(), domainGlobalElements.end(),
                ColIndices.begin());
    }
  } else {
    LO NumLocalAgain = 0;
    use_local_permute = true;
    for (size_t i = 0; i < numDomainElements; ++i) {
      if (LocalGIDs[i]) {
        LocalPermuteIDs[i] = NumLocalAgain;
        ColIndices[NumLocalAgain++] = domainGlobalElements[i];
      }
    }
    TEUCHOS_TEST_FOR_EXCEPTION(
        static_cast<size_t>(NumLocalAgain) != NumLocalColGIDs,
        std::runtime_error, prefix << "Local ID count test failed.");
  }

  // Make column Map
  const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid();
  colMap = rcp(new map_type(minus_one, ColIndices, domainMap.getIndexBase(),
                            domainMap.getComm()));

  // Low-cost reindex of the matrix
  for (size_t i = 0; i < numMyRows; ++i) {
    for (size_t j = rowptr[i]; j < rowptr[i + 1]; ++j) {
      const LO ID = colind_LID[j];
      if (static_cast<size_t>(ID) < numDomainElements) {
        if (use_local_permute) {
          colind_LID[j] = LocalPermuteIDs[colind_LID[j]];
        }
        // In the case where use_local_permute==false, we just copy
        // the DomainMap's ordering, which it so happens is what we
        // put in colind_LID to begin with.
      } else {
        colind_LID[j] = NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j] - numDomainElements];
      }
    }
  }
}
805
806template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
808 const Teuchos::ArrayView<const size_t>& rowptr,
809 const Teuchos::ArrayView<LocalOrdinal>& colind_LID,
810 const Teuchos::ArrayView<GlobalOrdinal>& colind_GID,
812 const Teuchos::ArrayView<const int>& owningPIDs,
813 Teuchos::Array<int>& remotePIDs,
814 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap) {
815 using DT = typename Node::device_type;
816 using execution_space = typename DT::execution_space;
817 execution_space exec;
818 DT device;
819
820 // Create device mirror and host mirror views from function parameters
821 // When we pass in views instead of Teuchos::ArrayViews, we can avoid copying views
822 auto colind_LID_view = Details::create_mirror_view_from_raw_host_array(device, colind_LID.getRawPtr(), colind_LID.size(), true, "colind_LID");
823 auto rowptr_view = Details::create_mirror_view_from_raw_host_array(device, rowptr.getRawPtr(), rowptr.size(), true, "rowptr");
824 auto colind_GID_view = Details::create_mirror_view_from_raw_host_array(device, colind_GID.getRawPtr(), colind_GID.size(), true, "colind_GID");
825 auto owningPIDs_view = Details::create_mirror_view_from_raw_host_array(device, owningPIDs.getRawPtr(), owningPIDs.size(), true, "owningPIDs");
826
827 typename decltype(colind_LID_view)::host_mirror_type colind_LID_host(colind_LID.getRawPtr(), colind_LID.size());
828
830
831 // For now, we copy back into colind_LID_host (which also overwrites the colind_LID Teuchos array)
832 // When colind_LID becomes a Kokkos View we can delete this
833 Kokkos::deep_copy(exec, colind_LID_host, colind_LID_view);
834}
835
// lowCommunicationMakeColMapAndReindex (device / Kokkos::View overload):
// Build a column Map from the CSR column GIDs in colind_GID_view, the
// domain Map, and the owning PID of each column entry.  On exit:
//   - colMap lists the locally-owned (domain) GIDs first, followed by the
//     remote GIDs sorted first by owning PID and then by GID;
//   - colind_LID_view holds each column index reindexed into colMap's
//     local IDs;
//   - remotePIDs holds the owning PID of each remote column GID, in
//     colMap order.
// All heavy work runs on device; no MPI communication is performed
// (hence "lowCommunication").
836template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
 838 const Kokkos::View<const size_t*, typename Node::device_type> rowptr_view,
 839 const Kokkos::View<LocalOrdinal*, typename Node::device_type> colind_LID_view,
 840 const Kokkos::View<GlobalOrdinal*, typename Node::device_type> colind_GID_view,
 842 const Kokkos::View<const int*, typename Node::device_type> owningPIDs_view,
 843 Teuchos::Array<int>& remotePIDs,
 844 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap) {
 845 using Teuchos::rcp;
 846 typedef LocalOrdinal LO;
 847 typedef GlobalOrdinal GO;
 849 typedef Tpetra::Map<LO, GO, Node> map_type;
 850 const char prefix[] = "lowCommunicationMakeColMapAndReindex: ";
 851 Tpetra::Details::ProfilingRegion regOuter("lowCommunicationMakeColMapAndReindex");
 852
 853 typedef typename Node::device_type DT;
 854 using execution_space = typename DT::execution_space;
 855 using memory_space = typename DT::memory_space;
 856 execution_space exec;
 857 using team_policy = Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
 858 typedef typename map_type::local_map_type local_map_type;
 859
 860 // The domainMap is an RCP because there is a shortcut for a
 861 // (common) special case to return the columnMap = domainMap.
 862 const map_type& domainMap = *domainMapRCP;
 863
 864 const size_t numMyRows = rowptr_view.size() - 1;
 865 local_map_type domainMap_local = domainMap.getLocalMap();
 866
 867 const size_t numDomainElements = domainMap.getLocalNumElements();
 868 // One flag per domain element: set to true when that GID is seen as a
 869 // column index (used later to build the local part of the column Map).
 868 Kokkos::View<bool*, DT> LocalGIDs_view("LocalGIDs", numDomainElements);
 869
 870 // Initial capacity heuristic for the GID -> owning-PID hash map.  If
 871 // inserts fail (map too small), the while-loop below grows the map by
 872 // 1.5x and rescans from scratch.
 870 auto map_capacity = std::max(std::min(uint32_t(1.2 * colind_LID_view.size()),
 872 uint32_t(5000));
 873 Kokkos::UnorderedMap<GO, int, DT> GIDs_map(map_capacity);
 874 size_t NumLocalColGIDs;
 875 size_t NumRemoteColGIDs;
 876 while (true) {
 877 Tpetra::Details::ProfilingRegion reg("lowCommunicationMakeColMapAndReindex::fill_map");
 878 const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
 879
 880 // Scan all column indices and sort into two groups:
 881 // Local: those whose GID matches a GID of the domain map on this processor and
 882 // Remote: All others.
 883 // Kokkos::Parallel_reduce sums up NumLocalColGIDs, while we use the size of the Remote GIDs map to find NumRemoteColGIDs
 884 LocalRemoteCount counts;
 885 Kokkos::parallel_reduce(
 886 team_policy(numMyRows, Kokkos::AUTO), KOKKOS_LAMBDA(const typename team_policy::member_type& member, LocalRemoteCount& update) {
 887 const int i = member.league_rank();
 888 LocalRemoteCount NumLocalColGIDs_temp;
 889 size_t rowptr_start = rowptr_view[i];
 890 size_t rowptr_end = rowptr_view[i + 1];
 891 Kokkos::parallel_reduce(
 892 Kokkos::TeamThreadRange(member, rowptr_start, rowptr_end), [&](const size_t j, LocalRemoteCount& innerUpdate) {
 893 const GO GID = colind_GID_view[j];
 894 // Check if GID matches a row GID in local domain map
 895 const LO LID = domainMap_local.getLocalElement(GID);
 896 if (LID != LINVALID) {
 897 // Locally owned GID: a value of -1 in the map marks "local".
 897 auto outcome = GIDs_map.insert(GID, -1);
 898 // Fresh insert
 899 if (outcome.success()) {
 900 LocalGIDs_view[LID] = true;
 901 innerUpdate.numLocalColGIDs++;
 902 }
 903 } else {
 904 // Remote GID: store its owning PID; only count the first insert.
 904 const int PID = owningPIDs_view[j];
 905 auto outcome = GIDs_map.insert(GID, PID);
 906 if (outcome.success()) {
 907 innerUpdate.numRemoteColGIDs++;
 908 if (PID == -1) {
 909 Kokkos::abort("Cannot figure out if ID is owned.\n");
 910 }
 911 }
 912 }
 913 },
 915 if (member.team_rank() == 0) update += NumLocalColGIDs_temp;
 916 },
 917 counts);
 918 // Counts are only valid if every insert succeeded; otherwise grow the
 919 // hash map and redo the whole scan.
 918 if (!GIDs_map.failed_insert()) {
 919 NumLocalColGIDs = counts.numLocalColGIDs;
 920 NumRemoteColGIDs = counts.numRemoteColGIDs;
 921 break;
 922 } else {
 923 map_capacity *= 1.5;
 924 GIDs_map = Kokkos::UnorderedMap<GO, int, DT>(map_capacity);
 925 }
 926 }
 927
 928 // Possible short-circuit: If all domain map GIDs are present as
 929 // column indices, then set ColMap=domainMap and quit.
 930 if (domainMap.getComm()->getSize() == 1) {
 931 // Sanity check: When there is only one process, there can be no
 932 // remoteGIDs.
 934 NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
 935 "process in the domain Map's communicator, which means that there are no "
 936 "\"remote\" indices. Nevertheless, some column indices are not in the "
 937 "domain Map.");
 938 if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
 939 // In this case, we just use the domainMap's indices, which is,
 940 // not coincidently, what we clobbered colind with up above
 941 // anyway. No further reindexing is needed.
 942 colMap = domainMapRCP;
 943
 944 // Fill out local colMap (which should only contain local GIDs)
 945 auto localColMap = colMap->getLocalMap();
 946 Kokkos::parallel_for(
 947 Kokkos::RangePolicy<execution_space>(0, colind_GID_view.size()), KOKKOS_LAMBDA(const int i) {
 948 colind_LID_view(i) = localColMap.getLocalElement(colind_GID_view(i));
 949 });
 950 return;
 951 }
 952 }
 953
 954 Kokkos::View<GO*, DT> RemoteColMapIndices_unsorted(Kokkos::ViewAllocateWithoutInitializing("RemoteColMapIndices_unsorted"), NumRemoteColGIDs);
 955 Kokkos::View<int*, DT> RemotePIDs_unsorted(Kokkos::ViewAllocateWithoutInitializing("RemotePIDs_unsorted"), NumRemoteColGIDs);
 956
 957 // For each index in RemoteGIDs_map that contains a GID, use "update" to indicate the number of GIDs "before" this GID
 958 // This maps each element in the RemoteGIDs hash table to an index in RemoteGIDList / PIDList without any overwriting or empty spaces between indices
 959
 960 {
 961 Tpetra::Details::ProfilingRegion reg("lowCommunicationMakeColMapAndReindex::process_map");
 962 size_t numEnteredRemotes;
 963 // Compact the sparse hash-table slots (remote entries only, i.e. the
 964 // ones whose stored PID is not -1) into dense arrays via a scan.
 963 Kokkos::parallel_scan(
 964 Kokkos::RangePolicy<execution_space>(0, GIDs_map.capacity()), KOKKOS_LAMBDA(const int i, size_t& update, const bool final) {
 965 if (GIDs_map.valid_at(i) && GIDs_map.value_at(i) != -1) {
 966 if (final) {
 967 RemoteColMapIndices_unsorted(update) = GIDs_map.key_at(i);
 968 RemotePIDs_unsorted(update) = GIDs_map.value_at(i);
 969 }
 970 ++update;
 971 }
 972 },
 973 numEnteredRemotes);
 974 TEUCHOS_ASSERT(numEnteredRemotes == NumRemoteColGIDs);
 975 }
 976 // Now build the array containing column GIDs
 977 // Build back end, containing remote GIDs, first
 978 const size_t numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
 979 Kokkos::View<GO*, DT> ColMapIndices(Kokkos::ViewAllocateWithoutInitializing("ColMapIndices"), numMyCols);
 980
 981 // We don't need to load the back end of ColMapIndices or sort if there are no remote GIDs
 982 if (NumRemoteColGIDs > 0) {
 983 Tpetra::Details::ProfilingRegion reg("lowCommunicationMakeColMapAndReindex::sort");
 984 auto RemoteColMapIndices_sorted = Kokkos::subview(ColMapIndices, Kokkos::make_pair(NumLocalColGIDs, numMyCols));
 985
 986 // Create a permutation that sorts by first by PID and then by GID.
 987 Kokkos::View<size_t*, memory_space> index(Kokkos::ViewAllocateWithoutInitializing("index"), NumRemoteColGIDs);
 988 Kokkos::parallel_for(
 989 Kokkos::RangePolicy<execution_space>(0, NumRemoteColGIDs), KOKKOS_LAMBDA(const size_t i) {
 990 index(i) = i;
 991 });
 992 Kokkos::sort(
 993 exec, index, KOKKOS_LAMBDA(const size_t i, const size_t j) {
 994 auto pid_i = RemotePIDs_unsorted(i);
 995 auto pid_j = RemotePIDs_unsorted(j);
 996 if (pid_i == pid_j)
 997 return RemoteColMapIndices_unsorted(i) < RemoteColMapIndices_unsorted(j);
 998 else
 999 return pid_i < pid_j;
 1000 });
 1001 // Apply the permutation to remote GIDs and PIDs.
 1002 Kokkos::View<int*, memory_space> RemotePIDs_sorted(Kokkos::ViewAllocateWithoutInitializing("RemotePIDs_new"), NumRemoteColGIDs);
 1003 Kokkos::parallel_for(
 1004 Kokkos::RangePolicy<execution_space>(0, NumRemoteColGIDs), KOKKOS_LAMBDA(const size_t i) {
 1005 RemoteColMapIndices_sorted(i) = RemoteColMapIndices_unsorted(index(i));
 1006 RemotePIDs_sorted(i) = RemotePIDs_unsorted(index(i));
 1007 });
 1008
 1009 // Deep copy back from device to host
 1010 // Stash the RemotePIDs. Once remotePIDs is changed to become a Kokkos view, we can remove this and copy directly.
 1011 Teuchos::Array<int> PIDList(NumRemoteColGIDs);
 1012 // Unmanaged host view aliasing PIDList's storage, so the deep_copy
 1013 // below lands directly in the Teuchos::Array.
 1012 Kokkos::View<int*, Kokkos::HostSpace> RemotePIDs_sorted_host(PIDList.data(), NumRemoteColGIDs);
 1013 Kokkos::deep_copy(exec, RemotePIDs_sorted_host, RemotePIDs_sorted);
 1014 exec.fence();
 1015
 1016 remotePIDs = PIDList;
 1017 }
 1018
 1019 // Build permute array for *local* reindexing.
 1020
 1021 // Now fill front end. Two cases:
 1022 //
 1023 // (1) If the number of Local column GIDs is the same as the number
 1024 // of Local domain GIDs, we can simply read the domain GIDs into
 1025 // the front part of ColIndices, otherwise
 1026 //
 1027 // (2) We step through the GIDs of the domainMap, checking to see if
 1028 // each domain GID is a column GID. we want to do this to
 1029 // maintain a consistent ordering of GIDs between the columns
 1030 // and the domain.
 1031 if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
 1032 if (NumLocalColGIDs > 0) {
 1033 // Load Global Indices into first numMyCols elements column GID list
 1034 auto LocalColMapIndices = Kokkos::subview(ColMapIndices, Kokkos::make_pair((size_t)0, numDomainElements));
 1035 Kokkos::deep_copy(LocalColMapIndices, domainMap.getMyGlobalIndicesDevice());
 1036 }
 1037 } else {
 1038 // This part isn't actually tested in the unit tests
 1039 LO NumLocalAgain = 0;
 1040 // Scan over the LocalGIDs flags to compact only the domain GIDs that
 1041 // actually appear as column indices, preserving domain-Map order.
 1040 Kokkos::parallel_scan(
 1041 Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i, LO& update, const bool final) {
 1042 if (LocalGIDs_view(i)) {
 1043 if (final) {
 1044 ColMapIndices(update) = domainMap_local.getGlobalElement(i);
 1045 }
 1046 update++;
 1047 }
 1048 },
 1049 NumLocalAgain);
 1050
 1051 TEUCHOS_TEST_FOR_EXCEPTION(
 1052 static_cast<size_t>(NumLocalAgain) != NumLocalColGIDs,
 1053 std::runtime_error, prefix << "Local ID count test failed.");
 1054 }
 1055
 1056 // Make column Map
 1057 // NOTE(review): invalid() as the global element count asks the Map
 1058 // constructor to compute the global size — confirm against Tpetra::Map docs.
 1057 const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid();
 1058
 1059 colMap = rcp(new map_type(minus_one, ColMapIndices, domainMap.getIndexBase(),
 1060 domainMap.getComm()));
 1061
 1062 // Fill out colind_LID using local map
 1063 auto localColMap = colMap->getLocalMap();
 1064 Kokkos::parallel_for(
 1065 Kokkos::RangePolicy<execution_space>(0, colind_GID_view.size()), KOKKOS_LAMBDA(const int i) {
 1066 colind_LID_view(i) = localColMap.getLocalElement(colind_GID_view(i));
 1067 });
 1068}
1069
1070// Generates a list of owning PIDs based on two transfers (aka import/export objects)
1071// Let:
1072// OwningMap = useReverseModeForOwnership ? transferThatDefinesOwnership.getTargetMap() : transferThatDefinesOwnership.getSourceMap();
1073// MapAo = useReverseModeForOwnership ? transferThatDefinesOwnership.getSourceMap() : transferThatDefinesOwnership.getTargetMap();
1074// MapAm = useReverseModeForMigration ? transferThatDefinesMigration.getTargetMap() : transferThatDefinesMigration.getSourceMap();
1075// VectorMap = useReverseModeForMigration ? transferThatDefinesMigration.getSourceMap() : transferThatDefinesMigration.getTargetMap();
1076// Precondition:
1077// 1) MapAo.isSameAs(*MapAm) - map compatibility between transfers
1078// 2) VectorMap->isSameAs(*owningPIDs->getMap()) - map compatibility between transfer & vector
1079// 3) OwningMap->isOneToOne() - owning map is 1-to-1
1080// --- Precondition 3 is only checked in DEBUG mode ---
1081// Postcondition:
1082// owningPIDs[VectorMap->getLocalElement(GID i)] = j iff (OwningMap->isLocalElement(GID i) on rank j)
1083template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
1084void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
1086 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesMigration,
1091
1092 // Resolve the four maps implied by the two transfers and the requested
1093 // forward/reverse directions.
1092 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> OwningMap = useReverseModeForOwnership ? transferThatDefinesOwnership.getTargetMap() : transferThatDefinesOwnership.getSourceMap();
1093 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> MapAo = useReverseModeForOwnership ? transferThatDefinesOwnership.getSourceMap() : transferThatDefinesOwnership.getTargetMap();
1094 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> MapAm = useReverseModeForMigration ? transferThatDefinesMigration.getTargetMap() : transferThatDefinesMigration.getSourceMap();
1095 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> VectorMap = useReverseModeForMigration ? transferThatDefinesMigration.getSourceMap() : transferThatDefinesMigration.getTargetMap();
1096
1097 // Enforce the documented preconditions: the two transfers must share a
1098 // map, and the output vector must live on the migration target map.
1097 TEUCHOS_TEST_FOR_EXCEPTION(!MapAo->isSameAs(*MapAm), std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch between transfers");
1098 TEUCHOS_TEST_FOR_EXCEPTION(!VectorMap->isSameAs(*owningPIDs.getMap()), std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch transfer and vector");
1099#ifdef HAVE_TPETRA_DEBUG
1100 // Precondition 3 (owning map is 1-to-1) is only checked in debug builds.
1100 TEUCHOS_TEST_FOR_EXCEPTION(!OwningMap->isOneToOne(), std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
1101#endif
1102
1103 int rank = OwningMap->getComm()->getRank();
1104 // Generate "A" vector and fill it with owning information. We can read this from transferThatDefinesOwnership w/o communication
1105 // Note: Due to the 1-to-1 requirement, several of these options throw
1106 // Determine whether the ownership transfer is an Import or an Export.
1107 const import_type* ownAsImport = dynamic_cast<const import_type*>(&transferThatDefinesOwnership);
1108 const export_type* ownAsExport = dynamic_cast<const export_type*>(&transferThatDefinesOwnership);
1109
1110 // NOTE(review): 'temp' is the intermediate int Vector holding owning
1111 // PIDs before migration — confirm it is constructed on MapAo.
1110 Teuchos::ArrayRCP<int> pids = temp.getDataNonConst();
1111 Teuchos::ArrayView<int> v_pids = pids();
1113 TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
1115 getPids(*ownAsImport, v_pids, false);
1117 TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector this option not yet implemented");
1118 } else {
1119 TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
1120 }
1121
1122 // The migration transfer must be a genuine Import or Export.
1122 const import_type* xferAsImport = dynamic_cast<const import_type*>(&transferThatDefinesMigration);
1123 const export_type* xferAsExport = dynamic_cast<const export_type*>(&transferThatDefinesMigration);
1124 TEUCHOS_TEST_FOR_EXCEPTION(!xferAsImport && !xferAsExport, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector transfer undefined");
1125
1126 // Migrate from "A" vector to output vector
1127 // Seed every local entry with this process's rank before the migration.
1127 owningPIDs.putScalar(rank);
1134 else
1136}
1137
1138} // namespace Import_Util
1139} // namespace Tpetra
1140
1141#endif // TPETRA_IMPORT_UTIL_HPP
Declaration of the Tpetra::CrsMatrix class.
Add specializations of Teuchos::Details::MpiTypeTraits for Kokkos::complex<float> and Kokkos::complex...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
void lowCommunicationMakeColMapAndReindex(const Teuchos::ArrayView< const size_t > &rowptr, const Teuchos::ArrayView< LocalOrdinal > &colind_LID, const Teuchos::ArrayView< GlobalOrdinal > &colind_GID, const Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMapRCP, const Teuchos::ArrayView< const int > &owningPIDs, Teuchos::Array< int > &remotePIDs, Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &colMap)
lowCommunicationMakeColMapAndReindex
void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer< LocalOrdinal, GlobalOrdinal, Node > &transferThatDefinesOwnership, bool useReverseModeForOwnership, const ::Tpetra::Details::Transfer< LocalOrdinal, GlobalOrdinal, Node > &transferForMigratingData, bool useReverseModeForMigration, Tpetra::Vector< int, LocalOrdinal, GlobalOrdinal, Node > &owningPIDs)
Generates a list of owning PIDs based on two transfers (aka import/export objects) Let: OwningMap = u...
void sortAndMergeCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< Ordinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals)
Sort and merge the entries of the (raw CSR) matrix by column index within each row.
void sortCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< Ordinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals)
Sort the entries of the (raw CSR) matrix by column index within each row.
void getPids(const Tpetra::Import< LocalOrdinal, GlobalOrdinal, Node > &Importer, Teuchos::Array< int > &pids, bool use_minus_one_for_local)
Like getPidGidPairs, but just gets the PIDs, ordered by the column Map.
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
Common base class of Import and Export.
Teuchos::ArrayView< const LO > getExportLIDs() const
List of entries in the source Map that will be sent to other processes.
size_t getNumExportIDs() const
Number of entries that must be sent by the calling process to other processes.
Teuchos::ArrayView< const int > getExportPIDs() const
List of processes to which entries will be sent.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2, const bool stableSort=false)
Sort the first array, and apply the resulting permutation to the second array.
void sort3(const IT1 &first1, const IT1 &last1, const IT2 &first2, const IT3 &first3, const bool stableSort=false)
Sort the first array, and apply the same permutation to the second and third arrays.
@ REPLACE
Replace existing values with new values.