Tpetra_Import_Util2.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_IMPORT_UTIL2_HPP
11#define TPETRA_IMPORT_UTIL2_HPP
12
17
18#include "Tpetra_ConfigDefs.hpp"
19#include "Tpetra_Import.hpp"
20#include "Tpetra_HashTable.hpp"
21#include "Tpetra_Map.hpp"
22#include "Tpetra_Util.hpp"
23#include "Tpetra_Distributor.hpp"
26#include "Tpetra_Vector.hpp"
27#include "Kokkos_DualView.hpp"
28#include "KokkosSparse_SortCrs.hpp"
29#include <Teuchos_Array.hpp>
31#include <Kokkos_UnorderedMap.hpp>
32#include <unordered_map>
33#include <utility>
34#include <set>
35
37
38#include <Kokkos_Core.hpp>
39#include <Kokkos_Sort.hpp>
40
41namespace Tpetra {
42namespace Import_Util {
43
46template <typename Scalar, typename Ordinal>
47void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
48 const Teuchos::ArrayView<Ordinal>& CRS_colind,
49 const Teuchos::ArrayView<Scalar>& CRS_vals);
50
51template <typename Ordinal>
52void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
53 const Teuchos::ArrayView<Ordinal>& CRS_colind);
54
55template <typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
56void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
57 const colind_array_type& CRS_colind,
58 const vals_array_type& CRS_vals);
59
60template <typename rowptr_array_type, typename colind_array_type>
61void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
62 const colind_array_type& CRS_colind);
63
68template <typename Scalar, typename Ordinal>
69void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
70 const Teuchos::ArrayView<Ordinal>& CRS_colind,
71 const Teuchos::ArrayView<Scalar>& CRS_vals);
72
73template <typename Ordinal>
74void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
75 const Teuchos::ArrayView<Ordinal>& CRS_colind);
76
77template <class rowptr_view_type, class colind_view_type, class vals_view_type>
78void sortAndMergeCrsEntries(const rowptr_view_type& CRS_rowptr,
79 const colind_view_type& CRS_colind,
80 const vals_view_type& CRS_vals);
81
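// Editorial note: a minimal usage sketch for the raw-CSR overloads declared
// above (not part of the original header; the sample data and std::vector
// wrappers are hypothetical). Given a 2x3 matrix stored as rowptr/colind/vals
// with unsorted, duplicated column indices, sortAndMergeCrsEntries sorts each
// row by column index and merges duplicates by adding their values:
//
//   std::vector<size_t> rowptr = {0, 3, 5};
//   std::vector<int>    colind = {2, 0, 2, 1, 0};
//   std::vector<double> vals   = {1.0, 2.0, 3.0, 4.0, 5.0};
//   Tpetra::Import_Util::sortAndMergeCrsEntries(
//       Teuchos::ArrayView<size_t>(rowptr.data(), rowptr.size()),
//       Teuchos::ArrayView<int>(colind.data(), colind.size()),
//       Teuchos::ArrayView<double>(vals.data(), vals.size()));
//   // Row 0 becomes {(0, 2.0), (2, 4.0)}: the duplicate column 2 is merged
//   // (1.0 + 3.0) and rowptr is compacted to {0, 2, 4}.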
97template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
98void lowCommunicationMakeColMapAndReindex(
99 const Teuchos::ArrayView<const size_t>& rowptr,
100 const Teuchos::ArrayView<LocalOrdinal>& colind_LID,
101 const Teuchos::ArrayView<GlobalOrdinal>& colind_GID,
102 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
103 const Teuchos::ArrayView<const int>& owningPIDs,
104 Teuchos::Array<int>& remotePIDs,
105 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap);
106
111template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
112void lowCommunicationMakeColMapAndReindex(
113 const Kokkos::View<size_t*, typename Node::device_type> rowptr_view,
114 const Kokkos::View<LocalOrdinal*, typename Node::device_type> colind_LID_view,
115 const Kokkos::View<GlobalOrdinal*, typename Node::device_type> colind_GID_view,
116 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
117 const Teuchos::ArrayView<const int>& owningPIDs,
118 Teuchos::Array<int>& remotePIDs,
119 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap);
120
134template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
135void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
136 bool useReverseModeForOwnership,
137 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferForMigratingData,
138 bool useReverseModeForMigration,
139 Tpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node>& owningPIDs);
140
141} // namespace Import_Util
142} // namespace Tpetra
143
144//
145// Implementations
146//
147
148namespace Tpetra {
149namespace Import_Util {
150
151template <typename PID, typename GlobalOrdinal>
152bool sort_PID_then_GID(const std::pair<PID, GlobalOrdinal>& a,
153 const std::pair<PID, GlobalOrdinal>& b) {
154 if (a.first != b.first)
155 return (a.first < b.first);
156 return (a.second < b.second);
157}
158
159template <typename PID,
160 typename GlobalOrdinal,
161 typename LocalOrdinal>
162bool sort_PID_then_pair_GID_LID(const std::pair<PID, std::pair<GlobalOrdinal, LocalOrdinal>>& a,
163 const std::pair<PID, std::pair<GlobalOrdinal, LocalOrdinal>>& b) {
164 if (a.first != b.first)
165 return a.first < b.first;
166 else
167 return (a.second.first < b.second.first);
168}
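// Editorial note (not part of the original header): these comparators are
// intended for std::sort so that pairs group by owning process id first and
// by GID within each process, as reverseNeighborDiscovery does below. A small
// hypothetical example:
//
//   std::vector<std::pair<int, long long>> pairs = {{2, 7}, {1, 9}, {2, 3}};
//   std::sort(pairs.begin(), pairs.end(),
//             Tpetra::Import_Util::sort_PID_then_GID<int, long long>);
//   // pairs is now {(1, 9), (2, 3), (2, 7)}.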
169
170template <typename Scalar,
171 typename LocalOrdinal,
172 typename GlobalOrdinal,
173 typename Node>
174void reverseNeighborDiscovery(const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& SourceMatrix,
175 const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type& rowptr,
176 const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type& colind,
177 const Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& RowTransfer,
178 Teuchos::RCP<const Tpetra::Import<LocalOrdinal, GlobalOrdinal, Node>> MyImporter,
179 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> MyDomainMap,
180 Teuchos::ArrayRCP<int>& type3PIDs,
181 Teuchos::ArrayRCP<LocalOrdinal>& type3LIDs,
182 Teuchos::RCP<const Teuchos::Comm<int>>& rcomm) {
183#ifdef HAVE_TPETRACORE_MPI
184 using ::Tpetra::Details::Behavior;
185 typedef LocalOrdinal LO;
186 typedef GlobalOrdinal GO;
187 typedef std::pair<GO, GO> pidgidpair_t;
188 using Teuchos::RCP;
189 const std::string prefix{" Import_Util2::ReverseND:: "};
190 const std::string label("IU2::Neighbor");
191
192 // There can be no neighbor discovery if you don't have an importer
193 if (MyImporter.is_null()) return;
194
195 std::ostringstream errstr;
196 bool error = false;
197 auto const comm = MyDomainMap->getComm();
198
199 MPI_Comm rawComm = getRawMpiComm(*comm);
200 const int MyPID = rcomm->getRank();
201
202 // Things related to messages I am sending in forward mode (RowTransfer)
203 // *** Note: this will be incorrect for transferAndFillComplete if it is in reverse mode. FIXME cbl.
204 auto ExportPIDs = RowTransfer.getExportPIDs();
205 auto ExportLIDs = RowTransfer.getExportLIDs();
206 auto NumExportLIDs = RowTransfer.getNumExportIDs();
207
208 Distributor& Distor = MyImporter->getDistributor();
209 const size_t NumRecvs = Distor.getNumReceives();
210 const size_t NumSends = Distor.getNumSends();
211 auto RemoteLIDs = MyImporter->getRemoteLIDs();
212 auto const ProcsFrom = Distor.getProcsFrom();
213 auto const ProcsTo = Distor.getProcsTo();
214
215 auto LengthsFrom = Distor.getLengthsFrom();
216 auto MyColMap = SourceMatrix.getColMap();
217 const size_t numCols = MyColMap->getLocalNumElements();
218 RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> target = MyImporter->getTargetMap();
219
220 // Get the owning pids in a special way,
221 // s.t. ProcsFrom[RemotePIDs[i]] is the proc that owns RemoteLIDs[j]....
222 Teuchos::Array<int> RemotePIDOrder(numCols, -1);
223
224 // For each remote ID, record index into ProcsFrom, who owns it.
225 for (size_t i = 0, j = 0; i < NumRecvs; ++i) {
226 for (size_t k = 0; k < LengthsFrom[i]; ++k) {
227 const int pid = ProcsFrom[i];
228 if (pid != MyPID) {
229 RemotePIDOrder[RemoteLIDs[j]] = i;
230 }
231 j++;
232 }
233 }
234
235 // Step One: Start tacking the (GID,PID) pairs on the std sets
236 //
237 // For each index in ProcsFrom, we will insert into a set of (PID,
238 // GID) pairs, in order to build a list of such pairs for each of
239 // those processes. Since this is building a reverse, we will send
240 // to these processes.
241 Teuchos::Array<int> ReverseSendSizes(NumRecvs, 0);
242 // do this as C array to avoid Teuchos::Array value initialization of all reserved memory
243 Teuchos::Array<Teuchos::ArrayRCP<pidgidpair_t>> RSB(NumRecvs);
244
245 {
246 Tpetra::Details::ProfilingRegion set_all("Import_Util2::ReverseND::isMMallSetRSB");
247
248 // 25 Jul 2018: CBL
249 // todo: std::unordered_set (hash table),
250 // with an adequate prereservation ("bucket count").
251 // An unordered_set has to have a custom hasher for the pid/gid pair.
252 // However, when pidsets is copied to RSB, it will be in key
253 // order _not_ in pid,gid order. (unlike std::set).
254 // Implement this with a reserve, and time BOTH building pidsets
255 // _and_ the sort after the receive. Even if unordered_set saves
256 // time, if it causes the sort to be longer, it's not a win.
257
258 Teuchos::Array<std::set<pidgidpair_t>> pidsets(NumRecvs);
259 {
260 Tpetra::Details::ProfilingRegion set_insert("Import_Util2::ReverseND::isMMallSetRSBinsert");
261 for (size_t i = 0; i < NumExportLIDs; i++) {
262 LO lid = ExportLIDs[i];
263 GO exp_pid = ExportPIDs[i];
264 for (auto j = rowptr[lid]; j < rowptr[lid + 1]; j++) {
265 int pid_order = RemotePIDOrder[colind[j]];
266 if (pid_order != -1) {
267 GO gid = MyColMap->getGlobalElement(colind[j]); // Epetra SM.GCID46 =>sm->graph-> {colmap(colind)}
268 auto tpair = pidgidpair_t(exp_pid, gid);
269 pidsets[pid_order].insert(pidsets[pid_order].end(), tpair);
270 }
271 }
272 }
273 }
274
275 {
276 Tpetra::Details::ProfilingRegion set_cpy("Import_Util2::ReverseND::isMMallSetRSBcpy");
277 int jj = 0;
278 for (auto&& ps : pidsets) {
279 auto s = ps.size();
280 RSB[jj] = Teuchos::arcp(new pidgidpair_t[s], 0, s, true);
281 std::copy(ps.begin(), ps.end(), RSB[jj]);
282 ReverseSendSizes[jj] = s;
283 ++jj;
284 }
285 }
286 } // end of set based packing.
287
288 Teuchos::Array<int> ReverseRecvSizes(NumSends, -1);
289 Teuchos::Array<MPI_Request> rawBreq(ProcsFrom.size() + ProcsTo.size(), MPI_REQUEST_NULL);
290 // 25 Jul 2018: MPI_TAG_UB is the largest tag value; could be < 32768.
291 const int mpi_tag_base_ = 3;
292
293 int mpireq_idx = 0;
294 for (int i = 0; i < ProcsTo.size(); ++i) {
295 int Rec_Tag = mpi_tag_base_ + ProcsTo[i];
296 int* thisrecv = (int*)(&ReverseRecvSizes[i]);
297 MPI_Request rawRequest = MPI_REQUEST_NULL;
298 MPI_Irecv(const_cast<int*>(thisrecv),
299 1,
300 MPI_INT,
301 ProcsTo[i],
302 Rec_Tag,
303 rawComm,
304 &rawRequest);
305 rawBreq[mpireq_idx++] = rawRequest;
306 }
307 for (int i = 0; i < ProcsFrom.size(); ++i) {
308 int Send_Tag = mpi_tag_base_ + MyPID;
309 int* mysend = (int*)(&ReverseSendSizes[i]);
310 MPI_Request rawRequest = MPI_REQUEST_NULL;
311 MPI_Isend(mysend,
312 1,
313 MPI_INT,
314 ProcsFrom[i],
315 Send_Tag,
316 rawComm,
317 &rawRequest);
318 rawBreq[mpireq_idx++] = rawRequest;
319 }
320 Teuchos::Array<MPI_Status> rawBstatus(rawBreq.size());
321#ifdef HAVE_TPETRA_DEBUG
322 const int err1 =
323#endif
324 MPI_Waitall(rawBreq.size(), rawBreq.getRawPtr(),
325 rawBstatus.getRawPtr());
326
327#ifdef HAVE_TPETRA_DEBUG
328 if (err1) {
329 errstr << MyPID << "sE1 reverseNeighborDiscovery Mpi_Waitall error on send ";
330 error = true;
331 std::cerr << errstr.str() << std::flush;
332 }
333#endif
334
335 int totalexportpairrecsize = 0;
336 for (size_t i = 0; i < NumSends; ++i) {
337 totalexportpairrecsize += ReverseRecvSizes[i];
338#ifdef HAVE_TPETRA_DEBUG
339 if (ReverseRecvSizes[i] < 0) {
340 errstr << MyPID << "E4 reverseNeighborDiscovery: got < 0 for receive size " << ReverseRecvSizes[i] << std::endl;
341 error = true;
342 }
343#endif
344 }
345 Teuchos::ArrayRCP<pidgidpair_t> AllReverseRecv = Teuchos::arcp(new pidgidpair_t[totalexportpairrecsize], 0, totalexportpairrecsize, true);
346 int offset = 0;
347 mpireq_idx = 0;
348 for (int i = 0; i < ProcsTo.size(); ++i) {
349 int recv_data_size = ReverseRecvSizes[i] * 2;
350 int recvData_MPI_Tag = mpi_tag_base_ * 2 + ProcsTo[i];
351 MPI_Request rawRequest = MPI_REQUEST_NULL;
352 GO* rec_bptr = (GO*)(&AllReverseRecv[offset]);
353 offset += ReverseRecvSizes[i];
354 MPI_Irecv(rec_bptr,
355 recv_data_size,
356 ::Tpetra::Details::MpiTypeTraits<GO>::getType(rec_bptr[0]),
357 ProcsTo[i],
358 recvData_MPI_Tag,
359 rawComm,
360 &rawRequest);
361 rawBreq[mpireq_idx++] = rawRequest;
362 }
363 for (int ii = 0; ii < ProcsFrom.size(); ++ii) {
364 GO* send_bptr = (GO*)(RSB[ii].getRawPtr());
365 MPI_Request rawSequest = MPI_REQUEST_NULL;
366 int send_data_size = ReverseSendSizes[ii] * 2; // 2 == count of pair
367 int sendData_MPI_Tag = mpi_tag_base_ * 2 + MyPID;
368 MPI_Isend(send_bptr,
369 send_data_size,
370 ::Tpetra::Details::MpiTypeTraits<GO>::getType(send_bptr[0]),
371 ProcsFrom[ii],
372 sendData_MPI_Tag,
373 rawComm,
374 &rawSequest);
375
376 rawBreq[mpireq_idx++] = rawSequest;
377 }
378#ifdef HAVE_TPETRA_DEBUG
379 const int err =
380#endif
381 MPI_Waitall(rawBreq.size(),
382 rawBreq.getRawPtr(),
383 rawBstatus.getRawPtr());
384#ifdef HAVE_TPETRA_DEBUG
385 if (err) {
386 errstr << MyPID << "E3.r reverseNeighborDiscovery Mpi_Waitall error on receive ";
387 error = true;
388 std::cerr << errstr.str() << std::flush;
389 }
390#endif
391 std::sort(AllReverseRecv.begin(), AllReverseRecv.end(), Tpetra::Import_Util::sort_PID_then_GID<GlobalOrdinal, GlobalOrdinal>);
392
393 auto newEndOfPairs = std::unique(AllReverseRecv.begin(), AllReverseRecv.end());
394 // don't resize to remove non-unique, just use the end-of-unique iterator
395 if (AllReverseRecv.begin() == newEndOfPairs) return;
396 int ARRsize = std::distance(AllReverseRecv.begin(), newEndOfPairs);
397 auto rPIDs = Teuchos::arcp(new int[ARRsize], 0, ARRsize, true);
398 auto rLIDs = Teuchos::arcp(new LocalOrdinal[ARRsize], 0, ARRsize, true);
399
400 int tsize = 0;
401 for (auto itr = AllReverseRecv.begin(); itr != newEndOfPairs; ++itr) {
402 if ((int)(itr->first) != MyPID) {
403 rPIDs[tsize] = (int)itr->first;
404 LocalOrdinal lid = MyDomainMap->getLocalElement(itr->second);
405 rLIDs[tsize] = lid;
406 tsize++;
407 }
408 }
409
410 type3PIDs = rPIDs.persistingView(0, tsize);
411 type3LIDs = rLIDs.persistingView(0, tsize);
412
413 if (error) {
414 std::cerr << errstr.str() << std::flush;
415 comm->barrier();
416 comm->barrier();
417 comm->barrier();
418 MPI_Abort(MPI_COMM_WORLD, -1);
419 }
420#endif
421}
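// Editorial summary of the message pattern above (descriptive only, not part
// of the original source): reverseNeighborDiscovery performs the usual
// two-phase exchange. Phase one posts one MPI_Irecv per process in ProcsTo
// for an incoming pair count and one MPI_Isend per process in ProcsFrom with
// the locally packed count, then MPI_Waitall completes the handshake. Phase
// two sizes AllReverseRecv from those counts and repeats the
// Irecv/Isend/Waitall exchange for the (PID,GID) pair payloads, which are
// then sorted, uniquified, and filtered into type3PIDs/type3LIDs.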
422
423// Note: This should get merged with the other Tpetra sort routines eventually.
424template <typename Scalar, typename Ordinal>
425void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
426 const Teuchos::ArrayView<Ordinal>& CRS_colind,
427 const Teuchos::ArrayView<Scalar>& CRS_vals) {
428 auto rowptr_k = Kokkos::View<size_t*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(CRS_rowptr.data(), CRS_rowptr.size());
429 auto colind_k = Kokkos::View<Ordinal*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(CRS_colind.data(), CRS_colind.size());
430 auto vals_k = Kokkos::View<Scalar*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(CRS_vals.data(), CRS_vals.size());
431 sortCrsEntries(rowptr_k, colind_k, vals_k);
432}
433
434template <typename Ordinal>
435void sortCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
436 const Teuchos::ArrayView<Ordinal>& CRS_colind) {
437 auto rowptr_k = Kokkos::View<size_t*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(CRS_rowptr.data(), CRS_rowptr.size());
438 auto colind_k = Kokkos::View<Ordinal*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(CRS_colind.data(), CRS_colind.size());
439 sortCrsEntries(rowptr_k, colind_k);
440}
441
442template <typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
443void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
444 const colind_array_type& CRS_colind,
445 const vals_array_type& CRS_vals) {
446 KokkosSparse::sort_crs_matrix(CRS_rowptr, CRS_colind, CRS_vals);
447}
448
449template <typename rowptr_array_type, typename colind_array_type>
450void sortCrsEntries(const rowptr_array_type& CRS_rowptr,
451 const colind_array_type& CRS_colind) {
452 KokkosSparse::sort_crs_graph(CRS_rowptr, CRS_colind);
453}
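// Editorial note (not part of the original header): the two Kokkos::View
// overloads above simply forward to KokkosSparse::sort_crs_matrix /
// sort_crs_graph, so they accept any compatible trio of views. A hypothetical
// call, assuming Kokkos has been initialized and the default spaces are used:
//
//   Kokkos::View<size_t*> rowptr("rowptr", numRows + 1);
//   Kokkos::View<int*>    colind("colind", numEntries);
//   Kokkos::View<double*> vals("vals", numEntries);
//   // ... fill the CSR arrays on device ...
//   Tpetra::Import_Util::sortCrsEntries(rowptr, colind, vals);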
454
455// Note: This should get merged with the other Tpetra sort routines eventually.
456template <typename Scalar, typename Ordinal>
457void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
458 const Teuchos::ArrayView<Ordinal>& CRS_colind,
459 const Teuchos::ArrayView<Scalar>& CRS_vals) {
460 // For each row, sort column entries from smallest to largest,
461 // merging column ids that are identical by adding values. Use shell
462 // sort. Stable sort so it is fast if indices are already sorted.
463 // Code copied from Epetra_CrsMatrix::SortEntries()
464
465 if (CRS_rowptr.size() == 0) {
466 return; // no rows, so nothing to sort
467 }
468 const size_t NumRows = CRS_rowptr.size() - 1;
469 const size_t nnz = CRS_colind.size();
470 size_t new_curr = CRS_rowptr[0];
471 size_t old_curr = CRS_rowptr[0];
472
473 const bool permute_values_array = CRS_vals.size() > 0;
474
475 for (size_t i = 0; i < NumRows; i++) {
476 const size_t old_rowptr_i = CRS_rowptr[i];
477 CRS_rowptr[i] = new_curr;
478 if (old_rowptr_i >= nnz) continue;
479
480 size_t NumEntries = CRS_rowptr[i + 1] - old_rowptr_i;
481 Teuchos::ArrayRCP<Scalar> locValues;
482 if (permute_values_array)
483 locValues = Teuchos::arcp<Scalar>(&CRS_vals[old_rowptr_i], 0, NumEntries, false);
484 Teuchos::ArrayRCP<Ordinal> locIndices(&CRS_colind[old_rowptr_i], 0, NumEntries, false);
485
486 // Sort phase
487 Ordinal n = NumEntries;
488 Ordinal m = n / 2;
489
490 while (m > 0) {
491 Ordinal max = n - m;
492 for (Ordinal j = 0; j < max; j++) {
493 for (Ordinal k = j; k >= 0; k -= m) {
494 if (locIndices[k + m] >= locIndices[k])
495 break;
496 if (permute_values_array) {
497 Scalar dtemp = locValues[k + m];
498 locValues[k + m] = locValues[k];
499 locValues[k] = dtemp;
500 }
501 Ordinal itemp = locIndices[k + m];
502 locIndices[k + m] = locIndices[k];
503 locIndices[k] = itemp;
504 }
505 }
506 m = m / 2;
507 }
508
509 // Merge & shrink
510 for (size_t j = old_rowptr_i; j < CRS_rowptr[i + 1]; j++) {
511 if (j > old_rowptr_i && CRS_colind[j] == CRS_colind[new_curr - 1]) {
512 if (permute_values_array) CRS_vals[new_curr - 1] += CRS_vals[j];
513 } else if (new_curr == j) {
514 new_curr++;
515 } else {
516 CRS_colind[new_curr] = CRS_colind[j];
517 if (permute_values_array) CRS_vals[new_curr] = CRS_vals[j];
518 new_curr++;
519 }
520 }
521 old_curr = new_curr;
522 }
523
524 CRS_rowptr[NumRows] = new_curr;
525}
526
527template <typename Ordinal>
528void sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t>& CRS_rowptr,
529 const Teuchos::ArrayView<Ordinal>& CRS_colind) {
530 Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type> CRS_vals;
531 sortAndMergeCrsEntries(CRS_rowptr, CRS_colind, CRS_vals);
532}
533
534template <class rowptr_view_type, class colind_view_type, class vals_view_type>
535void sortAndMergeCrsEntries(rowptr_view_type& CRS_rowptr,
536 colind_view_type& CRS_colind,
537 vals_view_type& CRS_vals) {
538 using execution_space = typename vals_view_type::execution_space;
539
540 auto CRS_rowptr_in = CRS_rowptr;
541 auto CRS_colind_in = CRS_colind;
542 auto CRS_vals_in = CRS_vals;
543
544 KokkosSparse::sort_and_merge_matrix<execution_space, rowptr_view_type,
545 colind_view_type, vals_view_type>(CRS_rowptr_in, CRS_colind_in, CRS_vals_in,
546 CRS_rowptr, CRS_colind, CRS_vals);
547}
548
549template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
550void lowCommunicationMakeColMapAndReindexSerial(const Teuchos::ArrayView<const size_t>& rowptr,
551 const Teuchos::ArrayView<LocalOrdinal>& colind_LID,
552 const Teuchos::ArrayView<GlobalOrdinal>& colind_GID,
553 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
554 const Teuchos::ArrayView<const int>& owningPIDs,
555 Teuchos::Array<int>& remotePIDs,
556 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap) {
557 using Teuchos::rcp;
558 typedef LocalOrdinal LO;
559 typedef GlobalOrdinal GO;
560 typedef Tpetra::global_size_t GST;
561 typedef Tpetra::Map<LO, GO, Node> map_type;
562 const char prefix[] = "lowCommunicationMakeColMapAndReindexSerial: ";
563
564 // The domainMap is an RCP because there is a shortcut for a
565 // (common) special case to return the columnMap = domainMap.
566 const map_type& domainMap = *domainMapRCP;
567
568 // Scan all column indices and sort into two groups:
569 // Local: those whose GID matches a GID of the domain map on this processor and
570 // Remote: All others.
571 const size_t numDomainElements = domainMap.getLocalNumElements();
572 Teuchos::Array<bool> LocalGIDs;
573 if (numDomainElements > 0) {
574 LocalGIDs.resize(numDomainElements, false); // Assume domain GIDs are not local
575 }
576
577 // In principle it is good to have RemoteGIDs and RemoteGIDList be as
578 // long as the number of remote GIDs on this processor, but this
579 // would require two passes through the column IDs, so we make it
580 // the max of 100 and the number of block rows.
581 //
582 // FIXME (mfh 11 Feb 2015) Tpetra::Details::HashTable can hold at
583 // most INT_MAX entries, but it's possible to have more rows than
584 // that (if size_t is 64 bits and int is 32 bits).
585 const size_t numMyRows = rowptr.size() - 1;
586 const int hashsize = std::max(static_cast<int>(numMyRows), 100);
587
588 Tpetra::Details::HashTable<GO, LO> RemoteGIDs(hashsize);
589 Teuchos::Array<GO> RemoteGIDList;
590 RemoteGIDList.reserve(hashsize);
591 Teuchos::Array<int> PIDList;
592 PIDList.reserve(hashsize);
593
594 // Here we start using the *LocalOrdinal* colind_LID array. This is
595 // safe even if both columnIndices arrays are actually the same
596 // (because LocalOrdinal==GO). For *local* GID's set
597 // colind_LID with with their LID in the domainMap. For *remote*
598 // GIDs, we set colind_LID with (numDomainElements+NumRemoteColGIDs)
599 // before the increment of the remote count. These numberings will
600 // be separate because no local LID is greater than
601 // numDomainElements.
602
603 size_t NumLocalColGIDs = 0;
604 LO NumRemoteColGIDs = 0;
605 for (size_t i = 0; i < numMyRows; ++i) {
606 for (size_t j = rowptr[i]; j < rowptr[i + 1]; ++j) {
607 const GO GID = colind_GID[j];
608 // Check if GID matches a row GID
609 const LO LID = domainMap.getLocalElement(GID);
610 if (LID != -1) {
611 const bool alreadyFound = LocalGIDs[LID];
612 if (!alreadyFound) {
613 LocalGIDs[LID] = true; // There is a column in the graph associated with this domain map GID
614 NumLocalColGIDs++;
615 }
616 colind_LID[j] = LID;
617 } else {
618 const LO hash_value = RemoteGIDs.get(GID);
619 if (hash_value == -1) { // This means its a new remote GID
620 const int PID = owningPIDs[j];
621 TEUCHOS_TEST_FOR_EXCEPTION(
622 PID == -1, std::invalid_argument, prefix << "Cannot figure out if "
623 "PID is owned.");
624 colind_LID[j] = static_cast<LO>(numDomainElements + NumRemoteColGIDs);
625 RemoteGIDs.add(GID, NumRemoteColGIDs);
626 RemoteGIDList.push_back(GID);
627 PIDList.push_back(PID);
628 NumRemoteColGIDs++;
629 } else {
630 colind_LID[j] = static_cast<LO>(numDomainElements + hash_value);
631 }
632 }
633 }
634 }
635
636 // Possible short-circuit: If all domain map GIDs are present as
637 // column indices, then set ColMap=domainMap and quit.
638 if (domainMap.getComm()->getSize() == 1) {
639 // Sanity check: When there is only one process, there can be no
640 // remoteGIDs.
641 TEUCHOS_TEST_FOR_EXCEPTION(
642 NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
643 "process in the domain Map's communicator, which means that there are no "
644 "\"remote\" indices. Nevertheless, some column indices are not in the "
645 "domain Map.");
646 if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
647 // In this case, we just use the domainMap's indices, which is,
648 // not coincidentally, what we clobbered colind with up above
649 // anyway. No further reindexing is needed.
650 colMap = domainMapRCP;
651 return;
652 }
653 }
654
655 // Now build the array containing column GIDs
656 // Build back end, containing remote GIDs, first
657 const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
658 Teuchos::Array<GO> ColIndices;
659 GO* RemoteColIndices = NULL;
660 if (numMyCols > 0) {
661 ColIndices.resize(numMyCols);
662 if (NumLocalColGIDs != static_cast<size_t>(numMyCols)) {
663 RemoteColIndices = &ColIndices[NumLocalColGIDs]; // Points to back half of ColIndices
664 }
665 }
666
667 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
668 RemoteColIndices[i] = RemoteGIDList[i];
669 }
670
671 // Build permute array for *remote* reindexing.
672 Teuchos::Array<LO> RemotePermuteIDs(NumRemoteColGIDs);
673 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
674 RemotePermuteIDs[i] = i;
675 }
676
677 // Sort External column indices so that all columns coming from a
678 // given remote processor are contiguous. This is a sort with two
679 // auxiliary arrays: RemoteColIndices and RemotePermuteIDs.
680 Tpetra::sort3(PIDList.begin(), PIDList.end(),
681 ColIndices.begin() + NumLocalColGIDs,
682 RemotePermuteIDs.begin());
683
684 // Stash the RemotePIDs.
685 //
686 // Note: If Teuchos::Array had a shrink_to_fit like std::vector,
687 // we'd call it here.
688 remotePIDs = PIDList;
689
690 // Sort external column indices so that columns from a given remote
691 // processor are not only contiguous but also in ascending
692 // order. NOTE: I don't know if the number of externals associated
693 // with a given remote processor is known at this point ... so I
694 // count them here.
695
696 // NTS: Only sort the RemoteColIndices this time...
697 LO StartCurrent = 0, StartNext = 1;
698 while (StartNext < NumRemoteColGIDs) {
699 if (PIDList[StartNext] == PIDList[StartNext - 1]) {
700 StartNext++;
701 } else {
702 Tpetra::sort2(ColIndices.begin() + NumLocalColGIDs + StartCurrent,
703 ColIndices.begin() + NumLocalColGIDs + StartNext,
704 RemotePermuteIDs.begin() + StartCurrent);
705 StartCurrent = StartNext;
706 StartNext++;
707 }
708 }
709 Tpetra::sort2(ColIndices.begin() + NumLocalColGIDs + StartCurrent,
710 ColIndices.begin() + NumLocalColGIDs + StartNext,
711 RemotePermuteIDs.begin() + StartCurrent);
712
713 // Reverse the permutation to get the information we actually care about
714 Teuchos::Array<LO> ReverseRemotePermuteIDs(NumRemoteColGIDs);
715 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
716 ReverseRemotePermuteIDs[RemotePermuteIDs[i]] = i;
717 }
718
719 // Build permute array for *local* reindexing.
720 bool use_local_permute = false;
721 Teuchos::Array<LO> LocalPermuteIDs(numDomainElements);
722
723 // Now fill front end. Two cases:
724 //
725 // (1) If the number of Local column GIDs is the same as the number
726 // of Local domain GIDs, we can simply read the domain GIDs into
727 // the front part of ColIndices, otherwise
728 //
729 // (2) We step through the GIDs of the domainMap, checking to see if
730 // each domain GID is a column GID. we want to do this to
731 // maintain a consistent ordering of GIDs between the columns
732 // and the domain.
733 Teuchos::ArrayView<const GO> domainGlobalElements = domainMap.getLocalElementList();
734 if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
735 if (NumLocalColGIDs > 0) {
736 // Load Global Indices into first numMyCols elements column GID list
737 std::copy(domainGlobalElements.begin(), domainGlobalElements.end(),
738 ColIndices.begin());
739 }
740 } else {
741 LO NumLocalAgain = 0;
742 use_local_permute = true;
743 for (size_t i = 0; i < numDomainElements; ++i) {
744 if (LocalGIDs[i]) {
745 LocalPermuteIDs[i] = NumLocalAgain;
746 ColIndices[NumLocalAgain++] = domainGlobalElements[i];
747 }
748 }
749 TEUCHOS_TEST_FOR_EXCEPTION(
750 static_cast<size_t>(NumLocalAgain) != NumLocalColGIDs,
751 std::runtime_error, prefix << "Local ID count test failed.");
752 }
753
754 // Make column Map
755 const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid();
756 colMap = rcp(new map_type(minus_one, ColIndices, domainMap.getIndexBase(),
757 domainMap.getComm()));
758
759 // Low-cost reindex of the matrix
760 for (size_t i = 0; i < numMyRows; ++i) {
761 for (size_t j = rowptr[i]; j < rowptr[i + 1]; ++j) {
762 const LO ID = colind_LID[j];
763 if (static_cast<size_t>(ID) < numDomainElements) {
764 if (use_local_permute) {
765 colind_LID[j] = LocalPermuteIDs[colind_LID[j]];
766 }
767 // In the case where use_local_permute==false, we just copy
768 // the DomainMap's ordering, which it so happens is what we
769 // put in colind_LID to begin with.
770 } else {
771 colind_LID[j] = NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j] - numDomainElements];
772 }
773 }
774 }
775}
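// Editorial example (hypothetical data, for illustration only): suppose the
// domain map on this process owns GIDs {10, 11, 12} and the local rows
// reference column GIDs {11, 40, 10, 41} with owningPIDs {myRank, 2, myRank, 1}.
// Then the column map produced above lists the locally owned column GIDs
// first, in domain-map order, followed by the remote GIDs grouped by owning
// process and sorted within each process: {10, 11, 41, 40}. colind_LID is
// rewritten to index into that ordering ({1, 3, 0, 2}) and remotePIDs
// becomes {1, 2}.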
776
777template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
778void lowCommunicationMakeColMapAndReindex(
779 const Teuchos::ArrayView<const size_t>& rowptr,
780 const Teuchos::ArrayView<LocalOrdinal>& colind_LID,
781 const Teuchos::ArrayView<GlobalOrdinal>& colind_GID,
782 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
783 const Teuchos::ArrayView<const int>& owningPIDs,
784 Teuchos::Array<int>& remotePIDs,
785 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap) {
786 using Teuchos::rcp;
787 typedef LocalOrdinal LO;
788 typedef GlobalOrdinal GO;
789 typedef Tpetra::global_size_t GST;
790 typedef Tpetra::Map<LO, GO, Node> map_type;
791 const char prefix[] = "lowCommunicationMakeColMapAndReindex: ";
792
793 typedef typename Node::device_type DT;
794 using execution_space = typename DT::execution_space;
795 execution_space exec;
796 using team_policy = Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
797 typedef typename map_type::local_map_type local_map_type;
798
799 // Create device mirror and host mirror views from function parameters
800 // When we pass in views instead of Teuchos::ArrayViews, we can avoid copying views
801 auto colind_LID_view = Details::create_mirror_view_from_raw_host_array(exec, colind_LID.getRawPtr(), colind_LID.size(), true, "colind_LID");
802 auto rowptr_view = Details::create_mirror_view_from_raw_host_array(exec, rowptr.getRawPtr(), rowptr.size(), true, "rowptr");
803 auto colind_GID_view = Details::create_mirror_view_from_raw_host_array(exec, colind_GID.getRawPtr(), colind_GID.size(), true, "colind_GID");
804 auto owningPIDs_view = Details::create_mirror_view_from_raw_host_array(exec, owningPIDs.getRawPtr(), owningPIDs.size(), true, "owningPIDs");
805
806 typename decltype(colind_LID_view)::host_mirror_type colind_LID_host(colind_LID.getRawPtr(), colind_LID.size());
807 typename decltype(colind_GID_view)::host_mirror_type colind_GID_host(colind_GID.getRawPtr(), colind_GID.size());
808
809 Kokkos::deep_copy(colind_LID_view, colind_LID_host);
810 Kokkos::deep_copy(colind_GID_view, colind_GID_host);
811
812 // The domainMap is an RCP because there is a shortcut for a
813 // (common) special case to return the columnMap = domainMap.
814 const map_type& domainMap = *domainMapRCP;
815
816 Kokkos::UnorderedMap<LO, bool, DT> LocalGIDs_view_map(colind_LID.size());
817 Kokkos::UnorderedMap<GO, LO, DT> RemoteGIDs_view_map(colind_LID.size());
818
819 const size_t numMyRows = rowptr.size() - 1;
820 local_map_type domainMap_local = domainMap.getLocalMap();
821
822 const size_t numDomainElements = domainMap.getLocalNumElements();
823 Kokkos::View<bool*, DT> LocalGIDs_view("LocalGIDs", numDomainElements);
824 auto LocalGIDs_host = Kokkos::create_mirror_view(LocalGIDs_view);
825
826 size_t NumLocalColGIDs = 0;
827
828 // Scan all column indices and sort into two groups:
829 // Local: those whose GID matches a GID of the domain map on this processor and
830 // Remote: All others.
831 // Kokkos::Parallel_reduce sums up NumLocalColGIDs, while we use the size of the Remote GIDs map to find NumRemoteColGIDs
832 Kokkos::parallel_reduce(
833 team_policy(numMyRows, Kokkos::AUTO), KOKKOS_LAMBDA(const typename team_policy::member_type& member, size_t& update) {
834 const int i = member.league_rank();
835 size_t NumLocalColGIDs_temp = 0;
836 size_t rowptr_start = rowptr_view[i];
837 size_t rowptr_end = rowptr_view[i + 1];
838 Kokkos::parallel_reduce(
839 Kokkos::TeamThreadRange(member, rowptr_start, rowptr_end), [&](const size_t j, size_t& innerUpdate) {
840 const GO GID = colind_GID_view[j];
841 // Check if GID matches a row GID in local domain map
842 const LO LID = domainMap_local.getLocalElement(GID);
843 if (LID != -1) {
844 auto outcome = LocalGIDs_view_map.insert(LID);
845 // Fresh insert
846 if (outcome.success()) {
847 LocalGIDs_view[LID] = true;
848 innerUpdate++;
849 }
850 } else {
851 const int PID = owningPIDs_view[j];
852 auto outcome = RemoteGIDs_view_map.insert(GID, PID);
853 if (outcome.success() && PID == -1) {
854 Kokkos::abort("Cannot figure out if ID is owned.\n");
855 }
856 }
857 },
858 NumLocalColGIDs_temp);
859 if (member.team_rank() == 0) update += NumLocalColGIDs_temp;
860 },
861 NumLocalColGIDs);
862
863 LO NumRemoteColGIDs = RemoteGIDs_view_map.size();
864
865 Kokkos::View<int*, DT> PIDList_view("PIDList", NumRemoteColGIDs);
866 auto PIDList_host = Kokkos::create_mirror_view(PIDList_view);
867
868 Kokkos::View<GO*, DT> RemoteGIDList_view("RemoteGIDList", NumRemoteColGIDs);
869 auto RemoteGIDList_host = Kokkos::create_mirror_view(RemoteGIDList_view);
870
871 // For each index in RemoteGIDs_map that contains a GID, use "update" to indicate the number of GIDs "before" this GID
872 // This maps each element in the RemoteGIDs hash table to an index in RemoteGIDList / PIDList without any overwriting or empty spaces between indices
873 Kokkos::parallel_scan(
874 Kokkos::RangePolicy<execution_space>(0, RemoteGIDs_view_map.capacity()), KOKKOS_LAMBDA(const int i, GO& update, const bool final) {
875 if (final && RemoteGIDs_view_map.valid_at(i)) {
876 RemoteGIDList_view[update] = RemoteGIDs_view_map.key_at(i);
877 PIDList_view[update] = RemoteGIDs_view_map.value_at(i);
878 }
879 if (RemoteGIDs_view_map.valid_at(i)) {
880 update += 1;
881 }
882 });
883
884 // Possible short-circuit: If all domain map GIDs are present as
885 // column indices, then set ColMap=domainMap and quit.
886 if (domainMap.getComm()->getSize() == 1) {
887 // Sanity check: When there is only one process, there can be no
888 // remoteGIDs.
889 TEUCHOS_TEST_FOR_EXCEPTION(
890 NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
891 "process in the domain Map's communicator, which means that there are no "
892 "\"remote\" indices. Nevertheless, some column indices are not in the "
893 "domain Map.");
894 if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
895 // In this case, we just use the domainMap's indices, which is,
896 // not coincidentally, what we clobbered colind with up above
897 // anyway. No further reindexing is needed.
898 colMap = domainMapRCP;
899
900 // Fill out local colMap (which should only contain local GIDs)
901 auto localColMap = colMap->getLocalMap();
902 Kokkos::parallel_for(
903 Kokkos::RangePolicy<execution_space>(0, colind_GID.size()), KOKKOS_LAMBDA(const int i) {
904 colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
905 });
906 Kokkos::deep_copy(execution_space(), colind_LID_host, colind_LID_view);
907 return;
908 }
909 }
910
911 // Now build the array containing column GIDs
912 // Build back end, containing remote GIDs, first
912 // Build back end, containing remote GIDs, first
913 const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
914 Kokkos::View<GO*, DT> ColIndices_view("ColIndices", numMyCols);
915
916 // We don't need to load the backend of ColIndices or sort if there are no remote GIDs
917 if (NumRemoteColGIDs > 0) {
918 if (NumLocalColGIDs != static_cast<size_t>(numMyCols)) {
919 Kokkos::parallel_for(
920 Kokkos::RangePolicy<execution_space>(0, NumRemoteColGIDs), KOKKOS_LAMBDA(const int i) {
921 ColIndices_view[NumLocalColGIDs + i] = RemoteGIDList_view[i];
922 });
923 }
924
925 // Find the largest PID for bin sorting purposes
926 int PID_max = 0;
927 Kokkos::parallel_reduce(
928 Kokkos::RangePolicy<execution_space>(0, PIDList_host.size()), KOKKOS_LAMBDA(const int i, int& max) {
929 if (max < PIDList_view[i]) max = PIDList_view[i];
930 },
931 Kokkos::Max<int>(PID_max));
932
933 using KeyViewTypePID = decltype(PIDList_view);
934 using BinSortOpPID = Kokkos::BinOp1D<KeyViewTypePID>;
935
936 // Make a subview of ColIndices for remote GID sorting
937 auto ColIndices_subview = Kokkos::subview(ColIndices_view, Kokkos::make_pair(NumLocalColGIDs, ColIndices_view.size()));
938
939 // Make binOp with bins = PID_max + 1, min = 0, max = PID_max
940 BinSortOpPID binOp2(PID_max + 1, 0, PID_max);
941
942 // Sort External column indices so that all columns coming from a
943 // given remote processor are contiguous. This is a sort with one
944 // auxiliary array: RemoteColIndices
945 Kokkos::BinSort<KeyViewTypePID, BinSortOpPID> bin_sort2(PIDList_view, 0, PIDList_view.size(), binOp2, false);
946 bin_sort2.create_permute_vector(exec);
947 bin_sort2.sort(exec, PIDList_view);
948 bin_sort2.sort(exec, ColIndices_subview);
949
950 // Deep copy back from device to host
951 Kokkos::deep_copy(exec, PIDList_host, PIDList_view);
952
953 // Stash the RemotePIDs. Once remotePIDs is changed to become a Kokkos view, we can remove this and copy directly.
954 // Note: If Teuchos::Array had a shrink_to_fit like std::vector,
955 // we'd call it here.
956
957 exec.fence("fence before setting PIDList");
958 Teuchos::Array<int> PIDList(NumRemoteColGIDs);
959 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
960 PIDList[i] = PIDList_host[i];
961 }
962
963 remotePIDs = PIDList;
964
965 // Sort external column indices so that columns from a given remote
966 // processor are not only contiguous but also in ascending
967 // order. NOTE: I don't know if the number of externals associated
968 // with a given remote processor is known at this point ... so I
969 // count them here.
970 LO StartCurrent = 0, StartNext = 1;
971 while (StartNext < NumRemoteColGIDs) {
972 if (PIDList_host[StartNext] == PIDList_host[StartNext - 1]) {
973 StartNext++;
974 } else {
975 Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
976 StartCurrent = StartNext;
977 StartNext++;
978 }
979 }
980
981 Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
982 }
983
984 // Build permute array for *local* reindexing.
985
986 // Now fill front end. Two cases:
987 //
988 // (1) If the number of Local column GIDs is the same as the number
989 // of Local domain GIDs, we can simply read the domain GIDs into
990 // the front part of ColIndices, otherwise
991 //
992 // (2) We step through the GIDs of the domainMap, checking to see if
993 // each domain GID is a column GID. we want to do this to
994 // maintain a consistent ordering of GIDs between the columns
995 // and the domain.
996 if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
997 if (NumLocalColGIDs > 0) {
998 // Load Global Indices into first numMyCols elements column GID list
999 Kokkos::parallel_for(
1000 Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i) {
1001 ColIndices_view[i] = domainMap_local.getGlobalElement(i);
1002 });
1003 }
1004 } else {
1005 // This part isn't actually tested in the unit tests
1006 LO NumLocalAgain = 0;
1007 Kokkos::parallel_scan(
1008 Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i, LO& update, const bool final) {
1009 if (final && LocalGIDs_view[i]) {
1010 ColIndices_view[update] = domainMap_local.getGlobalElement(i);
1011 }
1012 if (LocalGIDs_view[i]) {
1013 update++;
1014 }
1015 },
1016 NumLocalAgain);
1017
1018 TEUCHOS_TEST_FOR_EXCEPTION(
1019 static_cast<size_t>(NumLocalAgain) != NumLocalColGIDs,
1020 std::runtime_error, prefix << "Local ID count test failed.");
1021 }
1022
1023 // Make column Map
1024 const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid();
1025
1026 colMap = rcp(new map_type(minus_one, ColIndices_view, domainMap.getIndexBase(),
1027 domainMap.getComm()));
1028
1029 // Fill out colind_LID using local map
1030 auto localColMap = colMap->getLocalMap();
1031 Kokkos::parallel_for(
1032 Kokkos::RangePolicy<execution_space>(0, colind_GID.size()), KOKKOS_LAMBDA(const int i) {
1033 colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1034 });
1035
1036 // For now, we copy back into colind_LID_host (which also overwrites the colind_LID Teuchos array)
1037 // When colind_LID becomes a Kokkos View we can delete this
1038 Kokkos::deep_copy(exec, colind_LID_host, colind_LID_view);
1039}
1040
1041template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
1042void lowCommunicationMakeColMapAndReindex(
1043 const Kokkos::View<size_t*, typename Node::device_type> rowptr_view,
1044 const Kokkos::View<LocalOrdinal*, typename Node::device_type> colind_LID_view,
1045 const Kokkos::View<GlobalOrdinal*, typename Node::device_type> colind_GID_view,
1046 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& domainMapRCP,
1047 const Kokkos::View<int*, typename Node::device_type> owningPIDs_view,
1048 Teuchos::Array<int>& remotePIDs,
1049 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>& colMap) {
1050 using Teuchos::rcp;
1051 typedef LocalOrdinal LO;
1052 typedef GlobalOrdinal GO;
1053 typedef Tpetra::global_size_t GST;
1054 typedef Tpetra::Map<LO, GO, Node> map_type;
1055 const char prefix[] = "lowCommunicationMakeColMapAndReindex: ";
1056
1057 typedef typename Node::device_type DT;
1058 using execution_space = typename DT::execution_space;
1059 execution_space exec;
1060 using team_policy = Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
1061 typedef typename map_type::local_map_type local_map_type;
1062
1063 // The domainMap is an RCP because there is a shortcut for a
1064 // (common) special case to return the columnMap = domainMap.
1065 const map_type& domainMap = *domainMapRCP;
1066
1067 Kokkos::UnorderedMap<LO, bool, DT> LocalGIDs_view_map(colind_LID_view.size());
1068 Kokkos::UnorderedMap<GO, LO, DT> RemoteGIDs_view_map(colind_LID_view.size());
1069
1070 const size_t numMyRows = rowptr_view.size() - 1;
1071 local_map_type domainMap_local = domainMap.getLocalMap();
1072
1073 const size_t numDomainElements = domainMap.getLocalNumElements();
1074 Kokkos::View<bool*, DT> LocalGIDs_view("LocalGIDs", numDomainElements);
1075 auto LocalGIDs_host = Kokkos::create_mirror_view(LocalGIDs_view);
1076
1077 size_t NumLocalColGIDs = 0;
1078
1079 // Scan all column indices and sort into two groups:
1080 // Local: those whose GID matches a GID of the domain map on this processor and
1081 // Remote: All others.
1082 // Kokkos::Parallel_reduce sums up NumLocalColGIDs, while we use the size of the Remote GIDs map to find NumRemoteColGIDs
1083 Kokkos::parallel_reduce(
1084 team_policy(numMyRows, Kokkos::AUTO), KOKKOS_LAMBDA(const typename team_policy::member_type& member, size_t& update) {
1085 const int i = member.league_rank();
1086 size_t NumLocalColGIDs_temp = 0;
1087 size_t rowptr_start = rowptr_view[i];
1088 size_t rowptr_end = rowptr_view[i + 1];
1089 Kokkos::parallel_reduce(
1090 Kokkos::TeamThreadRange(member, rowptr_start, rowptr_end), [&](const size_t j, size_t& innerUpdate) {
1091 const GO GID = colind_GID_view[j];
1092 // Check if GID matches a row GID in local domain map
1093 const LO LID = domainMap_local.getLocalElement(GID);
1094 if (LID != -1) {
1095 auto outcome = LocalGIDs_view_map.insert(LID);
1096 // Fresh insert
1097 if (outcome.success()) {
1098 LocalGIDs_view[LID] = true;
1099 innerUpdate++;
1100 }
1101 } else {
1102 const int PID = owningPIDs_view[j];
1103 auto outcome = RemoteGIDs_view_map.insert(GID, PID);
1104 if (outcome.success() && PID == -1) {
1105 Kokkos::abort("Cannot figure out if ID is owned.\n");
1106 }
1107 }
1108 },
1109 NumLocalColGIDs_temp);
1110 if (member.team_rank() == 0) update += NumLocalColGIDs_temp;
1111 },
1112 NumLocalColGIDs);
1113
1114 LO NumRemoteColGIDs = RemoteGIDs_view_map.size();
1115
1116 Kokkos::View<int*, DT> PIDList_view("PIDList_d", NumRemoteColGIDs);
1117
1118 Kokkos::View<GO*, DT> RemoteGIDList_view("RemoteGIDList", NumRemoteColGIDs);
1119 auto RemoteGIDList_host = Kokkos::create_mirror_view(RemoteGIDList_view);
1120
1121 // For each index in RemoteGIDs_map that contains a GID, use "update" to indicate the number of GIDs "before" this GID
1122 // This maps each element in the RemoteGIDs hash table to an index in RemoteGIDList / PIDList without any overwriting or empty spaces between indices
1123 Kokkos::parallel_scan(
1124 Kokkos::RangePolicy<execution_space>(0, RemoteGIDs_view_map.capacity()), KOKKOS_LAMBDA(const int i, GO& update, const bool final) {
1125 if (final && RemoteGIDs_view_map.valid_at(i)) {
1126 RemoteGIDList_view[update] = RemoteGIDs_view_map.key_at(i);
1127 PIDList_view[update] = RemoteGIDs_view_map.value_at(i);
1128 }
1129 if (RemoteGIDs_view_map.valid_at(i)) {
1130 update += 1;
1131 }
1132 });
1133
1134 // Possible short-circuit: If all domain map GIDs are present as
1135 // column indices, then set ColMap=domainMap and quit.
1136 if (domainMap.getComm()->getSize() == 1) {
1137 // Sanity check: When there is only one process, there can be no
1138 // remoteGIDs.
1139 TEUCHOS_TEST_FOR_EXCEPTION(
1140 NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
1141 "process in the domain Map's communicator, which means that there are no "
1142 "\"remote\" indices. Nevertheless, some column indices are not in the "
1143 "domain Map.");
1144 if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
1145 // In this case, we just use the domainMap's indices, which is,
1146 // not coincidentally, what we clobbered colind with up above
1147 // anyway. No further reindexing is needed.
1148 colMap = domainMapRCP;
1149
1150 // Fill out local colMap (which should only contain local GIDs)
1151 auto localColMap = colMap->getLocalMap();
1152 Kokkos::parallel_for(
1153 Kokkos::RangePolicy<execution_space>(0, colind_GID_view.size()), KOKKOS_LAMBDA(const int i) {
1154 colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1155 });
1156 return;
1157 }
1158 }
1159
1160 // Now build the array containing column GIDs
1161 // Build back end, containing remote GIDs, first
1162 const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
1163 Kokkos::View<GO*, DT> ColIndices_view("ColIndices", numMyCols);
1164
1165 // We don't need to load the backend of ColIndices or sort if there are no remote GIDs
1166 if (NumRemoteColGIDs > 0) {
1167 if (NumLocalColGIDs != static_cast<size_t>(numMyCols)) {
1168 Kokkos::parallel_for(
1169 Kokkos::RangePolicy<execution_space>(0, NumRemoteColGIDs), KOKKOS_LAMBDA(const int i) {
1170 ColIndices_view[NumLocalColGIDs + i] = RemoteGIDList_view[i];
1171 });
1172 }
1173
1174 // Find the largest PID for bin sorting purposes
1175 int PID_max = 0;
1176 Kokkos::parallel_reduce(
1177 Kokkos::RangePolicy<execution_space>(0, PIDList_view.size()), KOKKOS_LAMBDA(const int i, int& max) {
1178 if (max < PIDList_view[i]) max = PIDList_view[i];
1179 },
1180 Kokkos::Max<int>(PID_max));
1181
1182 using KeyViewTypePID = decltype(PIDList_view);
1183 using BinSortOpPID = Kokkos::BinOp1D<KeyViewTypePID>;
1184
1185 // Make a subview of ColIndices for remote GID sorting
1186 auto ColIndices_subview = Kokkos::subview(ColIndices_view, Kokkos::make_pair(NumLocalColGIDs, ColIndices_view.size()));
1187
1188 // Make binOp with bins = PID_max + 1, min = 0, max = PID_max
1189 BinSortOpPID binOp2(PID_max + 1, 0, PID_max);
1190
1191 // Sort External column indices so that all columns coming from a
1192 // given remote processor are contiguous. This is a sort with one
1193 // auxiliary array: RemoteColIndices
1194 Kokkos::BinSort<KeyViewTypePID, BinSortOpPID> bin_sort2(PIDList_view, 0, PIDList_view.size(), binOp2, false);
1195 bin_sort2.create_permute_vector(exec);
1196 bin_sort2.sort(exec, PIDList_view);
1197 bin_sort2.sort(exec, ColIndices_subview);
1198
1199 // Deep copy back from device to host
1200 // Stash the RemotePIDs. Once remotePIDs is changed to become a Kokkos view, we can remove this and copy directly.
1201 Teuchos::Array<int> PIDList(NumRemoteColGIDs);
1202 Kokkos::View<int*, Kokkos::HostSpace> PIDList_host(PIDList.data(), PIDList.size());
1203 Kokkos::deep_copy(exec, PIDList_host, PIDList_view);
1204 exec.fence();
1205
1206 remotePIDs = PIDList;
1207
1208 // Sort external column indices so that columns from a given remote
1209 // processor are not only contiguous but also in ascending
1210 // order. NOTE: I don't know if the number of externals associated
1211 // with a given remote processor is known at this point ... so I
1212 // count them here.
1213 LO StartCurrent = 0, StartNext = 1;
1214 while (StartNext < NumRemoteColGIDs) {
1215 if (PIDList_host[StartNext] == PIDList_host[StartNext - 1]) {
1216 StartNext++;
1217 } else {
1218 Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1219 StartCurrent = StartNext;
1220 StartNext++;
1221 }
1222 }
1223
1224 Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1225 }
1226
1227 // Build permute array for *local* reindexing.
1228
1229 // Now fill front end. Two cases:
1230 //
1231 // (1) If the number of Local column GIDs is the same as the number
1232 // of Local domain GIDs, we can simply read the domain GIDs into
1233 // the front part of ColIndices, otherwise
1234 //
1235 // (2) We step through the GIDs of the domainMap, checking to see if
1236 // each domain GID is a column GID. we want to do this to
1237 // maintain a consistent ordering of GIDs between the columns
1238 // and the domain.
1239 if (static_cast<size_t>(NumLocalColGIDs) == numDomainElements) {
1240 if (NumLocalColGIDs > 0) {
1241 // Load Global Indices into first numMyCols elements column GID list
1242 Kokkos::parallel_for(
1243 Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i) {
1244 ColIndices_view[i] = domainMap_local.getGlobalElement(i);
1245 });
1246 }
1247 } else {
1248 // This part isn't actually tested in the unit tests
1249 LO NumLocalAgain = 0;
1250 Kokkos::parallel_scan(
1251 Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i, LO& update, const bool final) {
1252 if (final && LocalGIDs_view[i]) {
1253 ColIndices_view[update] = domainMap_local.getGlobalElement(i);
1254 }
1255 if (LocalGIDs_view[i]) {
1256 update++;
1257 }
1258 },
1259 NumLocalAgain);
1260
1261 TEUCHOS_TEST_FOR_EXCEPTION(
1262 static_cast<size_t>(NumLocalAgain) != NumLocalColGIDs,
1263 std::runtime_error, prefix << "Local ID count test failed.");
1264 }
1265
1266 // Make column Map
1267 const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid();
1268
1269 colMap = rcp(new map_type(minus_one, ColIndices_view, domainMap.getIndexBase(),
1270 domainMap.getComm()));
1271
1272 // Fill out colind_LID using local map
1273 auto localColMap = colMap->getLocalMap();
1274 Kokkos::parallel_for(
1275 Kokkos::RangePolicy<execution_space>(0, colind_GID_view.size()), KOKKOS_LAMBDA(const int i) {
1276 colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1277 });
1278}
1279
1280// Generates a list of owning PIDs based on two transfers (aka import/export objects)
1281// Let:
1282// OwningMap = useReverseModeForOwnership ? transferThatDefinesOwnership.getTargetMap() : transferThatDefinesOwnership.getSourceMap();
1283// MapAo = useReverseModeForOwnership ? transferThatDefinesOwnership.getSourceMap() : transferThatDefinesOwnership.getTargetMap();
1284// MapAm = useReverseModeForMigration ? transferThatDefinesMigration.getTargetMap() : transferThatDefinesMigration.getSourceMap();
1285// VectorMap = useReverseModeForMigration ? transferThatDefinesMigration.getSourceMap() : transferThatDefinesMigration.getTargetMap();
1286// Precondition:
1287// 1) MapAo.isSameAs(*MapAm) - map compatibility between transfers
1288// 2) VectorMap->isSameAs(*owningPIDs->getMap()) - map compatibility between transfer & vector
1289// 3) OwningMap->isOneToOne() - owning map is 1-to-1
1290// --- Precondition 3 is only checked in DEBUG mode ---
1291// Postcondition:
1292// owningPIDs[VectorMap->getLocalElement(GID i)] = j iff (OwningMap->isLocalElement(GID i) on rank j)
1293template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
1294void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
1295 bool useReverseModeForOwnership,
1296 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesMigration,
1297 bool useReverseModeForMigration,
1298 Tpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node>& owningPIDs) {
1299 typedef Tpetra::Import<LocalOrdinal, GlobalOrdinal, Node> import_type;
1300 typedef Tpetra::Export<LocalOrdinal, GlobalOrdinal, Node> export_type;
1301
1302 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> OwningMap = useReverseModeForOwnership ? transferThatDefinesOwnership.getTargetMap() : transferThatDefinesOwnership.getSourceMap();
1303 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> MapAo = useReverseModeForOwnership ? transferThatDefinesOwnership.getSourceMap() : transferThatDefinesOwnership.getTargetMap();
1304 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> MapAm = useReverseModeForMigration ? transferThatDefinesMigration.getTargetMap() : transferThatDefinesMigration.getSourceMap();
1305 Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> VectorMap = useReverseModeForMigration ? transferThatDefinesMigration.getSourceMap() : transferThatDefinesMigration.getTargetMap();
1306
1307 TEUCHOS_TEST_FOR_EXCEPTION(!MapAo->isSameAs(*MapAm), std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch between transfers");
1308 TEUCHOS_TEST_FOR_EXCEPTION(!VectorMap->isSameAs(*owningPIDs.getMap()), std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch transfer and vector");
1309#ifdef HAVE_TPETRA_DEBUG
1310 TEUCHOS_TEST_FOR_EXCEPTION(!OwningMap->isOneToOne(), std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
1311#endif
1312
1313 int rank = OwningMap->getComm()->getRank();
1314 // Generate "A" vector and fill it with owning information. We can read this from transferThatDefinesOwnership w/o communication
1315 // Note: Due to the 1-to-1 requirement, several of these options throw
1316 Tpetra::Vector<int, LocalOrdinal, GlobalOrdinal, Node> temp(MapAo, false);
1317 const import_type* ownAsImport = dynamic_cast<const import_type*>(&transferThatDefinesOwnership);
1318 const export_type* ownAsExport = dynamic_cast<const export_type*>(&transferThatDefinesOwnership);
1319
1320 Teuchos::ArrayRCP<int> pids = temp.getDataNonConst();
1321 Teuchos::ArrayView<int> v_pids = pids();
1322 if (ownAsImport && useReverseModeForOwnership) {
1323 TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
1324 } else if (ownAsImport && !useReverseModeForOwnership) {
1325 getPids(*ownAsImport, v_pids, false);
1326 } else if (ownAsExport && useReverseModeForOwnership) {
1327 TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector this option not yet implemented");
1328 } else {
1329 TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
1330 }
1331
1332 const import_type* xferAsImport = dynamic_cast<const import_type*>(&transferThatDefinesMigration);
1333 const export_type* xferAsExport = dynamic_cast<const export_type*>(&transferThatDefinesMigration);
1334 TEUCHOS_TEST_FOR_EXCEPTION(!xferAsImport && !xferAsExport, std::runtime_error, "Tpetra::Import_Util::getTwoTransferOwnershipVector transfer undefined");
1335
1336 // Migrate from "A" vector to output vector
1337 owningPIDs.putScalar(rank);
1338 if (xferAsImport && useReverseModeForMigration)
1339 owningPIDs.doExport(temp, *xferAsImport, Tpetra::REPLACE);
1340 else if (xferAsImport && !useReverseModeForMigration)
1341 owningPIDs.doImport(temp, *xferAsImport, Tpetra::REPLACE);
1342 else if (xferAsExport && useReverseModeForMigration)
1343 owningPIDs.doImport(temp, *xferAsExport, Tpetra::REPLACE);
1344 else
1345 owningPIDs.doExport(temp, *xferAsExport, Tpetra::REPLACE);
1346}
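// Editorial usage sketch (hypothetical names, not from the original source):
// given an RCP<const Import> transferA whose source map is one-to-one and
// whose target map matches the source map of a second RCP<const Import>
// transferB used to migrate data, the owning ranks can be gathered into a
// vector on transferB's target map roughly as follows (LO/GO/Node stand for
// the usual ordinal and node typedefs):
//
//   Tpetra::Vector<int, LO, GO, Node> owningPIDs(transferB->getTargetMap());
//   Tpetra::Import_Util::getTwoTransferOwnershipVector(
//       *transferA, /*useReverseModeForOwnership=*/false,
//       *transferB, /*useReverseModeForMigration=*/false, owningPIDs);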
1347
1348} // namespace Import_Util
1349} // namespace Tpetra
1350
1351#endif  // TPETRA_IMPORT_UTIL2_HPP