Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_makeColMap_def.hpp
Go to the documentation of this file.
1#ifndef TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
2#define TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
3
4// @HEADER
5// *****************************************************************************
6// Tpetra: Templated Linear Algebra Services Package
7//
8// Copyright 2008 NTESS and the Tpetra contributors.
9// SPDX-License-Identifier: BSD-3-Clause
10// *****************************************************************************
11// @HEADER
12
23
24#include "Tpetra_RowGraph.hpp"
25#include "Tpetra_Util.hpp"
26#include "Teuchos_Array.hpp"
27#include "Kokkos_Bitset.hpp"
28#include <set>
29#include <vector>
30
31namespace Tpetra {
32namespace Details {
33
34template <class LO, class GO, class NT>
35int makeColMapImpl(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
36 Teuchos::Array<int>& remotePIDs,
37 const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
38 size_t numLocalColGIDs,
39 size_t numRemoteColGIDs,
40 std::set<GO>& RemoteGIDSet,
41 std::vector<GO>& RemoteGIDUnorderedVector,
42 std::vector<bool>& GIDisLocal,
43 const bool sortEachProcsGids,
44 std::ostream* errStrm) {
45 using std::endl;
46 using Teuchos::Array;
47 using Teuchos::ArrayView;
48 using Teuchos::rcp;
49 int errCode = 0;
50 const char prefix[] = "Tpetra::Details::makeColMapImpl: ";
51 typedef ::Tpetra::Map<LO, GO, NT> map_type;
52 // Possible short-circuit for serial scenario:
53 //
54 // If all domain GIDs are present as column indices, then set
55 // ColMap=DomainMap. By construction, LocalGIDs is a subset of
56 // DomainGIDs.
57 //
58 // If we have
59 // * Number of remote GIDs is 0, so that ColGIDs == LocalGIDs,
60 // and
61 // * Number of local GIDs is number of domain GIDs
62 // then
63 // * LocalGIDs \subset DomainGIDs && size(LocalGIDs) ==
64 // size(DomainGIDs) => DomainGIDs == LocalGIDs == ColGIDs
65 // on the calling process.
66 //
67 // We will concern ourselves only with the special case of a
68 // serial DomainMap, obviating the need for communication.
69 //
70 // If
71 // * DomainMap has a serial communicator
72 // then we can set the column Map as the domain Map
73 // return. Benefit: this graph won't need an Import object
74 // later.
75 //
76 // Note, for a serial domain map, there can be no RemoteGIDs,
77 // because there are no remote processes. Likely explanations
78 // for this are:
79 // * user submitted erroneous column indices
80 // * user submitted erroneous domain Map
81 if (domMap->getComm()->getSize() == 1) {
82 if (numRemoteColGIDs != 0) {
83 errCode = -2;
84 if (errStrm != NULL) {
85 *errStrm << prefix << "The domain Map only has one process, but "
86 << numRemoteColGIDs << " column "
87 << (numRemoteColGIDs != 1 ? "indices are" : "index is")
88 << " not in the domain Map. Either these indices are "
89 "invalid or the domain Map is invalid. Remember that nonsquare "
90 "matrices, or matrices where the row and range Maps differ, "
91 "require calling the version of fillComplete that takes the "
92 "domain and range Maps as input."
93 << endl;
94 }
95 }
96 if (numLocalColGIDs == domMap->getLocalNumElements()) {
97 colMap = domMap; // shallow copy
98 return errCode;
99 }
100 }
101
102 // Populate myColumns with a list of all column GIDs. Put
103 // locally owned (in the domain Map) GIDs at the front: they
104 // correspond to "same" and "permuted" entries between the
105 // column Map and the domain Map. Put remote GIDs at the back.
106 Array<GO> myColumns(numLocalColGIDs + numRemoteColGIDs);
107 // get pointers into myColumns for each part
108 ArrayView<GO> LocalColGIDs = myColumns(0, numLocalColGIDs);
109 ArrayView<GO> remoteColGIDs = myColumns(numLocalColGIDs, numRemoteColGIDs);
110
111 // Copy the remote GIDs into myColumns
112 if (sortEachProcsGids) {
113 // The std::set puts GIDs in increasing order.
114 std::copy(RemoteGIDSet.begin(), RemoteGIDSet.end(),
115 remoteColGIDs.begin());
116 } else {
117 // Respect the originally encountered order.
118 std::copy(RemoteGIDUnorderedVector.begin(),
119 RemoteGIDUnorderedVector.end(), remoteColGIDs.begin());
120 }
121
122 // Make a list of process ranks corresponding to the remote GIDs.
123 // remotePIDs is an output argument of getRemoteIndexList below;
124 // its initial contents don't matter.
125 if (static_cast<size_t>(remotePIDs.size()) != numRemoteColGIDs) {
126 remotePIDs.resize(numRemoteColGIDs);
127 }
128 // Look up the remote process' ranks in the domain Map.
129 {
130 const LookupStatus stat =
131 domMap->getRemoteIndexList(remoteColGIDs, remotePIDs());
132
133 // If any process returns IDNotPresent, then at least one of
134 // the remote indices was not present in the domain Map. This
135 // means that the Import object cannot be constructed, because
136 // of incongruity between the column Map and domain Map.
137 // This has two likely causes:
138 // - The user has made a mistake in the column indices
139 // - The user has made a mistake with respect to the domain Map
140 if (stat == IDNotPresent) {
141 if (errStrm != NULL) {
142 *errStrm << prefix << "Some column indices are not in the domain Map."
143 "Either these column indices are invalid or the domain Map is "
144 "invalid. Likely cause: For a nonsquare matrix, you must give the "
145 "domain and range Maps as input to fillComplete."
146 << endl;
147 }
148 // Don't return yet, because not all processes may have
149 // encountered this error state. This function ends with an
150 // all-reduce, so we have to make sure that everybody gets to
151 // that point. The resulting Map may be wrong, but at least
152 // nothing should crash.
153 errCode = -3;
154 }
155 }
156
157 // Sort incoming remote column indices by their owning process
158 // rank, so that all columns coming from a given remote process
159 // are contiguous. This means the Import's Distributor doesn't
160 // need to reorder data.
161 //
162 // NOTE (mfh 02 Sep 2014) This needs to be a stable sort, so that
163 // it respects either of the possible orderings of GIDs (sorted,
164 // or original order) specified above.
165 sort2(remotePIDs.begin(), remotePIDs.end(), remoteColGIDs.begin(), true);
166
167 // Copy the local GIDs into myColumns. Two cases:
168 // 1. If the number of Local column GIDs is the same as the number
169 // of Local domain GIDs, we can simply read the domain GIDs
170 // into the front part of ColIndices (see logic above from the
171 // serial short circuit case)
172 // 2. We step through the GIDs of the DomainMap, checking to see
173 // if each domain GID is a column GID. We want to do this to
174 // maintain a consistent ordering of GIDs between the columns
175 // and the domain.
176
177 const size_t numDomainElts = domMap->getLocalNumElements();
178 if (numLocalColGIDs == numDomainElts) {
179 // If the number of locally owned GIDs are the same as the
180 // number of local domain Map elements, then the local domain
181 // Map elements are the same as the locally owned GIDs.
182 if (domMap->isContiguous()) {
183 // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case that
184 // the domain Map is contiguous, it's more efficient to avoid
185 // calling getLocalElementList(), since that permanently
186 // constructs and caches the GID list in the contiguous Map.
187 GO curColMapGid = domMap->getMinGlobalIndex();
188 for (size_t k = 0; k < numLocalColGIDs; ++k, ++curColMapGid) {
189 LocalColGIDs[k] = curColMapGid;
190 }
191 } else {
192 ArrayView<const GO> domainElts = domMap->getLocalElementList();
193 std::copy(domainElts.begin(), domainElts.end(), LocalColGIDs.begin());
194 }
195 } else {
196 // Count the number of locally owned GIDs, both to keep track of
197 // the current array index, and as a sanity check.
198 size_t numLocalCount = 0;
199 if (domMap->isContiguous()) {
200 // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case that
201 // the domain Map is contiguous, it's more efficient to avoid
202 // calling getLocalElementList(), since that permanently
203 // constructs and caches the GID list in the contiguous Map.
204 GO curColMapGid = domMap->getMinGlobalIndex();
205 for (size_t i = 0; i < numDomainElts; ++i, ++curColMapGid) {
206 if (GIDisLocal[i]) {
207 LocalColGIDs[numLocalCount++] = curColMapGid;
208 }
209 }
210 } else {
211 ArrayView<const GO> domainElts = domMap->getLocalElementList();
212 for (size_t i = 0; i < numDomainElts; ++i) {
213 if (GIDisLocal[i]) {
214 LocalColGIDs[numLocalCount++] = domainElts[i];
215 }
216 }
217 }
218 if (numLocalCount != numLocalColGIDs) {
219 if (errStrm != NULL) {
220 *errStrm << prefix << "numLocalCount = " << numLocalCount
221 << " != numLocalColGIDs = " << numLocalColGIDs
222 << ". This should never happen. "
223 "Please report this bug to the Tpetra developers."
224 << endl;
225 }
226 // Don't return yet, because not all processes may have
227 // encountered this error state. This function ends with an
228 // all-reduce, so we have to make sure that everybody gets to
229 // that point.
230 errCode = -4;
231 }
232 }
233
234 // FIXME (mfh 03 Apr 2013) Now would be a good time to use the
235 // information we collected above to construct the Import. In
236 // particular, building an Import requires:
237 //
238 // 1. numSameIDs (length of initial contiguous sequence of GIDs
239 // on this process that are the same in both Maps; this
240 // equals the number of domain Map elements on this process)
241 //
242 // 2. permuteToLIDs and permuteFromLIDs (both empty in this
243 // case, since there's no permutation going on; the column
244 // Map starts with the domain Map's GIDs, and immediately
245 // after them come the remote GIDs)
246 //
247 // 3. remoteGIDs (exactly those GIDs that we found out above
248 // were not in the domain Map) and remoteLIDs (which we could
249 // have gotten above by using the three-argument version of
250 // getRemoteIndexList() that computes local indices as well
251 // as process ranks, instead of the two-argument version that
252 // was used above)
253 //
254 // 4. remotePIDs (which we have from the getRemoteIndexList() call
255 // above) -- NOTE (mfh 14 Sep 2017) Fix for Trilinos GitHub
256 // Issue #628 (https://github.com/trilinos/Trilinos/issues/628)
257 // addresses this. remotePIDs is now an output argument of
258 // both this function and CrsGraph::makeColMap, and
259 // CrsGraph::makeImportExport now has an option to use that
260 // information, if available (that is, if makeColMap was
261 // actually called, which it would not be if the graph already
262 // had a column Map).
263 //
264 // 5. Apply the permutation from sorting remotePIDs to both
265 // remoteGIDs and remoteLIDs (by calling sort3 above instead of
266 // sort2), instead of just to remoteLIDs alone.
267 //
268 // 6. Everything after the sort3 call in Import::setupExport():
269 // a. Create the Distributor via createFromRecvs(), which
270 // computes exportGIDs and exportPIDs
271 // b. Compute exportLIDs from exportGIDs (by asking the
272 // source Map, in this case the domain Map, to convert
273 // global to local)
274 //
275 // Steps 1-5 come for free, since we must do that work anyway in
276 // order to compute the column Map. In particular, Step 3 is
277 // even more expensive than Step 6a, since it involves both
278 // creating and using a new Distributor object.
279 const global_size_t INV =
280 Tpetra::Details::OrdinalTraits<global_size_t>::invalid();
281 // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
282 // be the same as the Map's min GID? If the first column is empty
283 // (contains no entries), then the column Map's min GID won't
284 // necessarily be the same as the domain Map's index base.
285 const GO indexBase = domMap->getIndexBase();
286 colMap = rcp(new map_type(INV, myColumns, indexBase, domMap->getComm()));
287 return errCode;
288}
289
290template <class LO, class GO, class NT>
291int makeColMap(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
292 Teuchos::Array<int>& remotePIDs,
293 const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
295 const bool sortEachProcsGids,
296 std::ostream* errStrm) {
297 using std::endl;
298 using Teuchos::Array;
299 using Teuchos::ArrayView;
300 using Teuchos::rcp;
301 typedef ::Tpetra::Map<LO, GO, NT> map_type;
302 const char prefix[] = "Tpetra::Details::makeColMap: ";
303 int errCode = 0;
304
305 // If the input domain Map or its communicator is null on the
306 // calling process, then the calling process does not participate in
307 // the returned column Map. Thus, we can set the returned column
308 // Map to null on those processes, and return immediately. This is
309 // not an error condition, as long as when domMap and its
310 // communicator are NOT null, the graph's other Maps and
311 // communicator are not also null.
312 if (domMap.is_null() || domMap->getComm().is_null()) {
313 colMap = Teuchos::null;
314 return errCode;
315 }
316
318 if (graph.isLocallyIndexed()) {
319 colMap = graph.getColMap();
320 // If the graph is locally indexed, it had better have a column Map.
321 // The extra check for ! graph.hasColMap() is conservative.
322 if (colMap.is_null() || !graph.hasColMap()) {
323 errCode = -1;
324 if (errStrm != NULL) {
325 *errStrm << prefix << "The graph is locally indexed on the calling "
326 "process, but has no column Map (either getColMap() returns null, "
327 "or hasColMap() returns false)."
328 << endl;
329 }
330 // Under this error condition, this process will not fill
331 // myColumns. The resulting new column Map will be incorrect,
332 // but at least we won't hang, and this process will report the
333 // error.
334 } else {
335 // The graph already has a column Map, and is locally indexed on
336 // the calling process. However, it may be globally indexed (or
337 // neither locally nor globally indexed) on other processes.
338 // Assume that we want to recreate the column Map.
339 if (colMap->isContiguous()) {
340 // The number of indices on each process must fit in LO.
341 const LO numCurGids = static_cast<LO>(colMap->getLocalNumElements());
342 myColumns.resize(numCurGids);
343 const GO myFirstGblInd = colMap->getMinGlobalIndex();
344 for (LO k = 0; k < numCurGids; ++k) {
345 myColumns[k] = myFirstGblInd + static_cast<GO>(k);
346 }
347 } else { // the column Map is NOT contiguous
348 ArrayView<const GO> curGids = graph.getColMap()->getLocalElementList();
349 // The number of indices on each process must fit in LO.
350 const LO numCurGids = static_cast<LO>(curGids.size());
351 myColumns.resize(numCurGids);
352 for (LO k = 0; k < numCurGids; ++k) {
353 myColumns[k] = curGids[k];
354 }
355 } // whether the graph's current column Map is contiguous
356 } // does the graph currently have a column Map?
357 } else if (graph.isGloballyIndexed()) {
358 // Go through all the rows, finding the populated column indices.
359 //
360 // Our final list of indices for the column Map constructor will
361 // have the following properties (all of which are with respect to
362 // the calling process):
363 //
364 // 1. Indices in the domain Map go first.
365 // 2. Indices not in the domain Map follow, ordered first
366 // contiguously by their owning process rank (in the domain
367 // Map), then in increasing order within that.
368 // 3. No duplicate indices.
369 //
370 // This imitates the ordering used by Aztec(OO) and Epetra.
371 // Storing indices owned by the same process (in the domain Map)
372 // contiguously permits the use of contiguous send and receive
373 // buffers.
374 //
375 // We begin by partitioning the column indices into "local" GIDs
376 // (owned by the domain Map) and "remote" GIDs (not owned by the
377 // domain Map). We use the same order for local GIDs as the
378 // domain Map, so we track them in place in their array. We use
379 // an std::set (RemoteGIDSet) to keep track of remote GIDs, so
380 // that we don't have to merge duplicates later.
381 const LO LINV = Tpetra::Details::OrdinalTraits<LO>::invalid();
382 size_t numLocalColGIDs = 0;
383 size_t numRemoteColGIDs = 0;
384
385 // GIDisLocal[lid] is false if and only if local index lid in the
386 // domain Map is remote (not local).
387 std::vector<bool> GIDisLocal(domMap->getLocalNumElements(), false);
388 std::set<GO> RemoteGIDSet;
389 // This preserves the not-sorted Epetra order of GIDs.
390 // We only use this if sortEachProcsGids is false.
391 std::vector<GO> RemoteGIDUnorderedVector;
392
393 if (!graph.getRowMap().is_null()) {
394 const Tpetra::Map<LO, GO, NT>& rowMap = *(graph.getRowMap());
395 const LO lclNumRows = rowMap.getLocalNumElements();
396 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
397 const GO gblRow = rowMap.getGlobalElement(lclRow);
398 typename RowGraph<LO, GO, NT>::global_inds_host_view_type rowGids;
399 graph.getGlobalRowView(gblRow, rowGids);
400
401 const LO numEnt = static_cast<LO>(rowGids.size());
402 if (numEnt != 0) {
403 for (LO k = 0; k < numEnt; ++k) {
404 const GO gid = rowGids[k];
405 const LO lid = domMap->getLocalElement(gid);
406 if (lid != LINV) {
407 const bool alreadyFound = GIDisLocal[lid];
408 if (!alreadyFound) {
409 GIDisLocal[lid] = true;
411 }
412 } else {
413 const bool notAlreadyFound = RemoteGIDSet.insert(gid).second;
414 if (notAlreadyFound) { // gid did not exist in the set before
415 if (!sortEachProcsGids) {
416 // The user doesn't want to sort remote GIDs (for each
417 // remote process); they want us to keep remote GIDs
418 // in their original order. We do this by stuffing
419 // each remote GID into an array as we encounter it
420 // for the first time. The std::set helpfully tracks
421 // first encounters.
422 RemoteGIDUnorderedVector.push_back(gid);
423 }
425 }
426 }
427 } // for each entry k in row r
428 } // if row r contains > 0 entries
429 } // for each locally owned row r
430 } // if the graph has a nonnull row Map
431
433 colMap, remotePIDs,
434 domMap,
438
439 } // if the graph is globally indexed
440 else {
441 // If we reach this point, the graph is neither locally nor
442 // globally indexed. Thus, the graph is empty on this process
443 // (per the usual legacy Petra convention), so myColumns will be
444 // left empty.
445 ; // do nothing
446 }
447
448 const global_size_t INV =
449 Tpetra::Details::OrdinalTraits<global_size_t>::invalid();
450 // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
451 // be the same as the Map's min GID? If the first column is empty
452 // (contains no entries), then the column Map's min GID won't
453 // necessarily be the same as the domain Map's index base.
454 const GO indexBase = domMap->getIndexBase();
455 colMap = rcp(new map_type(INV, myColumns, indexBase, domMap->getComm()));
456 return errCode;
457}
458
459template <typename GOView, typename bitset_t>
460struct GatherPresentEntries {
461 using GO = typename GOView::non_const_value_type;
462
463 GatherPresentEntries(GO minGID_, const GOView& gids_, const bitset_t& present_)
464 : minGID(minGID_)
465 , gids(gids_)
466 , present(present_) {}
467
468 KOKKOS_INLINE_FUNCTION void operator()(const GO i) const {
469 present.set(gids(i) - minGID);
470 }
471
472 GO minGID;
473 GOView gids;
474 bitset_t present;
475};
476
477template <typename LO, typename GO, typename device_t, typename LocalMapType, typename const_bitset_t, bool doingRemotes>
478struct ListGIDs {
479 using mem_space = typename device_t::memory_space;
480 using GOView = Kokkos::View<GO*, mem_space>;
481 using SingleView = Kokkos::View<GO, mem_space>;
482
483 ListGIDs(GO minGID_, GOView& gidList_, SingleView& numElems_, const_bitset_t& present_, const LocalMapType& localDomainMap_)
484 : minGID(minGID_)
485 , gidList(gidList_)
486 , numElems(numElems_)
487 , present(present_)
488 , localDomainMap(localDomainMap_) {}
489
490 KOKKOS_INLINE_FUNCTION void operator()(const GO i, GO& lcount, const bool finalPass) const {
491 bool isRemote = localDomainMap.getLocalElement(i + minGID) == ::Tpetra::Details::OrdinalTraits<LO>::invalid();
492 if (present.test(i) && doingRemotes == isRemote) {
493 if (finalPass) {
494 // lcount is the index where this GID should be inserted in gidList.
495 gidList(lcount) = minGID + i;
496 }
497 lcount++;
498 }
499 if ((i == static_cast<GO>(present.size() - 1)) && finalPass) {
500 // Set the number of inserted indices in a single-element view
501 numElems() = lcount;
502 }
503 }
504
505 GO minGID;
506 GOView gidList;
507 SingleView numElems;
508 const_bitset_t present;
509 const LocalMapType localDomainMap;
510};
511
512template <typename GO, typename mem_space>
513struct MinMaxReduceFunctor {
514 using MinMaxValue = typename Kokkos::MinMax<GO>::value_type;
515 using GOView = Kokkos::View<GO*, mem_space>;
516
517 MinMaxReduceFunctor(const GOView& gids_)
518 : gids(gids_) {}
519
520 KOKKOS_INLINE_FUNCTION void operator()(const GO i, MinMaxValue& lminmax) const {
521 GO gid = gids(i);
522 if (gid < lminmax.min_val)
523 lminmax.min_val = gid;
524 if (gid > lminmax.max_val)
525 lminmax.max_val = gid;
526 };
527
528 const GOView gids;
529};
530
531template <class LO, class GO, class NT>
532int makeColMap(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
533 const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
534 Kokkos::View<GO*, typename NT::memory_space> gids,
535 std::ostream* errStrm) {
536 using Kokkos::RangePolicy;
537 using Teuchos::Array;
538 using Teuchos::RCP;
539 using device_t = typename NT::device_type;
540 using exec_space = typename device_t::execution_space;
541 using memory_space = typename device_t::memory_space;
542 // Note BMK 5-2021: this is deliberately not just device_t.
543 // Bitset cannot use HIPHostPinnedSpace currently, so this needs to
544 // use the default memory space for HIP (HIPSpace). Using the default mem
545 // space is fine for all other backends too. This bitset type is only used
546 // in this function so it won't cause type mismatches.
547 using bitset_t = Kokkos::Bitset<typename exec_space::memory_space>;
548 using const_bitset_t = Kokkos::ConstBitset<typename exec_space::memory_space>;
549 using GOView = Kokkos::View<GO*, memory_space>;
550 using SingleView = Kokkos::View<GO, memory_space>;
551 using map_type = Tpetra::Map<LO, GO, NT>;
552 using LocalMap = typename map_type::local_map_type;
553 GO nentries = gids.extent(0);
554 GO minGID = Teuchos::OrdinalTraits<GO>::max();
555 GO maxGID = 0;
556 using MinMaxValue = typename Kokkos::MinMax<GO>::value_type;
557 MinMaxValue minMaxGID = {minGID, maxGID};
558 Kokkos::parallel_reduce(RangePolicy<exec_space>(0, nentries),
560 Kokkos::MinMax<GO>(minMaxGID));
561 minGID = minMaxGID.min_val;
562 maxGID = minMaxGID.max_val;
563 // Now, know the full range of input GIDs.
564 // Determine the set of GIDs in the column map using a dense bitset, which corresponds to the range [minGID, maxGID]
565 bitset_t presentGIDs(maxGID - minGID + 1);
568 // Get the set of local and remote GIDs on device
569 SingleView numLocals("Num local GIDs");
570 SingleView numRemotes("Num remote GIDs");
571 GOView localGIDView(Kokkos::ViewAllocateWithoutInitializing("Local GIDs"), constPresentGIDs.count());
572 GOView remoteGIDView(Kokkos::ViewAllocateWithoutInitializing("Remote GIDs"), constPresentGIDs.count());
573 LocalMap localDomMap = domMap->getLocalMap();
574 // This lists the locally owned GIDs in localGIDView
575 Kokkos::parallel_scan(RangePolicy<exec_space>(0, constPresentGIDs.size()),
577 // And this lists the remote GIDs in remoteGIDView
578 Kokkos::parallel_scan(RangePolicy<exec_space>(0, constPresentGIDs.size()),
580 // Pull down the sizes
581 GO numLocalColGIDs = 0;
582 GO numRemoteColGIDs = 0;
583 // DEEP_COPY REVIEW - DEVICE-TO-VALUE
584 Kokkos::deep_copy(exec_space(), numLocalColGIDs, numLocals);
585 // DEEP_COPY REVIEW - DEVICE-TO-numLocalColGIDs
586 Kokkos::deep_copy(exec_space(), numRemoteColGIDs, numRemotes);
587 // Pull down the remote lists
588 auto localsHost = Kokkos::create_mirror_view(localGIDView);
589 auto remotesHost = Kokkos::create_mirror_view(remoteGIDView);
590 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
591 Kokkos::deep_copy(exec_space(), localsHost, localGIDView);
592 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
593 Kokkos::deep_copy(exec_space(), remotesHost, remoteGIDView);
594 // CAG: This fence was found to be required on Cuda with UVM=on.
595 Kokkos::fence("Tpetra::makeColMap");
596 // Finally, populate the STL structures which hold the index lists
597 std::set<GO> RemoteGIDSet;
598 std::vector<GO> RemoteGIDUnorderedVector;
599 std::vector<bool> GIDisLocal(domMap->getLocalNumElements(), false);
600 for (GO i = 0; i < numLocalColGIDs; i++) {
601 GO gid = localsHost(i);
602 // Already know that gid is locally owned, so this index will never be invalid().
603 // makeColMapImpl uses this and the domain map to get the the local GID list.
604 GIDisLocal[domMap->getLocalElement(gid)] = true;
605 }
607 for (GO i = 0; i < numRemoteColGIDs; i++) {
608 GO gid = remotesHost(i);
609 RemoteGIDSet.insert(gid);
610 RemoteGIDUnorderedVector.push_back(gid);
611 }
612 // remotePIDs will be discarded in this version of makeColMap
613 Array<int> remotePIDs;
614 // Find the min and max GID
616 colMap,
617 remotePIDs,
618 domMap,
619 static_cast<size_t>(numLocalColGIDs),
620 static_cast<size_t>(numRemoteColGIDs),
624 true, // always sort remotes
625 errStrm);
626}
627
628} // namespace Details
629} // namespace Tpetra
630
631//
632// Explicit instantiation macros
633//
634// Must be expanded from within the Tpetra namespace!
635//
636#define TPETRA_DETAILS_MAKECOLMAP_INSTANT(LO, GO, NT) \
637 namespace Details { \
638 template int \
639 makeColMap(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>&, \
640 Teuchos::Array<int>&, \
641 const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>&, \
642 const RowGraph<LO, GO, NT>&, \
643 const bool, \
644 std::ostream*); \
645 template int \
646 makeColMap(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>&, \
647 const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>&, \
648 Kokkos::View<GO*, typename NT::memory_space>, \
649 std::ostream*); \
650 }
651
652#endif // TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
"Local" part of Map suitable for Kokkos kernels.
Implementation details of Tpetra.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2, const bool stableSort=false)
Sort the first array, and apply the resulting permutation to the second array.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
size_t global_size_t
Global size_t object.