Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_makeColMap_def.hpp
Go to the documentation of this file.
1#ifndef TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
2#define TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
3
4// @HEADER
5// *****************************************************************************
6// Tpetra: Templated Linear Algebra Services Package
7//
8// Copyright 2008 NTESS and the Tpetra contributors.
9// SPDX-License-Identifier: BSD-3-Clause
10// *****************************************************************************
11// @HEADER
12
23
24#include "KokkosCompat_View.hpp"
25#include "Kokkos_Atomic.hpp"
26#include "Kokkos_Core_fwd.hpp"
27#include "Kokkos_Pair.hpp"
28#include "Teuchos_Assert.hpp"
29#include "Tpetra_CrsGraph.hpp"
30#include "Tpetra_Util.hpp"
31#include "Teuchos_Array.hpp"
32#include "Kokkos_Bitset.hpp"
33#include "Kokkos_UnorderedMap.hpp"
34#include "Kokkos_Sort.hpp"
35#include <vector>
36
37namespace Tpetra::Details {
38
39template <class keys_view_type>
40struct StableSortComparator {
41 keys_view_type keys;
42
43 StableSortComparator(keys_view_type keys_)
44 : keys(keys_) {}
45
46 KOKKOS_INLINE_FUNCTION
47 bool operator()(size_t i, size_t j) const {
48 if (keys(i) == keys(j)) {
49 return i < j;
50 } else {
51 return keys(i) < keys(j);
52 }
53 }
54};
55
56template <class ExecutionSpace,
57 class keys_view_type_in,
58 class keys_view_type_out,
59 class values_view_type_in,
60 class values_view_type_out>
61void sort_by_key(const ExecutionSpace& exec,
62 const keys_view_type_in& keys_in,
63 const values_view_type_in& values_in,
64 const keys_view_type_out& keys_out,
65 const values_view_type_out& values_out) {
66 auto numItems = keys_in.extent(0);
67 Kokkos::View<size_t*, typename keys_view_type_out::memory_space> indices(Kokkos::ViewAllocateWithoutInitializing(""), numItems);
68 Kokkos::parallel_for(
69 Kokkos::RangePolicy<ExecutionSpace>(0, numItems), KOKKOS_LAMBDA(const size_t k) { indices(k) = k; });
70 StableSortComparator comparison(keys_in);
71 Kokkos::sort(indices, comparison);
72
73 Kokkos::parallel_for(
74 Kokkos::RangePolicy<ExecutionSpace>(0, numItems), KOKKOS_LAMBDA(const size_t k) {
75 auto idx = indices(k);
76 values_out(k) = values_in(idx);
77 keys_out(k) = keys_in(idx);
78 });
79}
80
81template <class LO, class GO, class NT>
82int makeColMapImpl(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
83 Teuchos::Array<int>& remotePIDs,
84 const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
85 size_t numLocalColGIDs,
86 size_t numRemoteColGIDs,
87 const Kokkos::View<GO*, typename NT::memory_space> RemoteGIDs,
88 const Kokkos::View<bool*, typename NT::memory_space>& GIDisLocal,
89 std::ostream* errStrm) {
90 using std::endl;
91 using Teuchos::rcp;
92 using execution_space = typename NT::execution_space;
93 using memory_space = typename NT::memory_space;
94
95 int errCode = 0;
96 const char prefix[] = "Tpetra::Details::makeColMapImpl: ";
97 using map_type = ::Tpetra::Map<LO, GO, NT>;
98 // Possible short-circuit for serial scenario:
99 //
100 // If all domain GIDs are present as column indices, then set
101 // ColMap=DomainMap. By construction, LocalGIDs is a subset of
102 // DomainGIDs.
103 //
104 // If we have
105 // * Number of remote GIDs is 0, so that ColGIDs == LocalGIDs,
106 // and
107 // * Number of local GIDs is number of domain GIDs
108 // then
109 // * LocalGIDs \subset DomainGIDs && size(LocalGIDs) ==
110 // size(DomainGIDs) => DomainGIDs == LocalGIDs == ColGIDs
111 // on the calling process.
112 //
113 // We will concern ourselves only with the special case of a
114 // serial DomainMap, obviating the need for communication.
115 //
116 // If
117 // * DomainMap has a serial communicator
118 // then we can set the column Map as the domain Map
119 // return. Benefit: this graph won't need an Import object
120 // later.
121 //
122 // Note, for a serial domain map, there can be no RemoteGIDs,
123 // because there are no remote processes. Likely explanations
124 // for this are:
125 // * user submitted erroneous column indices
126 // * user submitted erroneous domain Map
127 if (domMap->getComm()->getSize() == 1) {
128 if (numRemoteColGIDs != 0) {
129 errCode = -2;
130 if (errStrm != NULL) {
131 *errStrm << prefix << "The domain Map only has one process, but "
132 << numRemoteColGIDs << " column "
133 << (numRemoteColGIDs != 1 ? "indices are" : "index is")
134 << " not in the domain Map. Either these indices are "
135 "invalid or the domain Map is invalid. Remember that nonsquare "
136 "matrices, or matrices where the row and range Maps differ, "
137 "require calling the version of fillComplete that takes the "
138 "domain and range Maps as input."
139 << endl;
140 }
141 }
142 if (numLocalColGIDs == domMap->getLocalNumElements()) {
143 colMap = domMap; // shallow copy
144 return errCode;
145 }
146 }
147
148 // Populate myColumns with a list of all column GIDs. Put
149 // locally owned (in the domain Map) GIDs at the front: they
150 // correspond to "same" and "permuted" entries between the
151 // column Map and the domain Map. Put remote GIDs at the back.
152 Kokkos::View<GO*, memory_space> myColumns(Kokkos::ViewAllocateWithoutInitializing("myColumns"), numLocalColGIDs + numRemoteColGIDs);
153 // get pointers into myColumns for each part
154 auto LocalColGIDs = Kokkos::subview(myColumns, Kokkos::make_pair((size_t)0, numLocalColGIDs));
155 auto remoteColGIDs = Kokkos::subview(myColumns, Kokkos::make_pair(numLocalColGIDs, numLocalColGIDs + numRemoteColGIDs));
156
157 // Copy the remote GIDs into myColumns
158 Kokkos::deep_copy(remoteColGIDs, RemoteGIDs);
159
160 // Make a list of process ranks corresponding to the remote GIDs.
161 // remotePIDs is an output argument of getRemoteIndexList below;
162 // its initial contents don't matter.
163 if (static_cast<size_t>(remotePIDs.size()) != numRemoteColGIDs) {
164 remotePIDs.resize(numRemoteColGIDs);
165 }
166 // Look up the remote process' ranks in the domain Map.
167 {
168 // TODO: move Directory to device
169 auto remoteColGIDs_h = Kokkos::create_mirror_view(remoteColGIDs);
170 Kokkos::deep_copy(remoteColGIDs_h, remoteColGIDs);
171 auto remoteColGIDs_av = Kokkos::Compat::getArrayView(remoteColGIDs_h);
172 const LookupStatus stat =
173 domMap->getRemoteIndexList(remoteColGIDs_av, remotePIDs());
174
175 // If any process returns IDNotPresent, then at least one of
176 // the remote indices was not present in the domain Map. This
177 // means that the Import object cannot be constructed, because
178 // of incongruity between the column Map and domain Map.
179 // This has two likely causes:
180 // - The user has made a mistake in the column indices
181 // - The user has made a mistake with respect to the domain Map
182 if (stat == IDNotPresent) {
183 if (errStrm != NULL) {
184 *errStrm << prefix << "Some column indices are not in the domain Map."
185 "Either these column indices are invalid or the domain Map is "
186 "invalid. Likely cause: For a nonsquare matrix, you must give the "
187 "domain and range Maps as input to fillComplete."
188 << endl;
189 }
190 // Don't return yet, because not all processes may have
191 // encountered this error state. This function ends with an
192 // all-reduce, so we have to make sure that everybody gets to
193 // that point. The resulting Map may be wrong, but at least
194 // nothing should crash.
195 errCode = -3;
196 }
197 }
198
199 // Sort incoming remote column indices by their owning process
200 // rank, so that all columns coming from a given remote process
201 // are contiguous. This means the Import's Distributor doesn't
202 // need to reorder data.
203 //
204 // NOTE (mfh 02 Sep 2014) This needs to be a stable sort, so that
205 // it respects either of the possible orderings of GIDs (sorted,
206 // or original order) specified above.
207
208 Kokkos::View<int*, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>> remotePIDs_h(remotePIDs.data(), remotePIDs.size());
209 {
210 auto remotePIDs_d = Kokkos::create_mirror_view_and_copy(execution_space(), remotePIDs_h);
211 Kokkos::View<GO*, memory_space> remoteColGIDsSorted(Kokkos::ViewAllocateWithoutInitializing("remoteColGIDsSorted"), numRemoteColGIDs);
212 Kokkos::View<int*, memory_space> remotePIDsSorted(Kokkos::ViewAllocateWithoutInitializing("remotePIDsSorted"), numRemoteColGIDs);
213 sort_by_key(execution_space(), remotePIDs_d, remoteColGIDs, remotePIDsSorted, remoteColGIDsSorted);
214 Kokkos::deep_copy(remoteColGIDs, remoteColGIDsSorted);
215 Kokkos::deep_copy(remotePIDs_h, remotePIDsSorted);
216 }
217
218 // Copy the local GIDs into myColumns. Two cases:
219 // 1. If the number of Local column GIDs is the same as the number
220 // of Local domain GIDs, we can simply read the domain GIDs
221 // into the front part of ColIndices (see logic above from the
222 // serial short circuit case)
223 // 2. We step through the GIDs of the DomainMap, checking to see
224 // if each domain GID is a column GID. We want to do this to
225 // maintain a consistent ordering of GIDs between the columns
226 // and the domain.
227
228 const size_t numDomainElts = domMap->getLocalNumElements();
229 if (numLocalColGIDs == numDomainElts) {
230 // If the number of locally owned GIDs are the same as the
231 // number of local domain Map elements, then the local domain
232 // Map elements are the same as the locally owned GIDs.
233 if (domMap->isContiguous()) {
234 // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case that
235 // the domain Map is contiguous, it's more efficient to avoid
236 // calling getLocalElementList(), since that permanently
237 // constructs and caches the GID list in the contiguous Map.
238 auto domMapMinGlobalIndex = domMap->getMinGlobalIndex();
239 Kokkos::parallel_for(
240 Kokkos::RangePolicy<execution_space>(0, numLocalColGIDs), KOKKOS_LAMBDA(const size_t k) { LocalColGIDs(k) = domMapMinGlobalIndex + k; });
241 } else {
242 Kokkos::deep_copy(LocalColGIDs, domMap->getMyGlobalIndicesDevice());
243 }
244 } else {
245 // Count the number of locally owned GIDs, both to keep track of
246 // the current array index, and as a sanity check.
247 size_t numLocalCount = 0;
248 if (domMap->isContiguous()) {
249 // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case that
250 // the domain Map is contiguous, it's more efficient to avoid
251 // calling getLocalElementList(), since that permanently
252 // constructs and caches the GID list in the contiguous Map.
253 auto domMapMinGlobalIndex = domMap->getMinGlobalIndex();
254 Kokkos::parallel_scan(
255 Kokkos::RangePolicy<execution_space>(0, numDomainElts), KOKKOS_LAMBDA(const size_t i, size_t& offset, const bool is_final) {
256 if (GIDisLocal(i)) {
257 if (is_final)
258 LocalColGIDs(offset) = domMapMinGlobalIndex + i;
259 ++offset;
260 } }, numLocalCount);
261 } else {
262 auto domainElts = domMap->getMyGlobalIndicesDevice();
263 Kokkos::parallel_scan(
264 Kokkos::RangePolicy<execution_space>(0, numDomainElts), KOKKOS_LAMBDA(const size_t i, size_t& offset, const bool is_final) {
265 if (GIDisLocal(i)) {
266 if (is_final)
267 LocalColGIDs(offset) = domainElts(i);
268 ++offset;
269 } }, numLocalCount);
270 }
271 if (numLocalCount != numLocalColGIDs) {
272 if (errStrm != NULL) {
273 *errStrm << prefix << "numLocalCount = " << numLocalCount
274 << " != numLocalColGIDs = " << numLocalColGIDs
275 << ". This should never happen. "
276 "Please report this bug to the Tpetra developers."
277 << endl;
278 }
279 // Don't return yet, because not all processes may have
280 // encountered this error state. This function ends with an
281 // all-reduce, so we have to make sure that everybody gets to
282 // that point.
283 errCode = -4;
284 }
285 }
286
287 // FIXME (mfh 03 Apr 2013) Now would be a good time to use the
288 // information we collected above to construct the Import. In
289 // particular, building an Import requires:
290 //
291 // 1. numSameIDs (length of initial contiguous sequence of GIDs
292 // on this process that are the same in both Maps; this
293 // equals the number of domain Map elements on this process)
294 //
295 // 2. permuteToLIDs and permuteFromLIDs (both empty in this
296 // case, since there's no permutation going on; the column
297 // Map starts with the domain Map's GIDs, and immediately
298 // after them come the remote GIDs)
299 //
300 // 3. remoteGIDs (exactly those GIDs that we found out above
301 // were not in the domain Map) and remoteLIDs (which we could
302 // have gotten above by using the three-argument version of
303 // getRemoteIndexList() that computes local indices as well
304 // as process ranks, instead of the two-argument version that
305 // was used above)
306 //
307 // 4. remotePIDs (which we have from the getRemoteIndexList() call
308 // above) -- NOTE (mfh 14 Sep 2017) Fix for Trilinos GitHub
309 // Issue #628 (https://github.com/trilinos/Trilinos/issues/628)
310 // addresses this. remotePIDs is now an output argument of
311 // both this function and CrsGraph::makeColMap, and
312 // CrsGraph::makeImportExport now has an option to use that
313 // information, if available (that is, if makeColMap was
314 // actually called, which it would not be if the graph already
315 // had a column Map).
316 //
317 // 5. Apply the permutation from sorting remotePIDs to both
318 // remoteGIDs and remoteLIDs (by calling sort3 above instead of
319 // sort2), instead of just to remoteLIDs alone.
320 //
321 // 6. Everything after the sort3 call in Import::setupExport():
322 // a. Create the Distributor via createFromRecvs(), which
323 // computes exportGIDs and exportPIDs
324 // b. Compute exportLIDs from exportGIDs (by asking the
325 // source Map, in this case the domain Map, to convert
326 // global to local)
327 //
328 // Steps 1-5 come for free, since we must do that work anyway in
329 // order to compute the column Map. In particular, Step 3 is
330 // even more expensive than Step 6a, since it involves both
331 // creating and using a new Distributor object.
332 const global_size_t INV =
333 Tpetra::Details::OrdinalTraits<global_size_t>::invalid();
334 // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
335 // be the same as the Map's min GID? If the first column is empty
336 // (contains no entries), then the column Map's min GID won't
337 // necessarily be the same as the domain Map's index base.
338 const GO indexBase = domMap->getIndexBase();
339 colMap = rcp(new map_type(INV, myColumns, indexBase, domMap->getComm()));
340 return errCode;
341}
342
/// \brief Functor that atomically replaces values(values_idx) with
///   min(values(values_idx), v).
///
/// NOTE(review): this functor is not referenced anywhere else in this
/// file; presumably it is used by other translation units that include
/// this header -- confirm before removing.
template <class ValueTypeView, class ValuesIdxType>
struct AtomicMin {
  using value_type = typename ValueTypeView::non_const_value_type;

  // Atomically take the minimum of the current entry and v; safe to
  // call concurrently from device code.
  KOKKOS_FUNCTION
  void op(ValueTypeView values, ValuesIdxType values_idx,
          const value_type v) const {
    Kokkos::atomic_min(values.data() + values_idx, v);
  }
};
353
/// \brief Make the graph's column Map, given its domain Map.
///
/// On output, colMap is the new column Map and remotePIDs holds the
/// owning process ranks (in the domain Map) of the remote column
/// indices. Returns 0 on success, a negative error code otherwise.
///
/// NOTE(review): the documentation extractor dropped several source
/// lines from this listing (each gap is marked below with a
/// NOTE(review) comment). Compare against the repository version of
/// Tpetra_Details_makeColMap_def.hpp before relying on the exact code
/// shown here.
template <class LO, class GO, class NT>
int makeColMap(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
               Teuchos::Array<int>& remotePIDs,
               const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
               // NOTE(review): a parameter line was dropped here; the
               // instantiation macro at the bottom of this file shows it is
               // `const CrsGraph<LO, GO, NT>& graph,`.
               const bool sortEachProcsGids,
               std::ostream* errStrm) {
  using std::endl;
  using Teuchos::rcp;
  using execution_space = typename NT::execution_space;
  using memory_space = typename NT::memory_space;
  using map_type = ::Tpetra::Map<LO, GO, NT>;
  const char prefix[] = "Tpetra::Details::makeColMap: ";
  int errCode = 0;

  // If the input domain Map or its communicator is null on the
  // calling process, then the calling process does not participate in
  // the returned column Map. Thus, we can set the returned column
  // Map to null on those processes, and return immediately. This is
  // not an error condition, as long as when domMap and its
  // communicator are NOT null, the graph's other Maps and
  // communicator are not also null.
  if (domMap.is_null() || domMap->getComm().is_null()) {
    colMap = Teuchos::null;
    return errCode;
  }

  Kokkos::View<GO*, memory_space> myColumns;
  if (graph.isLocallyIndexed()) {
    colMap = graph.getColMap();
    // If the graph is locally indexed, it had better have a column Map.
    // The extra check for ! graph.hasColMap() is conservative.
    if (colMap.is_null() || !graph.hasColMap()) {
      errCode = -1;
      if (errStrm != NULL) {
        *errStrm << prefix << "The graph is locally indexed on the calling "
                    "process, but has no column Map (either getColMap() returns null, "
                    "or hasColMap() returns false)."
                 << endl;
      }
      // Under this error condition, this process will not fill
      // myColumns. The resulting new column Map will be incorrect,
      // but at least we won't hang, and this process will report the
      // error.
    } else {
      // The graph already has a column Map, and is locally indexed on
      // the calling process. However, it may be globally indexed (or
      // neither locally nor globally indexed) on other processes.
      // Assume that we want to recreate the column Map.
      if (colMap->isContiguous()) {
        // The number of indices on each process must fit in LO.
        const LO numCurGids = static_cast<LO>(colMap->getLocalNumElements());
        myColumns = Kokkos::View<GO*, memory_space>(Kokkos::ViewAllocateWithoutInitializing("myColumns"), numCurGids);
        const GO myFirstGblInd = colMap->getMinGlobalIndex();
        // Contiguous Map: regenerate the GID list arithmetically
        // instead of materializing it from the Map.
        Kokkos::parallel_for(
            Kokkos::RangePolicy<execution_space>(0, numCurGids), KOKKOS_LAMBDA(const LO k) { myColumns(k) = myFirstGblInd + k; });
      } else {  // the column Map is NOT contiguous
        auto curGids = graph.getColMap()->getMyGlobalIndicesDevice();
        // The number of indices on each process must fit in LO.
        const LO numCurGids = static_cast<LO>(curGids.extent(0));
        myColumns = Kokkos::View<GO*, memory_space>(Kokkos::ViewAllocateWithoutInitializing("myColumns"), numCurGids);
        Kokkos::deep_copy(myColumns, curGids);
      }  // whether the graph's current column Map is contiguous
    }    // does the graph currently have a column Map?
  } else if (graph.isGloballyIndexed()) {
    // Go through all the rows, finding the populated column indices.
    //
    // Our final list of indices for the column Map constructor will
    // have the following properties (all of which are with respect to
    // the calling process):
    //
    // 1. Indices in the domain Map go first.
    // 2. Indices not in the domain Map follow, ordered first
    //    contiguously by their owning process rank (in the domain
    //    Map), then in increasing order within that.
    // 3. No duplicate indices.
    //
    // This imitates the ordering used by Aztec(OO) and Epetra.
    // Storing indices owned by the same process (in the domain Map)
    // contiguously permits the use of contiguous send and receive
    // buffers.
    //
    // We begin by partitioning the column indices into "local" GIDs
    // (owned by the domain Map) and "remote" GIDs (not owned by the
    // domain Map). We use the same order for local GIDs as the
    // domain Map, so we track them in place in their array. We use
    // an Kokkos::UnorderedMap (RemoteGIDsMap) to keep track of remote GIDs, so
    // that we don't have to merge duplicates later.
    const LO LINV = Tpetra::Details::OrdinalTraits<LO>::invalid();
    size_t numLocalColGIDs = 0;
    size_t numRemoteColGIDs = 0;

    // GIDisLocal(lid) is false if and only if local index lid in the
    // domain Map is remote (not local).
    Kokkos::View<bool*, memory_space> GIDisLocal("GIDisLocal", domMap->getLocalNumElements());
    Kokkos::View<GO*, memory_space> RemoteGIDs;
    if (!graph.getRowMap().is_null()) {
      const Tpetra::Map<LO, GO, NT>& rowMap = *(graph.getRowMap());
      const LO lclNumRows = rowMap.getLocalNumElements();
      {
        auto lclRowMap = rowMap.getLocalMap();
        auto lclDomMap = domMap->getLocalMap();
        auto rowptrsUnpacked = graph.getRowPtrsUnpackedDevice();
        // TODO: Maybe graph.k_numRowEntries_ should also be a WrappedDualView?
        auto numRowEntries = Kokkos::create_mirror_view_and_copy(execution_space(), graph.k_numRowEntries_);
        auto colindUnpacked = graph.gblInds_wdv.getDeviceView(Access::ReadOnly);
        using unordered_map_type = Kokkos::UnorderedMap<GO, size_t, typename NT::device_type>;
        // We need an estimate for the number of remote gids since overallocating an UnorderedMap is expensive.
        // On a single rank that's easy. If we have multiple ranks we compute an upper bound.
        size_t remoteGIDCapacity;
        if (domMap->getComm()->getSize() == 1) {
          // NOTE(review): the extractor dropped the single-rank
          // assignment of remoteGIDCapacity here -- confirm against the
          // repository source.
        } else {
          // Upper bound: count every occurrence of a non-domain-owned
          // column index (duplicates counted multiply).
          Kokkos::parallel_reduce(
              Kokkos::RangePolicy<execution_space>(0, lclNumRows), KOKKOS_LAMBDA(const LO lclRow, size_t& myCapacity) {
                // NOTE(review): the extractor dropped the rowStart /
                // rowEnd setup lines here (presumably derived from
                // rowptrsUnpacked and numRowEntries) -- confirm against
                // the repository source.
                for (auto offset = rowStart; offset < rowEnd; ++offset) {
                  const auto gid = colindUnpacked(offset);
                  const auto lid = lclDomMap.getLocalElement(gid);
                  if (lid == LINV) {
                    ++myCapacity;
                  }
                }
              },
              // NOTE(review): the extractor dropped the reduction result
              // argument here (presumably `remoteGIDCapacity);`).

          // fudge factor
          remoteGIDCapacity *= 1.2;
        }
        // NOTE(review): the extractor dropped lines around here --
        // presumably the construction of RemoteGIDMap (an
        // unordered_map_type sized by remoteGIDCapacity). Confirm
        // against the repository source.
        while (true) {

          // Classify every column index: domain-owned indices flip
          // their GIDisLocal flag (counted once via atomic_exchange);
          // the rest go into RemoteGIDMap.
          Kokkos::parallel_reduce(
              Kokkos::RangePolicy<execution_space>(0, lclNumRows), KOKKOS_LAMBDA(const LO lclRow, size_t& myNumLocalColGIDs) {
                // NOTE(review): the extractor dropped the rowStart /
                // rowEnd setup lines here as well.
                for (auto offset = rowStart; offset < rowEnd; ++offset) {
                  const auto gid = colindUnpacked(offset);
                  const auto lid = lclDomMap.getLocalElement(gid);
                  if (lid != LINV) {
                    const bool alreadyFound = Kokkos::atomic_exchange(&GIDisLocal(lid), true);
                    if (!alreadyFound)
                    // NOTE(review): dropped line (presumably
                    // `++myNumLocalColGIDs;`).
                  } else {
                    // NOTE(review): dropped line (presumably the
                    // RemoteGIDMap.insert(...) call).
                  }
                } }, numLocalColGIDs);
          // If the map ran out of capacity, grow it and retry the whole
          // classification pass.
          if (RemoteGIDMap.failed_insert()) {
            remoteGIDCapacity *= 1.5;
            // NOTE(review): dropped line (presumably re-creating or
            // rehashing RemoteGIDMap with the larger capacity).
          } else {
            break;
          }
        }

        numRemoteColGIDs = RemoteGIDMap.size();
        RemoteGIDs = Kokkos::View<GO*, memory_space>(Kokkos::ViewAllocateWithoutInitializing("RemoteGIDs"), numRemoteColGIDs);
        if (!sortEachProcsGids) {
          // Keep the remote GIDs in first-occurrence order: extract
          // (gid, first-offset) pairs, then sort by offset.
          auto RemoteGIDsOffsets = Kokkos::View<LO*, memory_space>(Kokkos::ViewAllocateWithoutInitializing("RemoteGIDsOffsets"), numRemoteColGIDs);

          Kokkos::parallel_scan(
              Kokkos::RangePolicy<execution_space>(0, RemoteGIDMap.capacity()), KOKKOS_LAMBDA(const uint32_t i, size_t& offset, const bool is_final) {
                if( RemoteGIDMap.valid_at(i) ) {
                  if (is_final) {
                    RemoteGIDs(offset) = RemoteGIDMap.key_at(i);
                    RemoteGIDsOffsets(offset) = RemoteGIDMap.value_at(i);
                  }
                  ++offset;
                } });

          // Sort by offset in the graph
          Kokkos::Experimental::sort_by_key(execution_space(), RemoteGIDsOffsets, RemoteGIDs);
        } else {
          // Extract the unique remote GIDs, then sort them numerically.
          Kokkos::parallel_scan(
              Kokkos::RangePolicy<execution_space>(0, RemoteGIDMap.capacity()), KOKKOS_LAMBDA(const uint32_t i, size_t& offset, const bool is_final) {
                if( RemoteGIDMap.valid_at(i) ) {
                  if (is_final) {
                    RemoteGIDs(offset) = RemoteGIDMap.key_at(i);
                  }
                  ++offset;
                } });

          // Sort gids
          Kokkos::sort(RemoteGIDs);
        }
      }
    }  // if the graph has a nonnull row Map

    // NOTE(review): the extractor dropped the call line here; given
    // makeColMapImpl's signature it presumably reads
    // `errCode = makeColMapImpl<LO, GO, NT>(`.
        colMap, remotePIDs,
        domMap,
        numLocalColGIDs, numRemoteColGIDs,
        // NOTE(review): dropped line (presumably
        // `RemoteGIDs, GIDisLocal,`).
        errStrm);

  }  // if the graph is globally indexed
  else {
    // If we reach this point, the graph is neither locally nor
    // globally indexed. Thus, the graph is empty on this process
    // (per the usual legacy Petra convention), so myColumns will be
    // left empty.
    ;  // do nothing
  }

  const global_size_t INV =
      Tpetra::Details::OrdinalTraits<global_size_t>::invalid();
  // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
  // be the same as the Map's min GID? If the first column is empty
  // (contains no entries), then the column Map's min GID won't
  // necessarily be the same as the domain Map's index base.
  const GO indexBase = domMap->getIndexBase();
  colMap = rcp(new map_type(INV, myColumns, indexBase, domMap->getComm()));
  return errCode;
}
570
/// \brief Functor that marks, in a dense bitset, which GIDs from the
///   input list occur; bit (gid - minGID) is set for each gid in gids.
template <typename GOView, typename bitset_t>
struct GatherPresentEntries {
  using GO = typename GOView::non_const_value_type;

  GatherPresentEntries(GO minGID_, const GOView& gids_, const bitset_t& present_)
      : minGID(minGID_)
      , gids(gids_)
      , present(present_) {}

  // Set the bit for gids(i), shifted so that minGID maps to bit 0.
  KOKKOS_INLINE_FUNCTION void operator()(const GO i) const {
    present.set(gids(i) - minGID);
  }

  GO minGID;         // smallest GID; bit 0 of `present` corresponds to it
  GOView gids;       // input list of global indices
  bitset_t present;  // output bitset, one bit per GID in [minGID, minGID + size)
};
588
589template <typename GO, typename mem_space>
590struct MinMaxReduceFunctor {
591 using MinMaxValue = typename Kokkos::MinMax<GO>::value_type;
592 using GOView = Kokkos::View<GO*, mem_space>;
593
594 MinMaxReduceFunctor(const GOView& gids_)
595 : gids(gids_) {}
596
597 KOKKOS_INLINE_FUNCTION void operator()(const GO i, MinMaxValue& lminmax) const {
598 GO gid = gids(i);
599 if (gid < lminmax.min_val)
600 lminmax.min_val = gid;
601 if (gid > lminmax.max_val)
602 lminmax.max_val = gid;
603 };
604
605 const GOView gids;
606};
607
/// \brief Overload of makeColMap that takes the raw list of column
///   GIDs directly instead of a CrsGraph. The looked-up remote PIDs
///   are discarded.
///
/// NOTE(review): the documentation extractor dropped a few source
/// lines from this listing (marked below); in particular the
/// declaration of `constPresentGIDs`, which is used but not declared
/// in the visible code. Compare against the repository version of
/// Tpetra_Details_makeColMap_def.hpp.
template <class LO, class GO, class NT>
int makeColMap(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
               const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
               Kokkos::View<GO*, typename NT::memory_space> gids,
               std::ostream* errStrm) {
  using Kokkos::RangePolicy;
  using Teuchos::Array;
  using device_t = typename NT::device_type;
  using exec_space = typename device_t::execution_space;
  using memory_space = typename device_t::memory_space;
  // Note BMK 5-2021: this is deliberately not just device_t.
  // Bitset cannot use HIPHostPinnedSpace currently, so this needs to
  // use the default memory space for HIP (HIPSpace). Using the default mem
  // space is fine for all other backends too. This bitset type is only used
  // in this function so it won't cause type mismatches.
  using bitset_t = Kokkos::Bitset<typename exec_space::memory_space>;
  using const_bitset_t = Kokkos::ConstBitset<typename exec_space::memory_space>;
  using GOView = Kokkos::View<GO*, memory_space>;
  GO nentries = gids.extent(0);
  // Compute the min and max GID over all input entries.
  GO minGID = Teuchos::OrdinalTraits<GO>::max();
  GO maxGID = 0;
  using MinMaxValue = typename Kokkos::MinMax<GO>::value_type;
  MinMaxValue minMaxGID = {minGID, maxGID};
  Kokkos::parallel_reduce(RangePolicy<exec_space>(0, nentries),
                          // NOTE(review): the extractor dropped a line here --
                          // presumably the reduction functor,
                          // MinMaxReduceFunctor<GO, memory_space>(gids).
                          Kokkos::MinMax<GO>(minMaxGID));
  minGID = minMaxGID.min_val;
  maxGID = minMaxGID.max_val;
  // Now, know the full range of input GIDs.
  // Determine the set of GIDs in the column map using a dense bitset, which corresponds to the range [minGID, maxGID]
  bitset_t presentGIDs(maxGID - minGID + 1);
  // NOTE(review): the extractor dropped lines here -- presumably a
  // parallel_for applying GatherPresentEntries to fill presentGIDs,
  // and the construction of const_bitset_t constPresentGIDs (used
  // below). Confirm against the repository source.
  // Get the set of local and remote GIDs on device
  Kokkos::View<bool*, memory_space> GIDisLocal("GIDisLocal", domMap->getLocalNumElements());
  auto localDomMap = domMap->getLocalMap();

  GO numLocalColGIDs = 0;
  const auto LINV = ::Tpetra::Details::OrdinalTraits<LO>::invalid();
  // Count the present GIDs that the domain Map owns locally, and flag
  // them in GIDisLocal.
  Kokkos::parallel_reduce(
      // NOTE(review): the extractor dropped a line here -- presumably the
      // RangePolicy over constPresentGIDs.size() and the lambda header
      // (const GO i, GO& lcount).
      if (constPresentGIDs.test(i)) {
        auto gid = i + minGID;
        auto lid = localDomMap.getLocalElement(gid);
        bool isRemote = (lid == LINV);
        if (!isRemote) {
          GIDisLocal(lid) = true;
          lcount++;
        }
      } }, numLocalColGIDs);

  GO numRemoteColGIDs = constPresentGIDs.count() - numLocalColGIDs;
  auto RemoteGIDs = Kokkos::View<GO*, memory_space>("RemoteGIDs", numRemoteColGIDs);

  // Pack the remote (present but not locally owned) GIDs into
  // RemoteGIDs in ascending order, via an exclusive scan over the
  // bitset.
  Kokkos::parallel_scan(
      RangePolicy<exec_space>(0, constPresentGIDs.size()), KOKKOS_LAMBDA(const GO i, GO& lcount, const bool finalPass) {
        if (constPresentGIDs.test(i)) {
          auto gid = i+minGID;
          bool isRemote = (localDomMap.getLocalElement(gid) == LINV);
          if (isRemote) {
            if (finalPass) {
              RemoteGIDs(lcount) = gid;
            }
            lcount++;
          }
        } });

  // remotePIDs will be discarded in this version of makeColMap
  Array<int> remotePIDs;
  // Build the column Map from the classified local/remote GIDs.
  return makeColMapImpl<LO, GO, NT>(
      colMap,
      remotePIDs,
      domMap,
      static_cast<size_t>(numLocalColGIDs),
      static_cast<size_t>(numRemoteColGIDs),
      RemoteGIDs,
      GIDisLocal,
      errStrm);
}
688
689} // namespace Tpetra::Details
690
691//
692// Explicit instantiation macros
693//
694// Must be expanded from within the Tpetra namespace!
695//
// Instantiates both makeColMap overloads for (LO, GO, NT): the
// CrsGraph-based version (with remote-PID output) and the raw
// GID-list version.
#define TPETRA_DETAILS_MAKECOLMAP_INSTANT(LO, GO, NT) \
  namespace Details { \
  template int \
  makeColMap(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>&, \
             Teuchos::Array<int>&, \
             const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>&, \
             const CrsGraph<LO, GO, NT>&, \
             const bool, \
             std::ostream*); \
  template int \
  makeColMap(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>&, \
             const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>&, \
             Kokkos::View<GO*, typename NT::memory_space>, \
             std::ostream*); \
  }
711
712#endif // TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
Nonmember function that computes a residual Computes R = B - A * X.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const CrsGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
size_t global_size_t
Global size_t object.