Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Map_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
14
15#ifndef TPETRA_MAP_DEF_HPP
16#define TPETRA_MAP_DEF_HPP
17
18#include <memory>
19#include <sstream>
20#include <stdexcept>
21#include <typeinfo>
22
23#include "Teuchos_as.hpp"
24#include "Teuchos_TypeNameTraits.hpp"
25#include "Teuchos_CommHelpers.hpp"
26
27#include "Tpetra_Directory.hpp" // must include for implicit instantiation to work
29#include "Tpetra_Details_FixedHashTable.hpp"
32#include "Tpetra_Core.hpp"
33#include "Tpetra_Util.hpp"
34#include "Tpetra_Details_mpiIsInitialized.hpp"
35#include "Tpetra_Details_extractMpiCommFromTeuchos.hpp" // teuchosCommIsAnMpiComm
38
39namespace Tpetra {
40namespace Impl {
41
42inline void checkMapInputArray(const char ctorName[],
43 const void* indexList,
44 const size_t indexListSize,
45 const Teuchos::Comm<int>* const comm) {
47
48 const bool debug = Behavior::debug("Map");
49 if (debug) {
50 using std::endl;
51 using Teuchos::outArg;
52 using Teuchos::REDUCE_MIN;
53 using Teuchos::reduceAll;
54
55 const int myRank = comm == nullptr ? 0 : comm->getRank();
56 const bool verbose = Behavior::verbose("Map");
57 std::ostringstream lclErrStrm;
58 int lclSuccess = 1;
59
60 if (indexListSize != 0 && indexList == nullptr) {
61 lclSuccess = 0;
62 if (verbose) {
63 lclErrStrm << "Proc " << myRank << ": indexList is null, "
64 "but indexListSize="
65 << indexListSize << " != 0." << endl;
66 }
67 }
68 int gblSuccess = 0; // output argument
69 reduceAll(*comm, REDUCE_MIN, lclSuccess, outArg(gblSuccess));
70 if (gblSuccess != 1) {
71 std::ostringstream gblErrStrm;
72 gblErrStrm << "Tpetra::Map constructor " << ctorName << " detected a problem with the input array "
73 "(raw array, Teuchos::ArrayView, or Kokkos::View) "
74 "of global indices."
75 << endl;
76 if (verbose) {
77 using ::Tpetra::Details::gathervPrint;
78 gathervPrint(gblErrStrm, lclErrStrm.str(), *comm);
79 }
80 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, gblErrStrm.str());
81 }
82 }
83}
84
85template <class LocalOrdinal, class GlobalOrdinal, class ViewType>
86void computeConstantsOnDevice(const ViewType& entryList, GlobalOrdinal& minMyGID, GlobalOrdinal& maxMyGID, GlobalOrdinal& firstContiguousGID, GlobalOrdinal& lastContiguousGID_val, LocalOrdinal& lastContiguousGID_loc) {
87 using LO = LocalOrdinal;
88 using GO = GlobalOrdinal;
89 using exec_space = typename ViewType::device_type::execution_space;
90 using range_policy = Kokkos::RangePolicy<exec_space, Kokkos::IndexType<LO>>;
91 const LO numLocalElements = entryList.extent(0);
92
93 // We're going to use the minloc backwards because we need to have it sort on the "location" and have the "value" along for the
94 // ride, rather than the other way around
95 typedef typename Kokkos::MinLoc<LO, GO>::value_type minloc_type;
96 minloc_type myMinLoc;
97
98 // Find the initial sequence of parallel gids
99 // To find the lastContiguousGID_, we find the first guy where entryList[i] - entryList[0] != i-0. That's the first non-contiguous guy.
100 // We want the one *before* that guy.
101 Kokkos::parallel_reduce(
102 range_policy(0, numLocalElements), KOKKOS_LAMBDA(const LO& i, GO& l_myMin, GO& l_myMax, GO& l_firstCont, minloc_type& l_lastCont) {
103 GO entry_0 = entryList[0];
104 GO entry_i = entryList[i];
105
106 // Easy stuff
107 l_myMin = (l_myMin < entry_i) ? l_myMin : entry_i;
108 l_myMax = (l_myMax > entry_i) ? l_myMax : entry_i;
109 l_firstCont = entry_0;
110
111 if (entry_i - entry_0 != i && l_lastCont.val >= i) {
112 // We're non-contiguous, so the guy before us could be the last contiguous guy
113 l_lastCont.val = i - 1;
114 l_lastCont.loc = entryList[i - 1];
115 } else if (i == numLocalElements - 1 && i < l_lastCont.val) {
116 // If we're last, we always think we're the last contiguous guy, unless someone non-contiguous is already here
117 l_lastCont.val = i;
118 l_lastCont.loc = entry_i;
119 }
120 },
121 Kokkos::Min<GO>(minMyGID), Kokkos::Max<GO>(maxMyGID), Kokkos::Min<GO>(firstContiguousGID), Kokkos::MinLoc<LO, GO>(myMinLoc));
122
123 // This switch is intentional, since we're using MinLoc backwards
124 lastContiguousGID_val = myMinLoc.loc;
125 lastContiguousGID_loc = myMinLoc.val;
126}
127
128} // namespace Impl
129
130template <class LocalOrdinal, class GlobalOrdinal, class Node>
132 Map()
133 : comm_(new Teuchos::SerialComm<int>())
134 , indexBase_(0)
135 , numGlobalElements_(0)
136 , numLocalElements_(0)
141 , firstContiguousGID_(Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid())
142 , lastContiguousGID_(Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid())
143 , uniform_(false)
144 , // trivially
145 contiguous_(false)
146 , distributed_(false)
147 , // no communicator yet
148 directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
151}
152
153template <class LocalOrdinal, class GlobalOrdinal, class Node>
157 const Teuchos::RCP<const Teuchos::Comm<int>>& comm,
158 const LocalGlobal lOrG)
159 : comm_(comm)
160 , uniform_(true)
161 , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
162 using std::endl;
163 using Teuchos::as;
164 using Teuchos::broadcast;
165 using Teuchos::outArg;
166 using Teuchos::REDUCE_MAX;
167 using Teuchos::REDUCE_MIN;
168 using Teuchos::reduceAll;
169 using Teuchos::typeName;
170 using GO = global_ordinal_type;
171 using GST = global_size_t;
172 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
173 const char funcName[] = "Map(gblNumInds,indexBase,comm,LG)";
174 const char exPfx[] =
175 "Tpetra::Map::Map(gblNumInds,indexBase,comm,LG): ";
176
177 const bool debug = Details::Behavior::debug("Map");
178 const bool verbose = Details::Behavior::verbose("Map");
179 std::unique_ptr<std::string> prefix;
180 if (verbose) {
182 comm_.getRawPtr(), "Map", funcName);
183 std::ostringstream os;
184 os << *prefix << "Start" << endl;
185 std::cerr << os.str();
186 }
189
190 // In debug mode only, check whether numGlobalElements and
191 // indexBase are the same over all processes in the communicator.
192 if (debug) {
203 std::invalid_argument, exPfx << "All processes must "
204 "provide the same number of global elements. Process 0 set "
205 "numGlobalElements="
206 << proc0NumGlobalElements << ". The "
207 "calling process "
208 << comm->getRank() << " set "
209 "numGlobalElements="
210 << numGlobalElements << ". The min "
211 "and max values over all processes are "
212 << minNumGlobalElements << " resp. " << maxNumGlobalElements << ".");
213
221 std::invalid_argument, exPfx << "All processes must "
222 "provide the same indexBase argument. Process 0 set "
223 "indexBase="
224 << proc0IndexBase << ". The calling process " << comm->getRank() << " set indexBase=" << indexBase << ". The min and max values over all processes are " << minIndexBase << " resp. " << maxIndexBase << ".");
225 }
226
227 // Distribute the elements across the processes in the given
228 // communicator so that global IDs (GIDs) are
229 //
230 // - Nonoverlapping (only one process owns each GID)
231 // - Contiguous (the sequence of GIDs is nondecreasing, and no two
232 // adjacent GIDs differ by more than one)
233 // - As evenly distributed as possible (the numbers of GIDs on two
234 // different processes do not differ by more than one)
235
236 // All processes have the same numGlobalElements, but we still
237 // need to check that it is valid. numGlobalElements must be
238 // positive and not the "invalid" value (GSTI).
239 //
240 // This comparison looks funny, but it avoids compiler warnings
241 // for comparing unsigned integers (numGlobalElements_in is a
242 // GST, which is unsigned) while still working if we
243 // later decide to make GST signed.
246 std::invalid_argument, exPfx << "numGlobalElements (= " << numGlobalElements << ") must be nonnegative.");
247
248 TEUCHOS_TEST_FOR_EXCEPTION(numGlobalElements == GSTI, std::invalid_argument, exPfx << "You provided numGlobalElements = Teuchos::OrdinalTraits<"
249 "Tpetra::global_size_t>::invalid(). This version of the "
250 "constructor requires a valid value of numGlobalElements. "
251 "You probably mistook this constructor for the \"contiguous "
252 "nonuniform\" constructor, which can compute the global "
253 "number of elements for you if you set numGlobalElements to "
254 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid().");
255
256 size_t numLocalElements = 0; // will set below
257 if (lOrG == GloballyDistributed) {
258 // Compute numLocalElements:
259 //
260 // If numGlobalElements == numProcs * B + remainder,
261 // then Proc r gets B+1 elements if r < remainder,
262 // and B elements if r >= remainder.
263 //
264 // This strategy is valid for any value of numGlobalElements and
265 // numProcs, including the following border cases:
266 // - numProcs == 1
267 // - numLocalElements < numProcs
268 //
269 // In the former case, remainder == 0 && numGlobalElements ==
270 // numLocalElements. In the latter case, remainder ==
271 // numGlobalElements && numLocalElements is either 0 or 1.
272 const GST numProcs = static_cast<GST>(comm_->getSize());
273 const GST myRank = static_cast<GST>(comm_->getRank());
276
277 GO startIndex;
278 if (myRank < remainder) {
279 numLocalElements = static_cast<size_t>(1) + static_cast<size_t>(quotient);
280 // myRank was originally an int, so it should never overflow
281 // reasonable GO types.
283 } else {
287 }
288
289 minMyGID_ = indexBase + startIndex;
290 maxMyGID_ = indexBase + startIndex + numLocalElements - 1;
291 minAllGID_ = indexBase;
292 maxAllGID_ = indexBase + numGlobalElements - 1;
293 distributed_ = (numProcs > 1);
294 } else { // lOrG == LocallyReplicated
296 minMyGID_ = indexBase;
297 maxMyGID_ = indexBase + numGlobalElements - 1;
298 distributed_ = false;
299 }
300
301 minAllGID_ = indexBase;
302 maxAllGID_ = indexBase + numGlobalElements - 1;
303 indexBase_ = indexBase;
304 numGlobalElements_ = numGlobalElements;
305 numLocalElements_ = numLocalElements;
306 firstContiguousGID_ = minMyGID_;
307 lastContiguousGID_ = maxMyGID_;
308 contiguous_ = true;
309
310 // Create the Directory on demand in getRemoteIndexList().
311 // setupDirectory ();
312
313 if (verbose) {
314 std::ostringstream os;
315 os << *prefix << "Done" << endl;
316 std::cerr << os.str();
317 }
318}
319
320template <class LocalOrdinal, class GlobalOrdinal, class Node>
323 const size_t numLocalElements,
325 const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
326 : comm_(comm)
327 , uniform_(false)
328 , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
329 using std::endl;
330 using Teuchos::as;
331 using Teuchos::broadcast;
332 using Teuchos::outArg;
333 using Teuchos::REDUCE_MAX;
334 using Teuchos::REDUCE_MIN;
335 using Teuchos::REDUCE_SUM;
336 using Teuchos::reduceAll;
337 using Teuchos::scan;
338 using GO = global_ordinal_type;
339 using GST = global_size_t;
340 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
341 const char funcName[] =
342 "Map(gblNumInds,lclNumInds,indexBase,comm)";
343 const char exPfx[] =
344 "Tpetra::Map::Map(gblNumInds,lclNumInds,indexBase,comm): ";
345 const char suffix[] =
346 ". Please report this bug to the Tpetra developers.";
347
348 const bool debug = Details::Behavior::debug("Map");
349 const bool verbose = Details::Behavior::verbose("Map");
350 std::unique_ptr<std::string> prefix;
351 if (verbose) {
353 comm_.getRawPtr(), "Map", funcName);
354 std::ostringstream os;
355 os << *prefix << "Start" << endl;
356 std::cerr << os.str();
357 }
360
361 // Global sum of numLocalElements over all processes.
362 // Keep this for later debug checks.
364 if (debug) {
365 debugGlobalSum = initialNonuniformDebugCheck(exPfx,
366 numGlobalElements, numLocalElements, indexBase, comm);
367 }
368
369 // Distribute the elements across the nodes so that they are
370 // - non-overlapping
371 // - contiguous
372
373 // This differs from the first Map constructor (that only takes a
374 // global number of elements) in that the user has specified the
375 // number of local elements, so that the elements are not
376 // (necessarily) evenly distributed over the processes.
377
378 // Compute my local offset. This is an inclusive scan, so to get
379 // the final offset, we subtract off the input.
383
384 if (numGlobalElements != GSTI) {
385 numGlobalElements_ = numGlobalElements; // Use the user's value.
386 } else {
387 // Inclusive scan means that the last process has the final sum.
388 // Rather than doing a reduceAll to get the sum of
389 // numLocalElements, we can just have the last process broadcast
390 // its result. That saves us a round of log(numProcs) messages.
391 const int numProcs = comm->getSize();
393 if (numProcs > 1) {
394 broadcast(*comm, numProcs - 1, outArg(globalSum));
395 }
396 numGlobalElements_ = globalSum;
397
398 if (debug) {
399 // No need for an all-reduce here; both come from collectives.
400 TEUCHOS_TEST_FOR_EXCEPTION(globalSum != debugGlobalSum, std::logic_error, exPfx << "globalSum = " << globalSum << " != debugGlobalSum = " << debugGlobalSum << suffix);
401 }
402 }
403 numLocalElements_ = numLocalElements;
404 indexBase_ = indexBase;
405 minAllGID_ = (numGlobalElements_ == 0) ? std::numeric_limits<GO>::max() : indexBase;
406 maxAllGID_ = (numGlobalElements_ == 0) ? std::numeric_limits<GO>::lowest() : indexBase + GO(numGlobalElements_) - GO(1);
407 minMyGID_ = (numLocalElements_ == 0) ? std::numeric_limits<GO>::max() : indexBase + GO(myOffset);
408 maxMyGID_ = (numLocalElements_ == 0) ? std::numeric_limits<GO>::lowest() : indexBase + myOffset + GO(numLocalElements) - GO(1);
409 firstContiguousGID_ = minMyGID_;
410 lastContiguousGID_ = maxMyGID_;
411 contiguous_ = true;
412 distributed_ = checkIsDist();
413
414 // Create the Directory on demand in getRemoteIndexList().
415 // setupDirectory ();
416
417 if (verbose) {
418 std::ostringstream os;
419 os << *prefix << "Done" << endl;
420 std::cerr << os.str();
421 }
422}
423
424template <class LocalOrdinal, class GlobalOrdinal, class Node>
428 const char errorMessagePrefix[],
430 const size_t numLocalElements,
432 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) const {
433 const bool debug = Details::Behavior::debug("Map");
434 if (!debug) {
435 return global_size_t(0);
436 }
437
438 using Teuchos::broadcast;
439 using Teuchos::outArg;
440 using Teuchos::ptr;
441 using Teuchos::REDUCE_MAX;
442 using Teuchos::REDUCE_MIN;
443 using Teuchos::REDUCE_SUM;
444 using Teuchos::reduceAll;
445 using GO = global_ordinal_type;
446 using GST = global_size_t;
447 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
448
449 // The user has specified the distribution of indices over the
450 // processes. The distribution is not necessarily contiguous or
451 // equally shared over the processes.
452 //
453 // We assume that the number of local elements can be stored in a
454 // size_t. The instance member numLocalElements_ is a size_t, so
455 // this variable and that should have the same type.
456
457 GST debugGlobalSum = 0; // Will be global sum of numLocalElements
460 // In debug mode only, check whether numGlobalElements and
461 // indexBase are the same over all processes in the communicator.
462 {
473 std::invalid_argument, errorMessagePrefix << "All processes "
474 "must provide the same number of global elements, even if "
475 "that argument is "
476 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
477 "(which signals that the Map should compute the global "
478 "number of elements). Process 0 set numGlobalElements"
479 "="
480 << proc0NumGlobalElements << ". The calling process " << comm->getRank() << " set numGlobalElements=" << numGlobalElements << ". The min and max values over all "
481 "processes are "
482 << minNumGlobalElements << " resp. " << maxNumGlobalElements << ".");
483
491 std::invalid_argument, errorMessagePrefix << "All processes must provide the same indexBase argument. "
492 "Process 0 set indexBase = "
493 << proc0IndexBase << ". The "
494 "calling process "
495 << comm->getRank() << " set indexBase=" << indexBase << ". The min and max values over all "
496 "processes are "
497 << minIndexBase << " resp. " << maxIndexBase << ".");
498
499 // Make sure that the sum of numLocalElements over all processes
500 // equals numGlobalElements.
503 std::invalid_argument,
504 errorMessagePrefix << "The sum of each process' number of "
505 "indices over all processes, "
506 << debugGlobalSum << ", != "
507 << "numGlobalElements=" << numGlobalElements << ". If you "
508 "would like this constructor to compute numGlobalElements "
509 "for you, you may set numGlobalElements="
510 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
511 "on input. Please note that this is NOT necessarily -1.");
512 }
513 return debugGlobalSum;
514}
515
516template <class LocalOrdinal, class GlobalOrdinal, class Node>
519 const char errorMessagePrefix[],
521 const Kokkos::View<const global_ordinal_type*,
522 Kokkos::LayoutLeft,
523 Kokkos::HostSpace,
524 Kokkos::MemoryUnmanaged>& entryList_host,
526 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
527 Tpetra::Details::ProfilingRegion pr("Map::initWithNonownedHostIndexList()");
528
529 using Kokkos::LayoutLeft;
530 using Kokkos::subview;
531 using Kokkos::View;
532 using Kokkos::view_alloc;
533 using Kokkos::WithoutInitializing;
534 using Teuchos::as;
535 using Teuchos::broadcast;
536 using Teuchos::outArg;
537 using Teuchos::ptr;
538 using Teuchos::REDUCE_MAX;
539 using Teuchos::REDUCE_MIN;
540 using Teuchos::REDUCE_SUM;
541 using Teuchos::reduceAll;
542 using LO = local_ordinal_type;
543 using GO = global_ordinal_type;
544 using GST = global_size_t;
545 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
546 // Make sure that Kokkos has been initialized (Github Issue #513).
547 TEUCHOS_TEST_FOR_EXCEPTION(!Kokkos::is_initialized(), std::runtime_error,
548 errorMessagePrefix << "The Kokkos execution space "
549 << Teuchos::TypeNameTraits<execution_space>::name()
550 << " has not been initialized. "
551 "Please initialize it before creating a Map.")
552
553 // The user has specified the distribution of indices over the
554 // processes, via the input array of global indices on each
555 // process. The distribution is not necessarily contiguous or
556 // equally shared over the processes.
557
558 // The length of the input array on this process is the number of
559 // local indices to associate with this process, even though the
560 // input array contains global indices. We assume that the number
561 // of local indices on a process can be stored in a size_t;
562 // numLocalElements_ is a size_t, so this variable and that should
563 // have the same type.
564 const size_t numLocalElements(entryList_host.size());
565
566 initialNonuniformDebugCheck(errorMessagePrefix, numGlobalElements,
568
569 // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
570 // reduction is redundant, since the directory Map will have to do
571 // the same thing. Thus, we could do the scan and broadcast for
572 // the directory Map here, and give the computed offsets to the
573 // directory Map's constructor. However, a reduction costs less
574 // than a scan and broadcast, so this still saves time if users of
575 // this Map don't ever need the Directory (i.e., if they never
576 // call getRemoteIndexList on this Map).
577 if (numGlobalElements != GSTI) {
578 numGlobalElements_ = numGlobalElements; // Use the user's value.
579 } else { // The user wants us to compute the sum.
580 reduceAll(*comm, REDUCE_SUM,
581 static_cast<GST>(numLocalElements),
582 outArg(numGlobalElements_));
583 }
584
585 // mfh 20 Feb 2013: We've never quite done the right thing for
586 // duplicate GIDs here. Duplicate GIDs have always been counted
587 // distinctly in numLocalElements_, and thus should get a
588 // different LID. However, we've always used std::map or a hash
589 // table for the GID -> LID lookup table, so distinct GIDs always
590 // map to the same LID. Furthermore, the order of the input GID
591 // list matters, so it's not desirable to sort for determining
592 // uniqueness.
593 //
594 // I've chosen for now to write this code as if the input GID list
595 // contains no duplicates. If this is not desired, we could use
596 // the lookup table itself to determine uniqueness: If we haven't
597 // seen the GID before, it gets a new LID and it's added to the
598 // LID -> GID and GID -> LID tables. If we have seen the GID
599 // before, it doesn't get added to either table. I would
600 // implement this, but it would cost more to do the double lookups
601 // in the table (one to check, and one to insert).
602 //
603 // More importantly, since we build the GID -> LID table in (a
604 // thread-) parallel (way), the order in which duplicate GIDs may
605 // get inserted is not defined. This would make the assignment of
606 // LID to GID nondeterministic.
607
608 numLocalElements_ = numLocalElements;
609 indexBase_ = indexBase;
610
611 minMyGID_ = indexBase_;
612 maxMyGID_ = indexBase_;
613
614 // NOTE (mfh 27 May 2015): While finding the initial contiguous
615 // GID range requires looking at all the GIDs in the range,
616 // dismissing an interval of GIDs only requires looking at the
617 // first and last GIDs. Thus, we could do binary search backwards
618 // from the end in order to catch the common case of a contiguous
619 // interval followed by noncontiguous entries. On the other hand,
620 // we could just expose this case explicitly as yet another Map
621 // constructor, and avoid the trouble of detecting it.
622 if (numLocalElements_ > 0) {
623 // Find contiguous GID range, with the restriction that the
624 // beginning of the range starts with the first entry. While
625 // doing so, fill in the LID -> GID table.
626 typename decltype(lgMap_)::non_const_type lgMap(view_alloc("lgMap", WithoutInitializing), numLocalElements_);
627 auto lgMap_host =
628 Kokkos::create_mirror_view(Kokkos::HostSpace(), lgMap);
629
630 // The input array entryList_host is already on host, so we
631 // don't need to take a host view of it.
632 // auto entryList_host =
633 // Kokkos::create_mirror_view (Kokkos::HostSpace (), entryList);
634 // Kokkos::deep_copy (entryList_host, entryList);
635
636 firstContiguousGID_ = entryList_host[0];
637 lastContiguousGID_ = firstContiguousGID_ + 1;
638
639 // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
640 // anyway, so we have to look at them all. The logical way to
641 // find the first noncontiguous entry would thus be to "reduce,"
642 // where the local reduction result is whether entryList[i] + 1
643 // == entryList[i+1].
644
645 lgMap_host[0] = firstContiguousGID_;
646 size_t i = 1;
647 for (; i < numLocalElements_; ++i) {
648 const GO curGid = entryList_host[i];
649 const LO curLid = as<LO>(i);
650
651 if (lastContiguousGID_ != curGid) break;
652
653 // Add the entry to the LID->GID table only after we know that
654 // the current GID is in the initial contiguous sequence, so
655 // that we don't repeat adding it in the first iteration of
656 // the loop below over the remaining noncontiguous GIDs.
658 ++lastContiguousGID_;
659 }
660 --lastContiguousGID_;
661 // NOTE: i is the first non-contiguous index.
662
663 // [firstContiguousGID_, lastContigousGID_] is the initial
664 // sequence of contiguous GIDs. We can start the min and max
665 // GID using this range.
666 minMyGID_ = firstContiguousGID_;
667 maxMyGID_ = lastContiguousGID_;
668
669 // Compute the GID -> LID lookup table, _not_ including the
670 // initial sequence of contiguous GIDs.
672 {
673 const std::pair<size_t, size_t> ncRange(i, entryList_host.extent(0));
675 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(nonContigGids_host.extent(0)) !=
676 static_cast<size_t>(entryList_host.extent(0) - i),
677 std::logic_error,
678 "Tpetra::Map noncontiguous constructor: "
679 "nonContigGids_host.extent(0) = "
680 << nonContigGids_host.extent(0)
681 << " != entryList_host.extent(0) - i = "
682 << (entryList_host.extent(0) - i) << " = "
683 << entryList_host.extent(0) << " - " << i
684 << ". Please report this bug to the Tpetra developers.");
685
686 // FixedHashTable's constructor expects an owned device View,
687 // so we must deep-copy the subview of the input indices.
690 nonContigGids_host.size());
691
692 // DEEP_COPY REVIEW - HOST-TO-DEVICE
693 Kokkos::deep_copy(execution_space(), nonContigGids, nonContigGids_host);
694 Kokkos::fence("Map::initWithNonownedHostIndexList"); // for UVM issues below - which will be refatored soon so FixedHashTable can build as pure CudaSpace - then I think remove this fence
695
698 // Make host version - when memory spaces match these just do trivial assignment
699 glMapHost_ = global_to_local_table_host_type(glMap_);
700 }
701
702 // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
703 // table above, we have to look at all the (noncontiguous) input
704 // indices anyway. Thus, why not have the constructor compute
705 // and return the min and max?
706
707 for (; i < numLocalElements_; ++i) {
708 const GO curGid = entryList_host[i];
709 const LO curLid = static_cast<LO>(i);
710 lgMap_host[curLid] = curGid; // LID -> GID table
711
712 // While iterating through entryList, we compute its
713 // (process-local) min and max elements.
714 if (curGid < minMyGID_) {
715 minMyGID_ = curGid;
716 }
717 if (curGid > maxMyGID_) {
718 maxMyGID_ = curGid;
719 }
720 }
721
722 // We filled lgMap on host above; now sync back to device.
723 // DEEP_COPY REVIEW - HOST-TO-DEVICE
724 Kokkos::deep_copy(execution_space(), lgMap, lgMap_host);
725
726 // "Commit" the local-to-global lookup table we filled in above.
727 lgMap_ = lgMap;
728 // We've already created this, so use it.
729 lgMapHost_ = lgMap_host;
730
731 } else {
732 minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
733 maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
734 // This insures tests for GIDs in the range
735 // [firstContiguousGID_, lastContiguousGID_] fail for processes
736 // with no local elements.
737 firstContiguousGID_ = indexBase_ + 1;
738 lastContiguousGID_ = indexBase_;
739 // glMap_ was default constructed, so it's already empty.
740 }
741
742 // Compute the min and max of all processes' GIDs. If
743 // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
744 // are both indexBase_. This is wrong, but fixing it would
745 // require either a fancy sparse all-reduce, or a custom reduction
746 // operator that ignores invalid values ("invalid" means
747 // Tpetra::Details::OrdinalTraits<GO>::invalid()).
748 //
749 // Also, while we're at it, use the same all-reduce to figure out
750 // if the Map is distributed. "Distributed" means that there is
751 // at least one process with a number of local elements less than
752 // the number of global elements.
753 //
754 // We're computing the min and max of all processes' GIDs using a
755 // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
756 // and y are signed). (This lets us combine the min and max into
757 // a single all-reduce.) If each process sets localDist=1 if its
758 // number of local elements is strictly less than the number of
759 // global elements, and localDist=0 otherwise, then a MAX
760 // all-reduce on localDist tells us if the Map is distributed (1
761 // if yes, 0 if no). Thus, we can append localDist onto the end
762 // of the data and get the global result from the all-reduce.
763 if (std::numeric_limits<GO>::is_signed) {
764 // Does my process NOT own all the elements?
765 const GO localDist =
766 (as<GST>(numLocalElements_) < numGlobalElements_) ? 1 : 0;
767
769 minMaxInput[0] = -minMyGID_;
770 minMaxInput[1] = maxMyGID_;
772
773 GO minMaxOutput[3];
774 minMaxOutput[0] = 0;
775 minMaxOutput[1] = 0;
776 minMaxOutput[2] = 0;
778 minAllGID_ = -minMaxOutput[0];
779 maxAllGID_ = minMaxOutput[1];
781 distributed_ = (comm_->getSize() > 1 && globalDist == 1);
782 } else { // unsigned; use two reductions
783 // This is always correct, no matter the signedness of GO.
784 reduceAll<int, GO>(*comm_, REDUCE_MIN, minMyGID_, outArg(minAllGID_));
785 reduceAll<int, GO>(*comm_, REDUCE_MAX, maxMyGID_, outArg(maxAllGID_));
786 distributed_ = checkIsDist();
787 }
789 contiguous_ = false; // "Contiguous" is conservative.
790
792 minAllGID_ < indexBase_,
793 std::invalid_argument,
794 "Tpetra::Map constructor (noncontiguous): "
795 "Minimum global ID = "
796 << minAllGID_ << " over all process(es) is "
797 "less than the given indexBase = "
798 << indexBase_ << ".");
799
800 // Create the Directory on demand in getRemoteIndexList().
801 // setupDirectory ();
802}
803
804template <class LocalOrdinal, class GlobalOrdinal, class Node>
807 const GlobalOrdinal indexList[],
810 const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
811 : comm_(comm)
812 , uniform_(false)
813 , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
814 using std::endl;
815 const char funcName[] =
816 "Map(gblNumInds,indexList,indexListSize,indexBase,comm)";
817
818 const bool verbose = Details::Behavior::verbose("Map");
819 std::unique_ptr<std::string> prefix;
820 if (verbose) {
821 prefix = Details::createPrefix(
822 comm_.getRawPtr(), "Map", funcName);
823 std::ostringstream os;
824 os << *prefix << "Start" << endl;
825 std::cerr << os.str();
826 }
830 Impl::checkMapInputArray("(GST, const GO[], LO, GO, comm)",
831 indexList, static_cast<size_t>(indexListSize),
832 comm.getRawPtr());
833 // Not quite sure if I trust all code to behave correctly if the
834 // pointer is nonnull but the array length is nonzero, so I'll
835 // make sure the raw pointer is null if the length is zero.
836 const GlobalOrdinal* const indsRaw = indexListSize == 0 ? NULL : indexList;
837 Kokkos::View<const GlobalOrdinal*,
838 Kokkos::LayoutLeft,
839 Kokkos::HostSpace,
840 Kokkos::MemoryUnmanaged>
842 initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
843 indexBase, comm);
844 if (verbose) {
845 std::ostringstream os;
846 os << *prefix << "Done" << endl;
847 std::cerr << os.str();
848 }
849}
850
851template <class LocalOrdinal, class GlobalOrdinal, class Node>
854 const Teuchos::ArrayView<const GlobalOrdinal>& entryList,
856 const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
857 : comm_(comm)
858 , uniform_(false)
859 , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
860 using std::endl;
861 const char* funcName = "Map(gblNumInds,entryList(Teuchos::ArrayView),indexBase,comm)";
862
863 const bool verbose = Details::Behavior::verbose("Map");
864 std::unique_ptr<std::string> prefix;
865 if (verbose) {
866 prefix = Details::createPrefix(
867 comm_.getRawPtr(), "Map", funcName);
868 std::ostringstream os;
869 os << *prefix << "Start" << endl;
870 std::cerr << os.str();
871 }
875 const size_t numLclInds = static_cast<size_t>(entryList.size());
876 Impl::checkMapInputArray("(GST, ArrayView, GO, comm)",
877 entryList.getRawPtr(), numLclInds,
878 comm.getRawPtr());
879 // Not quite sure if I trust both ArrayView and View to behave
880 // correctly if the pointer is nonnull but the array length is
881 // nonzero, so I'll make sure it's null if the length is zero.
882 const GlobalOrdinal* const indsRaw =
883 numLclInds == 0 ? NULL : entryList.getRawPtr();
884 Kokkos::View<const GlobalOrdinal*,
885 Kokkos::LayoutLeft,
886 Kokkos::HostSpace,
887 Kokkos::MemoryUnmanaged>
889 initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
890 indexBase, comm);
891 if (verbose) {
892 std::ostringstream os;
893 os << *prefix << "Done" << endl;
894 std::cerr << os.str();
896}
897
898template <class LocalOrdinal, class GlobalOrdinal, class Node>
901 const Kokkos::View<const GlobalOrdinal*, device_type>& entryList,
903 const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
904 : comm_(comm)
905 , uniform_(false)
906 , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
907 using Kokkos::LayoutLeft;
908 using Kokkos::subview;
909 using Kokkos::View;
910 using Kokkos::view_alloc;
911 using Kokkos::WithoutInitializing;
912 using std::endl;
913 using Teuchos::arcp;
914 using Teuchos::ArrayView;
915 using Teuchos::as;
916 using Teuchos::broadcast;
917 using Teuchos::outArg;
918 using Teuchos::ptr;
919 using Teuchos::REDUCE_MAX;
920 using Teuchos::REDUCE_MIN;
921 using Teuchos::REDUCE_SUM;
922 using Teuchos::reduceAll;
923 using Teuchos::typeName;
924 using LO = local_ordinal_type;
925 using GO = global_ordinal_type;
927 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
928 const char funcName[] =
929 "Map(gblNumInds,entryList(Kokkos::View),indexBase,comm)";
930
931 const bool verbose = Details::Behavior::verbose("Map");
932 std::unique_ptr<std::string> prefix;
933 if (verbose) {
935 comm_.getRawPtr(), "Map", funcName);
936 std::ostringstream os;
937 os << *prefix << "Start" << endl;
938 std::cerr << os.str();
939 }
943 Impl::checkMapInputArray("(GST, Kokkos::View, GO, comm)",
944 entryList.data(),
945 static_cast<size_t>(entryList.extent(0)),
946 comm.getRawPtr());
947
948 // The user has specified the distribution of indices over the
949 // processes, via the input array of global indices on each
950 // process. The distribution is not necessarily contiguous or
951 // equally shared over the processes.
952
953 // The length of the input array on this process is the number of
954 // local indices to associate with this process, even though the
955 // input array contains global indices. We assume that the number
956 // of local indices on a process can be stored in a size_t;
957 // numLocalElements_ is a size_t, so this variable and that should
958 // have the same type.
959 const size_t numLocalElements(entryList.size());
960
961 initialNonuniformDebugCheck(funcName, numGlobalElements,
963
964 // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
965 // reduction is redundant, since the directory Map will have to do
966 // the same thing. Thus, we could do the scan and broadcast for
967 // the directory Map here, and give the computed offsets to the
968 // directory Map's constructor. However, a reduction costs less
969 // than a scan and broadcast, so this still saves time if users of
970 // this Map don't ever need the Directory (i.e., if they never
971 // call getRemoteIndexList on this Map).
972 if (numGlobalElements != GSTI) {
973 numGlobalElements_ = numGlobalElements; // Use the user's value.
974 } else { // The user wants us to compute the sum.
975 reduceAll(*comm, REDUCE_SUM,
976 static_cast<GST>(numLocalElements),
977 outArg(numGlobalElements_));
978 }
979
980 // mfh 20 Feb 2013: We've never quite done the right thing for
981 // duplicate GIDs here. Duplicate GIDs have always been counted
982 // distinctly in numLocalElements_, and thus should get a
983 // different LID. However, we've always used std::map or a hash
984 // table for the GID -> LID lookup table, so distinct GIDs always
985 // map to the same LID. Furthermore, the order of the input GID
986 // list matters, so it's not desirable to sort for determining
987 // uniqueness.
988 //
989 // I've chosen for now to write this code as if the input GID list
990 // contains no duplicates. If this is not desired, we could use
991 // the lookup table itself to determine uniqueness: If we haven't
992 // seen the GID before, it gets a new LID and it's added to the
993 // LID -> GID and GID -> LID tables. If we have seen the GID
994 // before, it doesn't get added to either table. I would
995 // implement this, but it would cost more to do the double lookups
996 // in the table (one to check, and one to insert).
997 //
998 // More importantly, since we build the GID -> LID table in (a
999 // thread-) parallel (way), the order in which duplicate GIDs may
1000 // get inserted is not defined. This would make the assignment of
1001 // LID to GID nondeterministic.
1002
1003 numLocalElements_ = numLocalElements;
1004 indexBase_ = indexBase;
1005
1006 minMyGID_ = indexBase_;
1007 maxMyGID_ = indexBase_;
1008
1009 // NOTE (mfh 27 May 2015): While finding the initial contiguous
1010 // GID range requires looking at all the GIDs in the range,
1011 // dismissing an interval of GIDs only requires looking at the
1012 // first and last GIDs. Thus, we could do binary search backwards
1013 // from the end in order to catch the common case of a contiguous
1014 // interval followed by noncontiguous entries. On the other hand,
1015 // we could just expose this case explicitly as yet another Map
1016 // constructor, and avoid the trouble of detecting it.
1017 if (numLocalElements_ > 0) {
1018 // Find contiguous GID range, with the restriction that the
1019 // beginning of the range starts with the first entry. While
1020 // doing so, fill in the LID -> GID table.
1021 typename decltype(lgMap_)::non_const_type lgMap(view_alloc("lgMap2", WithoutInitializing), numLocalElements_);
1022
1023 // Because you can't use lambdas in constructors on CUDA. Or using private/protected data.
1024 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
1025 Kokkos::deep_copy(typename device_type::execution_space(), lgMap, entryList);
1027 Impl::computeConstantsOnDevice(entryList, minMyGID_, maxMyGID_, firstContiguousGID_, lastContiguousGID_, lastContiguousGID_loc);
1029 auto nonContigGids = Kokkos::subview(entryList, std::pair<size_t, size_t>(firstNonContiguous_loc, entryList.extent(0)));
1030
1031 // NOTE: We do not fill the glMapHost_ and lgMapHost_ views here. They will be filled lazily later
1034
1035 // "Commit" the local-to-global lookup table we filled in above.
1036 lgMap_ = lgMap;
1037
1038 } else {
1039 minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
1040 maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
1041 // This insures tests for GIDs in the range
1042 // [firstContiguousGID_, lastContiguousGID_] fail for processes
1043 // with no local elements.
1044 firstContiguousGID_ = indexBase_ + 1;
1045 lastContiguousGID_ = indexBase_;
1046 // glMap_ was default constructed, so it's already empty.
1047 }
1048
1049 // Compute the min and max of all processes' GIDs. If
1050 // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
1051 // are both indexBase_. This is wrong, but fixing it would
1052 // require either a fancy sparse all-reduce, or a custom reduction
1053 // operator that ignores invalid values ("invalid" means
1054 // Tpetra::Details::OrdinalTraits<GO>::invalid()).
1055 //
1056 // Also, while we're at it, use the same all-reduce to figure out
1057 // if the Map is distributed. "Distributed" means that there is
1058 // at least one process with a number of local elements less than
1059 // the number of global elements.
1060 //
1061 // We're computing the min and max of all processes' GIDs using a
1062 // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
1063 // and y are signed). (This lets us combine the min and max into
1064 // a single all-reduce.) If each process sets localDist=1 if its
1065 // number of local elements is strictly less than the number of
1066 // global elements, and localDist=0 otherwise, then a MAX
1067 // all-reduce on localDist tells us if the Map is distributed (1
1068 // if yes, 0 if no). Thus, we can append localDist onto the end
1069 // of the data and get the global result from the all-reduce.
1070 if (std::numeric_limits<GO>::is_signed) {
1071 // Does my process NOT own all the elements?
1072 const GO localDist =
1073 (as<GST>(numLocalElements_) < numGlobalElements_) ? 1 : 0;
1074
1075 GO minMaxInput[3];
1076 minMaxInput[0] = -minMyGID_;
1077 minMaxInput[1] = maxMyGID_;
1078 minMaxInput[2] = localDist;
1079
1080 GO minMaxOutput[3];
1081 minMaxOutput[0] = 0;
1082 minMaxOutput[1] = 0;
1083 minMaxOutput[2] = 0;
1084 reduceAll<int, GO>(*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
1085 minAllGID_ = -minMaxOutput[0];
1086 maxAllGID_ = minMaxOutput[1];
1087 const GO globalDist = minMaxOutput[2];
1088 distributed_ = (comm_->getSize() > 1 && globalDist == 1);
1089 } else { // unsigned; use two reductions
1090 // This is always correct, no matter the signedness of GO.
1091 reduceAll<int, GO>(*comm_, REDUCE_MIN, minMyGID_, outArg(minAllGID_));
1092 reduceAll<int, GO>(*comm_, REDUCE_MAX, maxMyGID_, outArg(maxAllGID_));
1093 distributed_ = checkIsDist();
1094 }
1095
1096 contiguous_ = false; // "Contiguous" is conservative.
1097
1098 TEUCHOS_TEST_FOR_EXCEPTION(
1099 minAllGID_ < indexBase_,
1100 std::invalid_argument,
1101 "Tpetra::Map constructor (noncontiguous): "
1102 "Minimum global ID = "
1103 << minAllGID_ << " over all process(es) is "
1104 "less than the given indexBase = "
1105 << indexBase_ << ".");
1106
1107 // Create the Directory on demand in getRemoteIndexList().
1108 // setupDirectory ();
1109
1110 if (verbose) {
1111 std::ostringstream os;
1112 os << *prefix << "Done" << endl;
1113 std::cerr << os.str();
1114 }
1115}
1116
1117template <class LocalOrdinal, class GlobalOrdinal, class Node>
1119 if (!Kokkos::is_initialized()) {
1120 std::ostringstream os;
1121 os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1122 "Kokkos::finalize() has been called. This is user error! There are "
1123 "two likely causes: "
1124 << std::endl
1125 << " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1126 << std::endl
1127 << " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1128 "of a Tpetra::Map) at the same scope in main() as Kokkos::finalize() "
1129 "or Tpetra::finalize()."
1130 << std::endl
1131 << std::endl
1132 << "Don't do either of these! Please refer to GitHib Issue #2372."
1133 << std::endl;
1134 ::Tpetra::Details::printOnce(std::cerr, os.str(),
1135 this->getComm().getRawPtr());
1136 } else {
1137 using ::Tpetra::Details::mpiIsFinalized;
1138 using ::Tpetra::Details::mpiIsInitialized;
1139 using ::Tpetra::Details::teuchosCommIsAnMpiComm;
1140
1141 Teuchos::RCP<const Teuchos::Comm<int>> comm = this->getComm();
1142 if (!comm.is_null() && teuchosCommIsAnMpiComm(*comm) &&
1143 mpiIsInitialized() && mpiIsFinalized()) {
1144 // Tpetra itself does not require MPI, even if building with
1145 // MPI. It is legal to create Tpetra objects that do not use
1146 // MPI, even in an MPI program. However, calling Tpetra stuff
1147 // after MPI_Finalize() has been called is a bad idea, since
1148 // some Tpetra defaults may use MPI if available.
1149 std::ostringstream os;
1150 os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1151 "MPI_Finalize() has been called. This is user error! There are "
1152 "two likely causes: "
1153 << std::endl
1154 << " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1155 << std::endl
1156 << " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1157 "of a Tpetra::Map) at the same scope in main() as MPI_finalize() or "
1158 "Tpetra::finalize()."
1159 << std::endl
1160 << std::endl
1161 << "Don't do either of these! Please refer to GitHib Issue #2372."
1162 << std::endl;
1163 ::Tpetra::Details::printOnce(std::cerr, os.str(), comm.getRawPtr());
1164 }
1165 }
1166 // mfh 20 Mar 2018: We can't check Tpetra::isInitialized() yet,
1167 // because Tpetra does not yet require Tpetra::initialize /
1168 // Tpetra::finalize.
1169}
1170
1171template <class LocalOrdinal, class GlobalOrdinal, class Node>
1174 getComm().is_null(), std::logic_error,
1175 "Tpetra::Map::isOneToOne: "
1176 "getComm() returns null. Please report this bug to the Tpetra "
1177 "developers.");
1178
1179 // This is a collective operation, if it hasn't been called before.
1180 setupDirectory();
1181 return directory_->isOneToOne(*this);
1182}
1183
1184template <class LocalOrdinal, class GlobalOrdinal, class Node>
1188 if (isContiguous()) {
1189 if (globalIndex < getMinGlobalIndex() ||
1190 globalIndex > getMaxGlobalIndex()) {
1191 return Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid();
1192 }
1193 return static_cast<LocalOrdinal>(globalIndex - getMinGlobalIndex());
1194 } else if (globalIndex >= firstContiguousGID_ &&
1195 globalIndex <= lastContiguousGID_) {
1196 return static_cast<LocalOrdinal>(globalIndex - firstContiguousGID_);
1197 } else {
1198 // If the given global index is not in the table, this returns
1199 // the same value as OrdinalTraits<LocalOrdinal>::invalid().
1200 // glMapHost_ is Host and does not assume UVM
1201 lazyPushToHost();
1202 return glMapHost_.get(globalIndex);
1203 }
1204}
1205
1206template <class LocalOrdinal, class GlobalOrdinal, class Node>
1210 if (localIndex < getMinLocalIndex() || localIndex > getMaxLocalIndex()) {
1211 return Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid();
1212 }
1213 if (isContiguous()) {
1214 return getMinGlobalIndex() + localIndex;
1215 } else {
1216 // This is a host Kokkos::View access, with no RCP or ArrayRCP
1217 // involvement. As a result, it is thread safe.
1218 //
1219 // lgMapHost_ is a host pointer; this does NOT assume UVM.
1220 lazyPushToHost();
1221 return lgMapHost_[localIndex];
1222 }
1223}
1224
1225template <class LocalOrdinal, class GlobalOrdinal, class Node>
1227 const local_ordinal_type localIndices[], size_t numEntries, global_ordinal_type globalIndices[]) const {
1228 auto const minGI = getMinGlobalIndex();
1229 auto const minLI = getMinLocalIndex();
1230 auto const maxLI = getMaxLocalIndex();
1231 if (isContiguous()) {
1232 for (size_t i = 0; i < numEntries; i++) {
1233 auto lclInd = localIndices[i];
1235 return true;
1236 }
1237 globalIndices[i] = minGI + lclInd;
1238 }
1239 } else {
1240 // This is a host Kokkos::View access, with no RCP or ArrayRCP
1241 // involvement. As a result, it is thread safe.
1242 //
1243 // lgMapHost_ is a host pointer; this does NOT assume UVM.
1244 lazyPushToHost();
1245 for (size_t i = 0; i < numEntries; i++) {
1246 auto lclInd = localIndices[i];
1247 if (lclInd < minLI || lclInd > maxLI) {
1248 return true;
1249 }
1250 globalIndices[i] = lgMapHost_[lclInd];
1251 }
1252 }
1253 return false;
1254}
1255
1256template <class LocalOrdinal, class GlobalOrdinal, class Node>
1259 if (localIndex < getMinLocalIndex() || localIndex > getMaxLocalIndex()) {
1260 return false;
1261 } else {
1262 return true;
1263 }
1264}
1265
1266template <class LocalOrdinal, class GlobalOrdinal, class Node>
1269 return this->getLocalElement(globalIndex) !=
1270 Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid();
1271}
1272
1273template <class LocalOrdinal, class GlobalOrdinal, class Node>
1275 return uniform_;
1276}
1277
1278template <class LocalOrdinal, class GlobalOrdinal, class Node>
1280 return contiguous_;
1281}
1282
1283template <class LocalOrdinal, class GlobalOrdinal, class Node>
1286 getLocalMap() const {
1287 return local_map_type(glMap_, lgMap_, getIndexBase(),
1288 getMinGlobalIndex(), getMaxGlobalIndex(),
1289 firstContiguousGID_, lastContiguousGID_,
1290 getLocalNumElements(), isContiguous());
1291}
1292
1293template <class LocalOrdinal, class GlobalOrdinal, class Node>
1296 using Teuchos::outArg;
1297 using Teuchos::REDUCE_MIN;
1298 using Teuchos::reduceAll;
1299 //
1300 // Tests that avoid the Boolean all-reduce below by using
1301 // globally consistent quantities.
1302 //
1303 if (this == &map) {
1304 // Pointer equality on one process always implies pointer
1305 // equality on all processes, since Map is immutable.
1306 return true;
1307 } else if (getComm()->getSize() != map.getComm()->getSize()) {
1308 // The two communicators have different numbers of processes.
1309 // It's not correct to call isCompatible() in that case. This
1310 // may result in the all-reduce hanging below.
1311 return false;
1312 } else if (getGlobalNumElements() != map.getGlobalNumElements()) {
1313 // Two Maps are definitely NOT compatible if they have different
1314 // global numbers of indices.
1315 return false;
1316 } else if (isContiguous() && isUniform() &&
1317 map.isContiguous() && map.isUniform()) {
1318 // Contiguous uniform Maps with the same number of processes in
1319 // their communicators, and with the same global numbers of
1320 // indices, are always compatible.
1321 return true;
1322 } else if (!isContiguous() && !map.isContiguous() &&
1323 lgMap_.extent(0) != 0 && map.lgMap_.extent(0) != 0 &&
1324 lgMap_.data() == map.lgMap_.data()) {
1325 // Noncontiguous Maps whose global index lists are nonempty and
1326 // have the same pointer must be the same (and therefore
1327 // contiguous).
1328 //
1329 // Nonempty is important. For example, consider a communicator
1330 // with two processes, and two Maps that share this
1331 // communicator, with zero global indices on the first process,
1332 // and different nonzero numbers of global indices on the second
1333 // process. In that case, on the first process, the pointers
1334 // would both be NULL.
1335 return true;
1336 }
1337
1339 getGlobalNumElements() != map.getGlobalNumElements(), std::logic_error,
1340 "Tpetra::Map::isCompatible: There's a bug in this method. We've already "
1341 "checked that this condition is true above, but it's false here. "
1342 "Please report this bug to the Tpetra developers.");
1343
1344 // Do both Maps have the same number of indices on each process?
1345 const int locallyCompat =
1346 (getLocalNumElements() == map.getLocalNumElements()) ? 1 : 0;
1347
1348 int globallyCompat = 0;
1350 return (globallyCompat == 1);
1351}
1352
1353template <class LocalOrdinal, class GlobalOrdinal, class Node>
1356 using Teuchos::ArrayView;
1357 using GO = global_ordinal_type;
1358 using size_type = typename ArrayView<const GO>::size_type;
1359
1360 // If both Maps are contiguous, we can compare their GID ranges
1361 // easily by looking at the min and max GID on this process.
1362 // Otherwise, we'll compare their GID lists. If only one Map is
1363 // contiguous, then we only have to call getLocalElementList() on
1364 // the noncontiguous Map. (It's best to avoid calling it on a
1365 // contiguous Map, since it results in unnecessary storage that
1366 // persists for the lifetime of the Map.)
1367
1368 if (this == &map) {
1369 // Pointer equality on one process always implies pointer
1370 // equality on all processes, since Map is immutable.
1371 return true;
1372 } else if (getLocalNumElements() != map.getLocalNumElements()) {
1373 return false;
1374 } else if (getMinGlobalIndex() != map.getMinGlobalIndex() ||
1375 getMaxGlobalIndex() != map.getMaxGlobalIndex()) {
1376 return false;
1377 } else {
1378 if (isContiguous()) {
1379 if (map.isContiguous()) {
1380 return true; // min and max match, so the ranges match.
1381 } else { // *this is contiguous, but map is not contiguous
1383 !this->isContiguous() || map.isContiguous(), std::logic_error,
1384 "Tpetra::Map::locallySameAs: BUG");
1385 ArrayView<const GO> rhsElts = map.getLocalElementList();
1386 const GO minLhsGid = this->getMinGlobalIndex();
1387 const size_type numRhsElts = rhsElts.size();
1388 for (size_type k = 0; k < numRhsElts; ++k) {
1389 const GO curLhsGid = minLhsGid + static_cast<GO>(k);
1390 if (curLhsGid != rhsElts[k]) {
1391 return false; // stop on first mismatch
1392 }
1393 }
1394 return true;
1395 }
1396 } else if (map.isContiguous()) { // *this is not contiguous, but map is
1398 this->isContiguous() || !map.isContiguous(), std::logic_error,
1399 "Tpetra::Map::locallySameAs: BUG");
1400 ArrayView<const GO> lhsElts = this->getLocalElementList();
1401 const GO minRhsGid = map.getMinGlobalIndex();
1402 const size_type numLhsElts = lhsElts.size();
1403 for (size_type k = 0; k < numLhsElts; ++k) {
1404 const GO curRhsGid = minRhsGid + static_cast<GO>(k);
1405 if (curRhsGid != lhsElts[k]) {
1406 return false; // stop on first mismatch
1407 }
1408 }
1409 return true;
1410 } else if (this->lgMap_.data() == map.lgMap_.data()) {
1411 // Pointers to LID->GID "map" (actually just an array) are the
1412 // same, and the number of GIDs are the same.
1413 return this->getLocalNumElements() == map.getLocalNumElements();
1414 } else { // we actually have to compare the GIDs
1415 if (this->getLocalNumElements() != map.getLocalNumElements()) {
1416 return false; // We already checked above, but check just in case
1417 } else {
1418 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename node_type::execution_space>;
1419
1420 auto lhsLclMap = getLocalMap();
1421 auto rhsLclMap = map.getLocalMap();
1422
1424 Kokkos::parallel_reduce(
1425 "Tpetra::Map::locallySameAs",
1426 range_type(0, this->getLocalNumElements()),
1428 if (lhsLclMap.getGlobalElement(lid) != rhsLclMap.getGlobalElement(lid))
1429 ++numMismatches;
1430 },
1432
1433 return (numMismatchedElements == 0);
1434 }
1435 }
1436 }
1437}
1438
1439template <class LocalOrdinal, class GlobalOrdinal, class Node>
1442 if (this == &map)
1443 return true;
1444
1445 // We are going to check if lmap1 is fitted into lmap2:
1446 // Is lmap1 (map) a subset of lmap2 (this)?
1447 // And do the first lmap1.getLocalNumElements() global elements
1448 // of lmap1,lmap2 owned on each process exactly match?
1449 auto lmap1 = map.getLocalMap();
1450 auto lmap2 = this->getLocalMap();
1451
1452 auto numLocalElements1 = lmap1.getLocalNumElements();
1453 auto numLocalElements2 = lmap2.getLocalNumElements();
1454
1456 // There are more indices in the first map on this process than in second map.
1457 return false;
1458 }
1459
1460 if (lmap1.isContiguous() && lmap2.isContiguous()) {
1461 // When both Maps are contiguous, just check the interval inclusion.
1462 return ((lmap1.getMinGlobalIndex() == lmap2.getMinGlobalIndex()) &&
1463 (lmap1.getMaxGlobalIndex() <= lmap2.getMaxGlobalIndex()));
1464 }
1465
1466 if (lmap1.getMinGlobalIndex() < lmap2.getMinGlobalIndex() ||
1467 lmap1.getMaxGlobalIndex() > lmap2.getMaxGlobalIndex()) {
1468 // The second map does not include the first map bounds, and thus some of
1469 // the first map global indices are not in the second map.
1470 return false;
1471 }
1472
1473 using LO = local_ordinal_type;
1474 using range_type =
1475 Kokkos::RangePolicy<LO, typename node_type::execution_space>;
1476
1477 // Check all elements.
1478 LO numDiff = 0;
1479 Kokkos::parallel_reduce(
1480 "isLocallyFitted",
1481 range_type(0, numLocalElements1),
1482 KOKKOS_LAMBDA(const LO i, LO& diff) {
1483 diff += (lmap1.getGlobalElement(i) != lmap2.getGlobalElement(i));
1484 },
1485 numDiff);
1486
1487 return (numDiff == 0);
1488}
1489
1490template <class LocalOrdinal, class GlobalOrdinal, class Node>
1493 using Teuchos::outArg;
1494 using Teuchos::REDUCE_MIN;
1495 using Teuchos::reduceAll;
1496 //
1497 // Tests that avoid the Boolean all-reduce below by using
1498 // globally consistent quantities.
1499 //
1500 if (this == &map) {
1501 // Pointer equality on one process always implies pointer
1502 // equality on all processes, since Map is immutable.
1503 return true;
1504 } else if (getComm()->getSize() != map.getComm()->getSize()) {
1505 // The two communicators have different numbers of processes.
1506 // It's not correct to call isSameAs() in that case. This
1507 // may result in the all-reduce hanging below.
1508 return false;
1509 } else if (getGlobalNumElements() != map.getGlobalNumElements()) {
1510 // Two Maps are definitely NOT the same if they have different
1511 // global numbers of indices.
1512 return false;
1513 } else if (getMinAllGlobalIndex() != map.getMinAllGlobalIndex() ||
1514 getMaxAllGlobalIndex() != map.getMaxAllGlobalIndex() ||
1515 getIndexBase() != map.getIndexBase()) {
1516 // If the global min or max global index doesn't match, or if
1517 // the index base doesn't match, then the Maps aren't the same.
1518 return false;
1519 } else if (isDistributed() != map.isDistributed()) {
1520 // One Map is distributed and the other is not, which means that
1521 // the Maps aren't the same.
1522 return false;
1523 } else if (isContiguous() && isUniform() &&
1524 map.isContiguous() && map.isUniform()) {
1525 // Contiguous uniform Maps with the same number of processes in
1526 // their communicators, with the same global numbers of indices,
1527 // and with matching index bases and ranges, must be the same.
1528 return true;
1529 }
1530
1531 // The two communicators must have the same number of processes,
1532 // with process ranks occurring in the same order. This uses
1533 // MPI_COMM_COMPARE. The MPI 3.1 standard (Section 6.4) says:
1534 // "Operations that access communicators are local and their
1535 // execution does not require interprocess communication."
1536 // However, just to be sure, I'll put this call after the above
1537 // tests that don't communicate.
1538 if (!::Tpetra::Details::congruent(*comm_, *(map.getComm()))) {
1539 return false;
1540 }
1541
1542 // If we get this far, we need to check local properties and then
1543 // communicate local sameness across all processes.
1544 const int isSame_lcl = locallySameAs(map) ? 1 : 0;
1545
1546 // Return true if and only if all processes report local sameness.
1547 int isSame_gbl = 0;
1549 return isSame_gbl == 1;
1550}
1551
1552namespace { // (anonymous)
1553template <class LO, class GO, class DT>
1554class FillLgMap {
1555 public:
1556 FillLgMap(const Kokkos::View<GO*, DT>& lgMap,
1557 const GO startGid)
1558 : lgMap_(lgMap)
1559 , startGid_(startGid) {
1560 Kokkos::RangePolicy<LO, typename DT::execution_space>
1561 range(static_cast<LO>(0), static_cast<LO>(lgMap.size()));
1562 Kokkos::parallel_for(range, *this);
1563 }
1564
1565 KOKKOS_INLINE_FUNCTION void operator()(const LO& lid) const {
1566 lgMap_(lid) = startGid_ + static_cast<GO>(lid);
1567 }
1568
1569 private:
1570 const Kokkos::View<GO*, DT> lgMap_;
1571 const GO startGid_;
1572};
1573
1574} // namespace
1575
1576template <class LocalOrdinal, class GlobalOrdinal, class Node>
1577typename Map<LocalOrdinal, GlobalOrdinal, Node>::global_indices_array_type
1579 using std::endl;
1580 using LO = local_ordinal_type;
1581 using GO = global_ordinal_type;
1582 using const_lg_view_type = decltype(lgMap_);
1583 using lg_view_type = typename const_lg_view_type::non_const_type;
1584 const bool debug = Details::Behavior::debug("Map");
1585 const bool verbose = Details::Behavior::verbose("Map");
1586
1587 std::unique_ptr<std::string> prefix;
1588 if (verbose) {
1590 comm_.getRawPtr(), "Map", "getMyGlobalIndices");
1591 std::ostringstream os;
1592 os << *prefix << "Start" << endl;
1593 std::cerr << os.str();
1594 }
1595
1596 // If the local-to-global mapping doesn't exist yet, and if we
1597 // have local entries, then create and fill the local-to-global
1598 // mapping.
1600 lgMap_.extent(0) == 0 && numLocalElements_ > 0;
1601
1603 if (verbose) {
1604 std::ostringstream os;
1605 os << *prefix << "Need to create lgMap" << endl;
1606 std::cerr << os.str();
1607 }
1608 if (debug) {
1609 // The local-to-global mapping should have been set up already
1610 // for a noncontiguous map.
1611 TEUCHOS_TEST_FOR_EXCEPTION(!isContiguous(), std::logic_error,
1612 "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1613 "mapping (lgMap_) should have been set up already for a "
1614 "noncontiguous Map. Please report this bug to the Tpetra "
1615 "developers.");
1616 }
1617 const LO numElts = static_cast<LO>(getLocalNumElements());
1618
1619 using Kokkos::view_alloc;
1620 using Kokkos::WithoutInitializing;
1621 lg_view_type lgMap("lgMap3", numElts);
1622 if (verbose) {
1623 std::ostringstream os;
1624 os << *prefix << "Fill lgMap" << endl;
1625 std::cerr << os.str();
1626 }
1628
1629 if (verbose) {
1630 std::ostringstream os;
1631 os << *prefix << "Copy lgMap to lgMapHost" << endl;
1632 std::cerr << os.str();
1633 }
1634
1635 auto lgMapHost = Kokkos::create_mirror_view(Kokkos::HostSpace(), lgMap);
1636 // DEEP_COPY REVIEW - DEVICE-TO-HOST
1638 Kokkos::deep_copy(exec_instance, lgMapHost, lgMap);
1639
1640 // There's a non-trivial chance we'll grab this on the host,
1641 // so let's make sure the copy finishes
1642 exec_instance.fence();
1643
1644 // "Commit" the local-to-global lookup table we filled in above.
1645 lgMap_ = lgMap;
1646 lgMapHost_ = lgMapHost;
1647 } else {
1648 lazyPushToHost();
1649 }
1650
1651 if (verbose) {
1652 std::ostringstream os;
1653 os << *prefix << "Done" << endl;
1654 std::cerr << os.str();
1655 }
1656 return lgMapHost_;
1657}
1658
1659template <class LocalOrdinal, class GlobalOrdinal, class Node>
1660typename Map<LocalOrdinal, GlobalOrdinal, Node>::global_indices_array_device_type
1662 using std::endl;
1663 using LO = local_ordinal_type;
1664 using GO = global_ordinal_type;
1665 using const_lg_view_type = decltype(lgMap_);
1666 using lg_view_type = typename const_lg_view_type::non_const_type;
1667 const bool debug = Details::Behavior::debug("Map");
1668 const bool verbose = Details::Behavior::verbose("Map");
1669
1670 std::unique_ptr<std::string> prefix;
1671 if (verbose) {
1673 comm_.getRawPtr(), "Map", "getMyGlobalIndicesDevice");
1674 std::ostringstream os;
1675 os << *prefix << "Start" << endl;
1676 std::cerr << os.str();
1677 }
1678
1679 // If the local-to-global mapping doesn't exist yet, and if we
1680 // have local entries, then create and fill the local-to-global
1681 // mapping.
1683 lgMap_.extent(0) == 0 && numLocalElements_ > 0;
1684
1686 if (verbose) {
1687 std::ostringstream os;
1688 os << *prefix << "Need to create lgMap" << endl;
1689 std::cerr << os.str();
1690 }
1691 if (debug) {
1692 // The local-to-global mapping should have been set up already
1693 // for a noncontiguous map.
1694 TEUCHOS_TEST_FOR_EXCEPTION(!isContiguous(), std::logic_error,
1695 "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1696 "mapping (lgMap_) should have been set up already for a "
1697 "noncontiguous Map. Please report this bug to the Tpetra "
1698 "developers.");
1699 }
1700 const LO numElts = static_cast<LO>(getLocalNumElements());
1701
1702 using Kokkos::view_alloc;
1703 using Kokkos::WithoutInitializing;
1704 lg_view_type lgMap("lgMap4", numElts);
1705 if (verbose) {
1706 std::ostringstream os;
1707 os << *prefix << "Fill lgMap" << endl;
1708 std::cerr << os.str();
1709 }
1711
1712 // "Commit" the local-to-global lookup table we filled in above.
1713 lgMap_ = lgMap;
1714 }
1715
1716 if (verbose) {
1717 std::ostringstream os;
1718 os << *prefix << "Done" << endl;
1719 std::cerr << os.str();
1720 }
1721 return lgMap_;
1722}
1723
1724template <class LocalOrdinal, class GlobalOrdinal, class Node>
1725Teuchos::ArrayView<const GlobalOrdinal>
1727 using GO = global_ordinal_type;
1728
1729 // If the local-to-global mapping doesn't exist yet, and if we
1730 // have local entries, then create and fill the local-to-global
1731 // mapping.
1732 (void)this->getMyGlobalIndices();
1733
1734 // This does NOT assume UVM; lgMapHost_ is a host pointer.
1735 lazyPushToHost();
1736 const GO* lgMapHostRawPtr = lgMapHost_.data();
1737 // The third argument forces ArrayView not to try to track memory
1738 // in a debug build. We have to use it because the memory does
1739 // not belong to a Teuchos memory management class.
1740 return Teuchos::ArrayView<const GO>(
1742 lgMapHost_.extent(0),
1743 Teuchos::RCP_DISABLE_NODE_LOOKUP);
1744}
1745
1746template <class LocalOrdinal, class GlobalOrdinal, class Node>
1748 return distributed_;
1749}
1750
1751template <class LocalOrdinal, class GlobalOrdinal, class Node>
1753 using Teuchos::TypeNameTraits;
1754 std::ostringstream os;
1755
1756 os << "Tpetra::Map: {"
1757 << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name()
1758 << ", GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name()
1759 << ", NodeType: " << TypeNameTraits<Node>::name();
1760 if (this->getObjectLabel() != "") {
1761 os << ", Label: \"" << this->getObjectLabel() << "\"";
1762 }
1763 os << ", Global number of entries: " << getGlobalNumElements()
1764 << ", Number of processes: " << getComm()->getSize()
1765 << ", Uniform: " << (isUniform() ? "true" : "false")
1766 << ", Contiguous: " << (isContiguous() ? "true" : "false")
1767 << ", Distributed: " << (isDistributed() ? "true" : "false")
1768 << "}";
1769 return os.str();
1770}
1771
1776template <class LocalOrdinal, class GlobalOrdinal, class Node>
1777std::string
1779 localDescribeToString(const Teuchos::EVerbosityLevel vl) const {
1780 using LO = local_ordinal_type;
1781 using std::endl;
1782
1783 // This preserves current behavior of Map.
1784 if (vl < Teuchos::VERB_HIGH) {
1785 return std::string();
1786 }
1787 auto outStringP = Teuchos::rcp(new std::ostringstream());
1788 Teuchos::RCP<Teuchos::FancyOStream> outp =
1789 Teuchos::getFancyOStream(outStringP);
1790 Teuchos::FancyOStream& out = *outp;
1791
1792 auto comm = this->getComm();
1793 const int myRank = comm->getRank();
1794 const int numProcs = comm->getSize();
1795 out << "Process " << myRank << " of " << numProcs << ":" << endl;
1796 Teuchos::OSTab tab1(out);
1797
1798 const LO numEnt = static_cast<LO>(this->getLocalNumElements());
1799 out << "My number of entries: " << numEnt << endl
1800 << "My minimum global index: " << this->getMinGlobalIndex() << endl
1801 << "My maximum global index: " << this->getMaxGlobalIndex() << endl;
1802
1803 if (vl == Teuchos::VERB_EXTREME) {
1804 out << "My global indices: [";
1805 const LO minLclInd = this->getMinLocalIndex();
1806 for (LO k = 0; k < numEnt; ++k) {
1807 out << minLclInd + this->getGlobalElement(k);
1808 if (k + 1 < numEnt) {
1809 out << ", ";
1810 }
1811 }
1812 out << "]" << endl;
1813 }
1814
1815 out.flush(); // make sure the ostringstream got everything
1816 return outStringP->str();
1817}
1818
1819template <class LocalOrdinal, class GlobalOrdinal, class Node>
1821 describe(Teuchos::FancyOStream& out,
1822 const Teuchos::EVerbosityLevel verbLevel) const {
1823 using std::endl;
1824 using Teuchos::TypeNameTraits;
1825 using Teuchos::VERB_DEFAULT;
1826 using Teuchos::VERB_HIGH;
1827 using Teuchos::VERB_LOW;
1828 using Teuchos::VERB_NONE;
1829 using LO = local_ordinal_type;
1830 using GO = global_ordinal_type;
1831 const Teuchos::EVerbosityLevel vl =
1833
1834 if (vl == VERB_NONE) {
1835 return; // don't print anything
1836 }
1837 // If this Map's Comm is null, then the Map does not participate
1838 // in collective operations with the other processes. In that
1839 // case, it is not even legal to call this method. The reasonable
1840 // thing to do in that case is nothing.
1841 auto comm = this->getComm();
1842 if (comm.is_null()) {
1843 return;
1844 }
1845 const int myRank = comm->getRank();
1846 const int numProcs = comm->getSize();
1847
1848 // Only Process 0 should touch the output stream, but this method
1849 // in general may need to do communication. Thus, we may need to
1850 // preserve the current tab level across multiple "if (myRank ==
1851 // 0) { ... }" inner scopes. This is why we sometimes create
1852 // OSTab instances by pointer, instead of by value. We only need
1853 // to create them by pointer if the tab level must persist through
1854 // multiple inner scopes.
1855 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
1856
1857 if (myRank == 0) {
1858 // At every verbosity level but VERB_NONE, Process 0 prints.
1859 // By convention, describe() always begins with a tab before
1860 // printing.
1861 tab0 = Teuchos::rcp(new Teuchos::OSTab(out));
1862 out << "\"Tpetra::Map\":" << endl;
1863 tab1 = Teuchos::rcp(new Teuchos::OSTab(out));
1864 {
1865 out << "Template parameters:" << endl;
1866 Teuchos::OSTab tab2(out);
1867 out << "LocalOrdinal: " << TypeNameTraits<LO>::name() << endl
1868 << "GlobalOrdinal: " << TypeNameTraits<GO>::name() << endl
1869 << "Node: " << TypeNameTraits<Node>::name() << endl;
1870 }
1871 const std::string label = this->getObjectLabel();
1872 if (label != "") {
1873 out << "Label: \"" << label << "\"" << endl;
1874 }
1875 out << "Global number of entries: " << getGlobalNumElements() << endl
1876 << "Minimum global index: " << getMinAllGlobalIndex() << endl
1877 << "Maximum global index: " << getMaxAllGlobalIndex() << endl
1878 << "Index base: " << getIndexBase() << endl
1879 << "Number of processes: " << numProcs << endl
1880 << "Uniform: " << (isUniform() ? "true" : "false") << endl
1881 << "Contiguous: " << (isContiguous() ? "true" : "false") << endl
1882 << "Distributed: " << (isDistributed() ? "true" : "false") << endl;
1883 }
1884
1885 // This is collective over the Map's communicator.
1886 if (vl >= VERB_HIGH) { // VERB_HIGH or VERB_EXTREME
1887 const std::string lclStr = this->localDescribeToString(vl);
1889 }
1890}
1891
1892template <class LocalOrdinal, class GlobalOrdinal, class Node>
1893Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>>
1895 replaceCommWithSubset(const Teuchos::RCP<const Teuchos::Comm<int>>& newComm) const {
1896 using Teuchos::RCP;
1897 using Teuchos::rcp;
1898 using GST = global_size_t;
1899 using LO = local_ordinal_type;
1900 using GO = global_ordinal_type;
1901 using map_type = Map<LO, GO, Node>;
1902
1903 // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
1904 // the Map by calling its ordinary public constructor, using the
1905 // original Map's data. This only involves O(1) all-reduces over
1906 // the new communicator, which in the common case only includes a
1907 // small number of processes.
1908
1909 // Create the Map to return.
1910 if (newComm.is_null() || newComm->getSize() < 1) {
1911 return Teuchos::null; // my process does not participate in the new Map
1912 } else if (newComm->getSize() == 1) {
1913 lazyPushToHost();
1914
1915 // The case where the new communicator has only one process is
1916 // easy. We don't have to communicate to get all the
1917 // information we need. Use the default comm to create the new
1918 // Map, then fill in all the fields directly.
1919 RCP<map_type> newMap(new map_type());
1920
1921 newMap->comm_ = newComm;
1922 // mfh 07 Oct 2016: Preserve original behavior, even though the
1923 // original index base may no longer be the globally min global
1924 // index. See #616 for why this doesn't matter so much anymore.
1925 newMap->indexBase_ = this->indexBase_;
1926 newMap->numGlobalElements_ = this->numLocalElements_;
1927 newMap->numLocalElements_ = this->numLocalElements_;
1928 newMap->minMyGID_ = this->minMyGID_;
1929 newMap->maxMyGID_ = this->maxMyGID_;
1930 newMap->minAllGID_ = this->minMyGID_;
1931 newMap->maxAllGID_ = this->maxMyGID_;
1932 newMap->firstContiguousGID_ = this->firstContiguousGID_;
1933 newMap->lastContiguousGID_ = this->lastContiguousGID_;
1934 // Since the new communicator has only one process, neither
1935 // uniformity nor contiguity have changed.
1936 newMap->uniform_ = this->uniform_;
1937 newMap->contiguous_ = this->contiguous_;
1938 // The new communicator only has one process, so the new Map is
1939 // not distributed.
1940 newMap->distributed_ = false;
1941 newMap->lgMap_ = this->lgMap_;
1942 newMap->lgMapHost_ = this->lgMapHost_;
1943 newMap->glMap_ = this->glMap_;
1944 newMap->glMapHost_ = this->glMapHost_;
1945 // It's OK not to initialize the new Map's Directory.
1946 // This is initialized lazily, on first call to getRemoteIndexList.
1947
1948 return newMap;
1949 } else { // newComm->getSize() != 1
1950 // Even if the original Map is contiguous, the new Map might not
1951 // be, especially if the excluded processes have ranks != 0 or
1952 // newComm->getSize()-1. The common case for this method is to
1953 // exclude many (possibly even all but one) processes, so it
1954 // likely doesn't pay to do the global communication (over the
1955 // original communicator) to figure out whether we can optimize
1956 // the result Map. Thus, we just set up the result Map as
1957 // noncontiguous.
1958 //
1959 // TODO (mfh 07 Oct 2016) We don't actually need to reconstruct
1960 // the global-to-local table, etc. Optimize this code path to
1961 // avoid unnecessary local work.
1962
1963 // Make Map (re)compute the global number of elements.
1964 const GST RECOMPUTE = Tpetra::Details::OrdinalTraits<GST>::invalid();
1965 // TODO (mfh 07 Oct 2016) If we use any Map constructor, we have
1966 // to use the noncontiguous Map constructor, since the new Map
1967 // might not be contiguous. Even if the old Map was contiguous,
1968 // some process in the "middle" might have been excluded. If we
1969 // want to avoid local work, we either have to do the setup by
1970 // hand, or write a new Map constructor.
1971#if 1
1972 // The disabled code here throws the following exception in
1973 // Map's replaceCommWithSubset test:
1974 //
1975 // Throw test that evaluated to true: static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::ArithTraits<ValueType>::max ())
1976 // 10:
1977 // 10: Tpetra::Details::FixedHashTable: The number of keys -3 is greater than the maximum representable ValueType value 2147483647. This means that it is not possible to use this constructor.
1978 // 10: Process 3: origComm->replaceCommWithSubset(subsetComm) threw an exception: /scratch/prj/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Details_FixedHashTable_def.hpp:1044:
1979
1980 auto lgMap = this->getMyGlobalIndices();
1981 using size_type =
1982 typename std::decay<decltype(lgMap.extent(0))>::type;
1983 const size_type lclNumInds =
1984 static_cast<size_type>(this->getLocalNumElements());
1985 using Teuchos::TypeNameTraits;
1986 TEUCHOS_TEST_FOR_EXCEPTION(lgMap.extent(0) != lclNumInds, std::logic_error,
1987 "Tpetra::Map::replaceCommWithSubset: Result of getMyGlobalIndices() "
1988 "has length "
1989 << lgMap.extent(0) << " (of type " << TypeNameTraits<size_type>::name() << ") != this->getLocalNumElements()"
1990 " = "
1991 << this->getLocalNumElements() << ". The latter, upon being "
1992 "cast to size_type = "
1994 "becomes "
1995 << lclNumInds << ". Please report this bug to the Tpetra "
1996 "developers.");
1997#else
1998 Teuchos::ArrayView<const GO> lgMap = this->getLocalElementList();
1999#endif // 1
2000
2001 const GO indexBase = this->getIndexBase();
2002 // map stores HostSpace of CudaSpace but constructor is still CudaUVMSpace
2003 auto lgMap_device = Kokkos::create_mirror_view_and_copy(device_type(), lgMap);
2004 return rcp(new map_type(RECOMPUTE, lgMap_device, indexBase, newComm));
2005 }
2006}
2007
2008template <class LocalOrdinal, class GlobalOrdinal, class Node>
2009Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>>
2011 removeEmptyProcesses() const {
2012 using Teuchos::Comm;
2013 using Teuchos::null;
2014 using Teuchos::outArg;
2015 using Teuchos::RCP;
2016 using Teuchos::rcp;
2017 using Teuchos::REDUCE_MIN;
2018 using Teuchos::reduceAll;
2019
2020 // Create the new communicator. split() returns a valid
2021 // communicator on all processes. On processes where color == 0,
2022 // ignore the result. Passing key == 0 tells MPI to order the
2023 // processes in the new communicator by their rank in the old
2024 // communicator.
2025 const int color = (numLocalElements_ == 0) ? 0 : 1;
2026 // MPI_Comm_split must be called collectively over the original
2027 // communicator. We can't just call it on processes with color
2028 // one, even though we will ignore its result on processes with
2029 // color zero.
2030 RCP<const Comm<int>> newComm = comm_->split(color, 0);
2031 if (color == 0) {
2032 newComm = null;
2033 }
2034
2035 // Create the Map to return.
2036 if (newComm.is_null()) {
2037 return null; // my process does not participate in the new Map
2038 } else {
2039 RCP<Map> map = rcp(new Map());
2040
2041 map->comm_ = newComm;
2042 map->indexBase_ = indexBase_;
2043 map->numGlobalElements_ = numGlobalElements_;
2044 map->numLocalElements_ = numLocalElements_;
2045 map->minMyGID_ = minMyGID_;
2046 map->maxMyGID_ = maxMyGID_;
2047 map->minAllGID_ = minAllGID_;
2048 map->maxAllGID_ = maxAllGID_;
2049 map->firstContiguousGID_ = firstContiguousGID_;
2050 map->lastContiguousGID_ = lastContiguousGID_;
2051
2052 // Uniformity and contiguity have not changed. The directory
2053 // has changed, but we've taken care of that above.
2054 map->uniform_ = uniform_;
2055 map->contiguous_ = contiguous_;
2056
2057 // If the original Map was NOT distributed, then the new Map
2058 // cannot be distributed.
2059 //
2060 // If the number of processes in the new communicator is 1, then
2061 // the new Map is not distributed.
2062 //
2063 // Otherwise, we have to check the new Map using an all-reduce
2064 // (over the new communicator). For example, the original Map
2065 // may have had some processes with zero elements, and all other
2066 // processes with the same number of elements as in the whole
2067 // Map. That Map is technically distributed, because of the
2068 // processes with zero elements. Removing those processes would
2069 // make the new Map locally replicated.
2070 if (!distributed_ || newComm->getSize() == 1) {
2071 map->distributed_ = false;
2072 } else {
2073 const int iOwnAllGids = (numLocalElements_ == numGlobalElements_) ? 1 : 0;
2074 int allProcsOwnAllGids = 0;
2076 map->distributed_ = (allProcsOwnAllGids == 1) ? false : true;
2077 }
2078
2079 map->lgMap_ = lgMap_;
2080 map->lgMapHost_ = lgMapHost_;
2081 map->glMap_ = glMap_;
2082 map->glMapHost_ = glMapHost_;
2083
2084 // Map's default constructor creates an uninitialized Directory.
2085 // The Directory will be initialized on demand in
2086 // getRemoteIndexList().
2087 //
2088 // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the
2089 // directory more efficiently than just recreating it. If
2090 // directory recreation proves a bottleneck, we can always
2091 // revisit this. On the other hand, Directory creation is only
2092 // collective over the new, presumably much smaller
2093 // communicator, so it may not be worth the effort to optimize.
2094
2095 return map;
2096 }
2097}
2098
2099template <class LocalOrdinal, class GlobalOrdinal, class Node>
2102 directory_.is_null(), std::logic_error,
2103 "Tpetra::Map::setupDirectory: "
2104 "The Directory is null. "
2105 "Please report this bug to the Tpetra developers.");
2106
2107 // Only create the Directory if it hasn't been created yet.
2108 // This is a collective operation.
2109 if (!directory_->initialized()) {
2110 directory_->initialize(*this);
2111 }
2112}
2113
2114template <class LocalOrdinal, class GlobalOrdinal, class Node>
2117 getRemoteIndexList(const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
2118 const Teuchos::ArrayView<int>& PIDs,
2119 const Teuchos::ArrayView<LocalOrdinal>& LIDs) const {
2121 using std::endl;
2122 using Tpetra::Details::OrdinalTraits;
2123 using size_type = Teuchos::ArrayView<int>::size_type;
2124
2125 const bool verbose = Details::Behavior::verbose("Map");
2127 std::unique_ptr<std::string> prefix;
2128 if (verbose) {
2129 prefix = Details::createPrefix(comm_.getRawPtr(),
2130 "Map", "getRemoteIndexList(GIDs,PIDs,LIDs)");
2131 std::ostringstream os;
2132 os << *prefix << "Start: ";
2133 verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2134 os << endl;
2135 std::cerr << os.str();
2136 }
2137
2138 // Empty Maps (i.e., containing no indices on any processes in the
2139 // Map's communicator) are perfectly valid. In that case, if the
2140 // input GID list is nonempty, we fill the output arrays with
2141 // invalid values, and return IDNotPresent to notify the caller.
2142 // It's perfectly valid to give getRemoteIndexList GIDs that the
2143 // Map doesn't own. SubmapImport test 2 needs this functionality.
2144 if (getGlobalNumElements() == 0) {
2145 if (GIDs.size() == 0) {
2146 if (verbose) {
2147 std::ostringstream os;
2148 os << *prefix << "Done; both Map & input are empty" << endl;
2149 std::cerr << os.str();
2150 }
2151 return AllIDsPresent; // trivially
2152 } else {
2153 if (verbose) {
2154 std::ostringstream os;
2155 os << *prefix << "Done: Map is empty on all processes, "
2156 "so all output PIDs & LIDs are invalid (-1)."
2157 << endl;
2158 std::cerr << os.str();
2159 }
2160 for (size_type k = 0; k < PIDs.size(); ++k) {
2162 }
2163 for (size_type k = 0; k < LIDs.size(); ++k) {
2165 }
2166 return IDNotPresent;
2167 }
2168 }
2169
2170 // getRemoteIndexList must be called collectively, and Directory
2171 // initialization is collective too, so it's OK to initialize the
2172 // Directory on demand.
2173
2174 if (verbose) {
2175 std::ostringstream os;
2176 os << *prefix << "Call setupDirectory" << endl;
2177 std::cerr << os.str();
2178 }
2179 setupDirectory();
2180 if (verbose) {
2181 std::ostringstream os;
2182 os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2183 std::cerr << os.str();
2184 }
2186 directory_->getDirectoryEntries(*this, GIDs, PIDs, LIDs);
2187 if (verbose) {
2188 std::ostringstream os;
2189 os << *prefix << "Done; getDirectoryEntries returned "
2190 << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2191 << "; ";
2192 verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2193 os << ", ";
2194 verbosePrintArray(os, LIDs, "LIDs", maxNumToPrint);
2195 os << endl;
2196 std::cerr << os.str();
2197 }
2198 return retVal;
2199}
2200
2201template <class LocalOrdinal, class GlobalOrdinal, class Node>
2204 getRemoteIndexList(const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
2205 const Teuchos::ArrayView<int>& PIDs) const {
2207 using std::endl;
2208
2209 const bool verbose = Details::Behavior::verbose("Map");
2211 std::unique_ptr<std::string> prefix;
2212 if (verbose) {
2213 prefix = Details::createPrefix(comm_.getRawPtr(),
2214 "Map", "getRemoteIndexList(GIDs,PIDs)");
2215 std::ostringstream os;
2216 os << *prefix << "Start: ";
2217 verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2218 os << endl;
2219 std::cerr << os.str();
2220 }
2221
2222 if (getGlobalNumElements() == 0) {
2223 if (GIDs.size() == 0) {
2224 if (verbose) {
2225 std::ostringstream os;
2226 os << *prefix << "Done; both Map & input are empty" << endl;
2227 std::cerr << os.str();
2228 }
2229 return AllIDsPresent; // trivially
2230 } else {
2231 if (verbose) {
2232 std::ostringstream os;
2233 os << *prefix << "Done: Map is empty on all processes, "
2234 "so all output PIDs are invalid (-1)."
2235 << endl;
2236 std::cerr << os.str();
2237 }
2238 for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size(); ++k) {
2239 PIDs[k] = Tpetra::Details::OrdinalTraits<int>::invalid();
2240 }
2241 return IDNotPresent;
2242 }
2243 }
2244
2245 // getRemoteIndexList must be called collectively, and Directory
2246 // initialization is collective too, so it's OK to initialize the
2247 // Directory on demand.
2248
2249 if (verbose) {
2250 std::ostringstream os;
2251 os << *prefix << "Call setupDirectory" << endl;
2252 std::cerr << os.str();
2253 }
2254 setupDirectory();
2255 if (verbose) {
2256 std::ostringstream os;
2257 os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2258 std::cerr << os.str();
2259 }
2261 directory_->getDirectoryEntries(*this, GIDs, PIDs);
2262 if (verbose) {
2263 std::ostringstream os;
2264 os << *prefix << "Done; getDirectoryEntries returned "
2265 << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2266 << "; ";
2267 verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2268 os << endl;
2269 std::cerr << os.str();
2270 }
2271 return retVal;
2272}
2273
2274template <class LocalOrdinal, class GlobalOrdinal, class Node>
2276 using exec_space = typename Node::device_type::execution_space;
2277 if (lgMap_.extent(0) != lgMapHost_.extent(0)) {
2278 Tpetra::Details::ProfilingRegion pr("Map::lazyPushToHost() - pushing data");
2279 // NOTE: We check lgMap_ and not glMap_, since the latter can
2280 // be somewhat error prone for contiguous maps
2281
2282 // create_mirror_view preserves const-ness. create_mirror does not
2283 auto lgMap_host = Kokkos::create_mirror(Kokkos::HostSpace(), lgMap_);
2284
2285 // Since this was computed on the default stream, we can copy on the stream and then fence
2286 // the stream
2287 Kokkos::deep_copy(exec_space(), lgMap_host, lgMap_);
2288 exec_space().fence();
2289 lgMapHost_ = lgMap_host;
2290
2291 // Make host version - when memory spaces match these just do trivial assignment
2292 glMapHost_ = global_to_local_table_host_type(glMap_);
2293 }
2294}
2295
2296template <class LocalOrdinal, class GlobalOrdinal, class Node>
2297Teuchos::RCP<const Teuchos::Comm<int>>
2299 return comm_;
2300}
2301
2302template <class LocalOrdinal, class GlobalOrdinal, class Node>
2304 checkIsDist() const {
2305 using std::endl;
2306 using Teuchos::as;
2307 using Teuchos::outArg;
2308 using Teuchos::REDUCE_MIN;
2309 using Teuchos::reduceAll;
2310
2311 const bool verbose = Details::Behavior::verbose("Map");
2312 std::unique_ptr<std::string> prefix;
2313 if (verbose) {
2315 comm_.getRawPtr(), "Map", "checkIsDist");
2316 std::ostringstream os;
2317 os << *prefix << "Start" << endl;
2318 std::cerr << os.str();
2319 }
2320
2321 bool global = false;
2322 if (comm_->getSize() > 1) {
2323 // The communicator has more than one process, but that doesn't
2324 // necessarily mean the Map is distributed.
2325 int localRep = 0;
2326 if (numGlobalElements_ == as<global_size_t>(numLocalElements_)) {
2327 // The number of local elements on this process equals the
2328 // number of global elements.
2329 //
2330 // NOTE (mfh 22 Nov 2011) Does this still work if there were
2331 // duplicates in the global ID list on input (the third Map
2332 // constructor), so that the number of local elements (which
2333 // are not duplicated) on this process could be less than the
2334 // number of global elements, even if this process owns all
2335 // the elements?
2336 localRep = 1;
2337 }
2338 int allLocalRep;
2339 reduceAll<int, int>(*comm_, REDUCE_MIN, localRep, outArg(allLocalRep));
2340 if (allLocalRep != 1) {
2341 // At least one process does not own all the elements.
2342 // This makes the Map a distributed Map.
2343 global = true;
2344 }
2345 }
2346 // If the communicator has only one process, then the Map is not
2347 // distributed.
2348
2349 if (verbose) {
2350 std::ostringstream os;
2351 os << *prefix << "Done; global=" << (global ? "true" : "false")
2352 << endl;
2353 std::cerr << os.str();
2354 }
2355 return global;
2356}
2357
2358} // namespace Tpetra
2359
2360template <class LocalOrdinal, class GlobalOrdinal>
2361Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal>>
2362Tpetra::createLocalMap(const size_t numElements,
2363 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2364 typedef LocalOrdinal LO;
2365 typedef GlobalOrdinal GO;
2366 using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2367 return createLocalMapWithNode<LO, GO, NT>(numElements, comm);
2368}
2369
2370template <class LocalOrdinal, class GlobalOrdinal>
2371Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal>>
2372Tpetra::createUniformContigMap(const global_size_t numElements,
2373 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2374 typedef LocalOrdinal LO;
2375 typedef GlobalOrdinal GO;
2376 using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2377 return createUniformContigMapWithNode<LO, GO, NT>(numElements, comm);
2378}
2379
2380template <class LocalOrdinal, class GlobalOrdinal, class Node>
2381Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2382Tpetra::createUniformContigMapWithNode(const global_size_t numElements,
2383 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2384 using Teuchos::rcp;
2386 const GlobalOrdinal indexBase = static_cast<GlobalOrdinal>(0);
2387
2388 return rcp(new map_type(numElements, indexBase, comm, GloballyDistributed));
2389}
2390
2391template <class LocalOrdinal, class GlobalOrdinal, class Node>
2392Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2393Tpetra::createLocalMapWithNode(const size_t numElements,
2394 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2395 using Teuchos::rcp;
2398 const GlobalOrdinal indexBase = 0;
2399 const global_size_t globalNumElts = static_cast<global_size_t>(numElements);
2400
2401 return rcp(new map_type(globalNumElts, indexBase, comm, LocallyReplicated));
2402}
2403
2404template <class LocalOrdinal, class GlobalOrdinal, class Node>
2405Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2407 const size_t localNumElements,
2408 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2409 using Teuchos::rcp;
2411 const GlobalOrdinal indexBase = 0;
2412
2413 return rcp(new map_type(numElements, localNumElements, indexBase, comm));
2414}
2415
2416template <class LocalOrdinal, class GlobalOrdinal>
2417Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal>>
2419 const size_t localNumElements,
2420 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2421 typedef LocalOrdinal LO;
2422 typedef GlobalOrdinal GO;
2423 using NT = typename Tpetra::Map<LO, GO>::node_type;
2424
2425 return Tpetra::createContigMapWithNode<LO, GO, NT>(numElements, localNumElements, comm);
2426}
2427
2428template <class LocalOrdinal, class GlobalOrdinal>
2429Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal>>
2430Tpetra::createNonContigMap(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2431 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2432 typedef LocalOrdinal LO;
2433 typedef GlobalOrdinal GO;
2434 using NT = typename Tpetra::Map<LO, GO>::node_type;
2435
2436 return Tpetra::createNonContigMapWithNode<LO, GO, NT>(elementList, comm);
2437}
2438
2439template <class LocalOrdinal, class GlobalOrdinal, class Node>
2440Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2441Tpetra::createNonContigMapWithNode(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2442 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2443 using Teuchos::rcp;
2445 using GST = Tpetra::global_size_t;
2446 const GST INV = Tpetra::Details::OrdinalTraits<GST>::invalid();
2447 // FIXME (mfh 22 Jul 2016) This is what I found here, but maybe this
2448 // shouldn't be zero, given that the index base is supposed to equal
2449 // the globally min global index?
2450 const GlobalOrdinal indexBase = 0;
2451
2452 return rcp(new map_type(INV, elementList, indexBase, comm));
2453}
2454
2455template <class LO, class GO, class NT>
2456Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>
2457Tpetra::createOneToOne(const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& M) {
2458 using Details::verbosePrintArray;
2459 using std::cerr;
2460 using std::endl;
2461 using Teuchos::Array;
2462 using Teuchos::ArrayView;
2463 using Teuchos::as;
2464 using Teuchos::rcp;
2465 using map_type = Tpetra::Map<LO, GO, NT>;
2466 using GST = global_size_t;
2467
2468 const bool verbose = Details::Behavior::verbose("Map");
2469 std::unique_ptr<std::string> prefix;
2470 if (verbose) {
2471 auto comm = M.is_null() ? Teuchos::null : M->getComm();
2472 prefix = Details::createPrefix(
2473 comm.getRawPtr(), "createOneToOne(Map)");
2474 std::ostringstream os;
2475 os << *prefix << "Start" << endl;
2476 cerr << os.str();
2477 }
2478 const size_t maxNumToPrint = verbose ? Details::Behavior::verbosePrintCountThreshold() : size_t(0);
2479 const GST GINV = Tpetra::Details::OrdinalTraits<GST>::invalid();
2480 const int myRank = M->getComm()->getRank();
2481
2482 // Bypasses for special cases where either M is known to be
2483 // one-to-one, or the one-to-one version of M is easy to compute.
2484 // This is why we take M as an RCP, not as a const reference -- so
2485 // that we can return M itself if it is 1-to-1.
2486 if (!M->isDistributed()) {
2487 // For a locally replicated Map, we assume that users want to push
2488 // all the GIDs to Process 0.
2489
2490 // mfh 05 Nov 2013: getGlobalNumElements() does indeed return what
2491 // you think it should return, in this special case of a locally
2492 // replicated contiguous Map.
2493 const GST numGlobalEntries = M->getGlobalNumElements();
2494 if (M->isContiguous()) {
2495 const size_t numLocalEntries =
2496 (myRank == 0) ? as<size_t>(numGlobalEntries) : size_t(0);
2497 if (verbose) {
2498 std::ostringstream os;
2499 os << *prefix << "Input is locally replicated & contiguous; "
2500 "numLocalEntries="
2501 << numLocalEntries << endl;
2502 cerr << os.str();
2503 }
2504 auto retMap =
2505 rcp(new map_type(numGlobalEntries, numLocalEntries,
2506 M->getIndexBase(), M->getComm()));
2507 if (verbose) {
2508 std::ostringstream os;
2509 os << *prefix << "Done" << endl;
2510 cerr << os.str();
2511 }
2512 return retMap;
2513 } else {
2514 if (verbose) {
2515 std::ostringstream os;
2516 os << *prefix << "Input is locally replicated & noncontiguous"
2517 << endl;
2518 cerr << os.str();
2519 }
2520 ArrayView<const GO> myGids =
2521 (myRank == 0) ? M->getLocalElementList() : Teuchos::null;
2522 auto retMap =
2523 rcp(new map_type(GINV, myGids(), M->getIndexBase(),
2524 M->getComm()));
2525 if (verbose) {
2526 std::ostringstream os;
2527 os << *prefix << "Done" << endl;
2528 cerr << os.str();
2529 }
2530 return retMap;
2531 }
2532 } else if (M->isContiguous()) {
2533 if (verbose) {
2534 std::ostringstream os;
2535 os << *prefix << "Input is distributed & contiguous" << endl;
2536 cerr << os.str();
2537 }
2538 // Contiguous, distributed Maps are one-to-one by construction.
2539 // (Locally replicated Maps can be contiguous.)
2540 return M;
2541 } else {
2542 if (verbose) {
2543 std::ostringstream os;
2544 os << *prefix << "Input is distributed & noncontiguous" << endl;
2545 cerr << os.str();
2546 }
2548 const size_t numMyElems = M->getLocalNumElements();
2549 ArrayView<const GO> myElems = M->getLocalElementList();
2550 Array<int> owner_procs_vec(numMyElems);
2551
2552 if (verbose) {
2553 std::ostringstream os;
2554 os << *prefix << "Call Directory::getDirectoryEntries: ";
2555 verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2556 os << endl;
2557 cerr << os.str();
2558 }
2559 directory.getDirectoryEntries(*M, myElems, owner_procs_vec());
2560 if (verbose) {
2561 std::ostringstream os;
2562 os << *prefix << "getDirectoryEntries result: ";
2563 verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2564 os << endl;
2565 cerr << os.str();
2566 }
2567
2568 Array<GO> myOwned_vec(numMyElems);
2569 size_t numMyOwnedElems = 0;
2570 for (size_t i = 0; i < numMyElems; ++i) {
2571 const GO GID = myElems[i];
2572 const int owner = owner_procs_vec[i];
2573
2574 if (myRank == owner) {
2575 myOwned_vec[numMyOwnedElems++] = GID;
2576 }
2577 }
2578 myOwned_vec.resize(numMyOwnedElems);
2579
2580 if (verbose) {
2581 std::ostringstream os;
2582 os << *prefix << "Create Map: ";
2583 verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2584 os << endl;
2585 cerr << os.str();
2586 }
2587 auto retMap = rcp(new map_type(GINV, myOwned_vec(),
2588 M->getIndexBase(), M->getComm()));
2589 if (verbose) {
2590 std::ostringstream os;
2591 os << *prefix << "Done" << endl;
2592 cerr << os.str();
2593 }
2594 return retMap;
2595 }
2596}
2597
2598template <class LocalOrdinal, class GlobalOrdinal, class Node>
2599Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2602 using Details::Behavior;
2603 using Details::verbosePrintArray;
2604 using std::cerr;
2605 using std::endl;
2606 using Teuchos::Array;
2607 using Teuchos::ArrayView;
2608 using Teuchos::RCP;
2609 using Teuchos::rcp;
2610 using Teuchos::toString;
2611 using LO = LocalOrdinal;
2612 using GO = GlobalOrdinal;
2613 using map_type = Tpetra::Map<LO, GO, Node>;
2614
2615 const bool verbose = Behavior::verbose("Map");
2616 std::unique_ptr<std::string> prefix;
2617 if (verbose) {
2618 auto comm = M.is_null() ? Teuchos::null : M->getComm();
2619 prefix = Details::createPrefix(
2620 comm.getRawPtr(), "createOneToOne(Map,TieBreak)");
2621 std::ostringstream os;
2622 os << *prefix << "Start" << endl;
2623 cerr << os.str();
2624 }
2625 const size_t maxNumToPrint = verbose ? Behavior::verbosePrintCountThreshold() : size_t(0);
2626
2627 // FIXME (mfh 20 Feb 2013) We should have a bypass for contiguous
2628 // Maps (which are 1-to-1 by construction).
2629
2631 if (verbose) {
2632 std::ostringstream os;
2633 os << *prefix << "Initialize Directory" << endl;
2634 cerr << os.str();
2635 }
2636 directory.initialize(*M, tie_break);
2637 if (verbose) {
2638 std::ostringstream os;
2639 os << *prefix << "Done initializing Directory" << endl;
2640 cerr << os.str();
2641 }
2642 size_t numMyElems = M->getLocalNumElements();
2643 ArrayView<const GO> myElems = M->getLocalElementList();
2644 Array<int> owner_procs_vec(numMyElems);
2645 if (verbose) {
2646 std::ostringstream os;
2647 os << *prefix << "Call Directory::getDirectoryEntries: ";
2648 verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2649 os << endl;
2650 cerr << os.str();
2651 }
2652 directory.getDirectoryEntries(*M, myElems, owner_procs_vec());
2653 if (verbose) {
2654 std::ostringstream os;
2655 os << *prefix << "getDirectoryEntries result: ";
2656 verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2657 os << endl;
2658 cerr << os.str();
2659 }
2660
2661 const int myRank = M->getComm()->getRank();
2662 Array<GO> myOwned_vec(numMyElems);
2663 size_t numMyOwnedElems = 0;
2664 for (size_t i = 0; i < numMyElems; ++i) {
2665 const GO GID = myElems[i];
2666 const int owner = owner_procs_vec[i];
2667 if (myRank == owner) {
2668 myOwned_vec[numMyOwnedElems++] = GID;
2669 }
2670 }
2671 myOwned_vec.resize(numMyOwnedElems);
2672
2673 // FIXME (mfh 08 May 2014) The above Directory should be perfectly
2674 // valid for the new Map. Why can't we reuse it?
2675 const global_size_t GINV =
2676 Tpetra::Details::OrdinalTraits<global_size_t>::invalid();
2677 if (verbose) {
2678 std::ostringstream os;
2679 os << *prefix << "Create Map: ";
2680 verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2681 os << endl;
2682 cerr << os.str();
2683 }
2684 RCP<const map_type> retMap(new map_type(GINV, myOwned_vec(), M->getIndexBase(),
2685 M->getComm()));
2686 if (verbose) {
2687 std::ostringstream os;
2688 os << *prefix << "Done" << endl;
2689 cerr << os.str();
2690 }
2691 return retMap;
2692}
2693
2694//
2695// Explicit instantiation macro
2696//
2697// Must be expanded from within the Tpetra namespace!
2698//
2699
2701
2702#define TPETRA_MAP_INSTANT(LO, GO, NODE) \
2703 \
2704 template class Map<LO, GO, NODE>; \
2705 \
2706 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2707 createLocalMapWithNode<LO, GO, NODE>(const size_t numElements, \
2708 const Teuchos::RCP<const Teuchos::Comm<int>>& comm); \
2709 \
2710 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2711 createContigMapWithNode<LO, GO, NODE>(const global_size_t numElements, \
2712 const size_t localNumElements, \
2713 const Teuchos::RCP<const Teuchos::Comm<int>>& comm); \
2714 \
2715 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2716 createNonContigMapWithNode(const Teuchos::ArrayView<const GO>& elementList, \
2717 const Teuchos::RCP<const Teuchos::Comm<int>>& comm); \
2718 \
2719 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2720 createUniformContigMapWithNode<LO, GO, NODE>(const global_size_t numElements, \
2721 const Teuchos::RCP<const Teuchos::Comm<int>>& comm); \
2722 \
2723 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2724 createOneToOne(const Teuchos::RCP<const Map<LO, GO, NODE>>& M); \
2725 \
2726 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2727 createOneToOne(const Teuchos::RCP<const Map<LO, GO, NODE>>& M, \
2728 const Tpetra::Details::TieBreak<LO, GO>& tie_break);
2729
2731#define TPETRA_MAP_INSTANT_DEFAULTNODE(LO, GO) \
2732 template Teuchos::RCP<const Map<LO, GO>> \
2733 createLocalMap<LO, GO>(const size_t, const Teuchos::RCP<const Teuchos::Comm<int>>&); \
2734 \
2735 template Teuchos::RCP<const Map<LO, GO>> \
2736 createContigMap<LO, GO>(global_size_t, size_t, \
2737 const Teuchos::RCP<const Teuchos::Comm<int>>&); \
2738 \
2739 template Teuchos::RCP<const Map<LO, GO>> \
2740 createNonContigMap(const Teuchos::ArrayView<const GO>&, \
2741 const Teuchos::RCP<const Teuchos::Comm<int>>&); \
2742 \
2743 template Teuchos::RCP<const Map<LO, GO>> \
2744 createUniformContigMap<LO, GO>(const global_size_t, \
2745 const Teuchos::RCP<const Teuchos::Comm<int>>&);
2746
2747#endif // TPETRA_MAP_DEF_HPP
Functions for initializing and finalizing Tpetra.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration of Tpetra::Details::extractMpiCommFromTeuchos.
Declaration of a function that prints strings from each process.
Declaration of Tpetra::Details::initializeKokkos.
Declaration of Tpetra::Details::printOnce.
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
Description of Tpetra's behavior.
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
"Local" part of Map suitable for Kokkos kernels.
Interface for breaking ties in ownership.
Implement mapping from global ID to process ID and local ID.
A parallel distribution of indices over processes.
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
bool isOneToOne() const
Whether the Map is one to one.
std::string description() const
Implementation of Teuchos::Describable.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
Node node_type
Legacy typedef that will go away at some point.
Map()
Default constructor (that does nothing).
GlobalOrdinal global_ordinal_type
The type of global indices.
LookupStatus getRemoteIndexList(const Teuchos::ArrayView< const global_ordinal_type > &GIDList, const Teuchos::ArrayView< int > &nodeIDList, const Teuchos::ArrayView< local_ordinal_type > &LIDList) const
Return the process ranks and corresponding local indices for the given global indices.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
bool isUniform() const
Whether the range of global indices is uniform.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
typename device_type::execution_space execution_space
The Kokkos execution space.
LO local_ordinal_type
The type of local indices.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > removeEmptyProcesses() const
Advanced methods.
void lazyPushToHost() const
Push the device data to host, if needed.
bool isCompatible(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is compatible with this Map.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
bool isLocallyFitted(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is locally fitted to this Map.
virtual ~Map()
Destructor (virtual for memory safety of derived classes).
global_indices_array_device_type getMyGlobalIndicesDevice() const
Return a view of the global indices owned by this process on the Map's device.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > replaceCommWithSubset(const Teuchos::RCP< const Teuchos::Comm< int > > &newComm) const
Replace this Map's communicator with a subset communicator.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
bool isSameAs(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is identical to this Map.
local_map_type getLocalMap() const
Get the LocalMap for Kokkos-Kernels.
global_indices_array_type getMyGlobalIndices() const
Return a view of the global indices owned by this process.
typename Node::device_type device_type
This class' Kokkos::Device specialization.
Implementation details of Tpetra.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void printOnce(std::ostream &out, const std::string &s, const Teuchos::Comm< int > *comm)
Print on one process of the given communicator, or at least try to do so (if MPI is not initialized).
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
bool congruent(const Teuchos::Comm< int > &comm1, const Teuchos::Comm< int > &comm2)
Whether the two communicators are congruent.
void initializeKokkos()
Initialize Kokkos, using command-line arguments (if any) given to Teuchos::GlobalMPISession.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createLocalMap(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with the default Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createUniformContigMap(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with the default Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createNonContigMap(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a non-contiguous Map using the default Kokkos::Device type.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createUniformContigMapWithNode(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with a user-specified Kokkos Node.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createContigMap(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map using the defaul...
size_t global_size_t
Global size_t object.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified,...
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createLocalMapWithNode(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with a specified Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createNonContigMapWithNode(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a noncontiguous Map with a user-specified, possibly nondefault Kokkos Node ...
LocalGlobal
Enum for local versus global allocation of Map entries.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a (potentially) nonuniformly distributed, contiguous Map for a user-specifi...