Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Map_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
14
15#ifndef TPETRA_MAP_DEF_HPP
16#define TPETRA_MAP_DEF_HPP
17
18#include <memory>
19#include <sstream>
20#include <stdexcept>
21#include <typeinfo>
22
23#include "Teuchos_as.hpp"
24#include "Teuchos_TypeNameTraits.hpp"
25#include "Teuchos_CommHelpers.hpp"
26
27#include "Tpetra_Directory.hpp" // must include for implicit instantiation to work
29#include "Tpetra_Details_FixedHashTable.hpp"
32#include "Tpetra_Core.hpp"
33#include "Tpetra_Util.hpp"
34#include "Tpetra_Details_mpiIsInitialized.hpp"
35#include "Tpetra_Details_extractMpiCommFromTeuchos.hpp" // teuchosCommIsAnMpiComm
38
39namespace { // (anonymous)
40
41void checkMapInputArray(const char ctorName[],
42 const void* indexList,
43 const size_t indexListSize,
44 const Teuchos::Comm<int>* const comm) {
46
47 const bool debug = Behavior::debug("Map");
48 if (debug) {
49 using std::endl;
50 using Teuchos::outArg;
51 using Teuchos::REDUCE_MIN;
52 using Teuchos::reduceAll;
53
54 const int myRank = comm == nullptr ? 0 : comm->getRank();
55 const bool verbose = Behavior::verbose("Map");
56 std::ostringstream lclErrStrm;
57 int lclSuccess = 1;
58
59 if (indexListSize != 0 && indexList == nullptr) {
60 lclSuccess = 0;
61 if (verbose) {
62 lclErrStrm << "Proc " << myRank << ": indexList is null, "
63 "but indexListSize="
64 << indexListSize << " != 0." << endl;
65 }
66 }
67 int gblSuccess = 0; // output argument
68 reduceAll(*comm, REDUCE_MIN, lclSuccess, outArg(gblSuccess));
69 if (gblSuccess != 1) {
70 std::ostringstream gblErrStrm;
71 gblErrStrm << "Tpetra::Map constructor " << ctorName << " detected a problem with the input array "
72 "(raw array, Teuchos::ArrayView, or Kokkos::View) "
73 "of global indices."
74 << endl;
75 if (verbose) {
76 using ::Tpetra::Details::gathervPrint;
77 gathervPrint(gblErrStrm, lclErrStrm.str(), *comm);
78 }
79 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, gblErrStrm.str());
80 }
81 }
82}
83
84template <class LocalOrdinal, class GlobalOrdinal, class ViewType>
85void computeConstantsOnDevice(const ViewType& entryList, GlobalOrdinal& minMyGID, GlobalOrdinal& maxMyGID, GlobalOrdinal& firstContiguousGID, GlobalOrdinal& lastContiguousGID_val, LocalOrdinal& lastContiguousGID_loc) {
86 using LO = LocalOrdinal;
87 using GO = GlobalOrdinal;
88 using exec_space = typename ViewType::device_type::execution_space;
89 using range_policy = Kokkos::RangePolicy<exec_space, Kokkos::IndexType<LO>>;
90 const LO numLocalElements = entryList.extent(0);
91
92 // We're going to use the minloc backwards because we need to have it sort on the "location" and have the "value" along for the
93 // ride, rather than the other way around
94 typedef typename Kokkos::MinLoc<LO, GO>::value_type minloc_type;
95 minloc_type myMinLoc;
96
97 // Find the initial sequence of parallel gids
98 // To find the lastContiguousGID_, we find the first guy where entryList[i] - entryList[0] != i-0. That's the first non-contiguous guy.
99 // We want the one *before* that guy.
100 Kokkos::parallel_reduce(
101 range_policy(0, numLocalElements), KOKKOS_LAMBDA(const LO& i, GO& l_myMin, GO& l_myMax, GO& l_firstCont, minloc_type& l_lastCont) {
102 GO entry_0 = entryList[0];
103 GO entry_i = entryList[i];
104
105 // Easy stuff
106 l_myMin = (l_myMin < entry_i) ? l_myMin : entry_i;
107 l_myMax = (l_myMax > entry_i) ? l_myMax : entry_i;
108 l_firstCont = entry_0;
109
110 if (entry_i - entry_0 != i && l_lastCont.val >= i) {
111 // We're non-contiguous, so the guy before us could be the last contiguous guy
112 l_lastCont.val = i - 1;
113 l_lastCont.loc = entryList[i - 1];
114 } else if (i == numLocalElements - 1 && i < l_lastCont.val) {
115 // If we're last, we always think we're the last contiguous guy, unless someone non-contiguous is already here
116 l_lastCont.val = i;
117 l_lastCont.loc = entry_i;
118 }
119 },
120 Kokkos::Min<GO>(minMyGID), Kokkos::Max<GO>(maxMyGID), Kokkos::Min<GO>(firstContiguousGID), Kokkos::MinLoc<LO, GO>(myMinLoc));
121
122 // This switch is intentional, since we're using MinLoc backwards
123 lastContiguousGID_val = myMinLoc.loc;
124 lastContiguousGID_loc = myMinLoc.val;
125}
126
127} // namespace
128
129namespace Tpetra {
130
131template <class LocalOrdinal, class GlobalOrdinal, class Node>
133 Map()
134 : comm_(new Teuchos::SerialComm<int>())
135 , indexBase_(0)
136 , numGlobalElements_(0)
137 , numLocalElements_(0)
142 , firstContiguousGID_(Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid())
143 , lastContiguousGID_(Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid())
144 , uniform_(false)
145 , // trivially
146 contiguous_(false)
147 , distributed_(false)
148 , // no communicator yet
149 directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
152}
153
154template <class LocalOrdinal, class GlobalOrdinal, class Node>
158 const Teuchos::RCP<const Teuchos::Comm<int>>& comm,
159 const LocalGlobal lOrG)
160 : comm_(comm)
161 , uniform_(true)
162 , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
163 using std::endl;
164 using Teuchos::as;
165 using Teuchos::broadcast;
166 using Teuchos::outArg;
167 using Teuchos::REDUCE_MAX;
168 using Teuchos::REDUCE_MIN;
169 using Teuchos::reduceAll;
170 using Teuchos::typeName;
171 using GO = global_ordinal_type;
172 using GST = global_size_t;
173 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
174 const char funcName[] = "Map(gblNumInds,indexBase,comm,LG)";
175 const char exPfx[] =
176 "Tpetra::Map::Map(gblNumInds,indexBase,comm,LG): ";
177
178 const bool debug = Details::Behavior::debug("Map");
179 const bool verbose = Details::Behavior::verbose("Map");
180 std::unique_ptr<std::string> prefix;
181 if (verbose) {
183 comm_.getRawPtr(), "Map", funcName);
184 std::ostringstream os;
185 os << *prefix << "Start" << endl;
186 std::cerr << os.str();
187 }
190
191 // In debug mode only, check whether numGlobalElements and
192 // indexBase are the same over all processes in the communicator.
193 if (debug) {
204 std::invalid_argument, exPfx << "All processes must "
205 "provide the same number of global elements. Process 0 set "
206 "numGlobalElements="
207 << proc0NumGlobalElements << ". The "
208 "calling process "
209 << comm->getRank() << " set "
210 "numGlobalElements="
211 << numGlobalElements << ". The min "
212 "and max values over all processes are "
213 << minNumGlobalElements << " resp. " << maxNumGlobalElements << ".");
214
222 std::invalid_argument, exPfx << "All processes must "
223 "provide the same indexBase argument. Process 0 set "
224 "indexBase="
225 << proc0IndexBase << ". The calling process " << comm->getRank() << " set indexBase=" << indexBase << ". The min and max values over all processes are " << minIndexBase << " resp. " << maxIndexBase << ".");
226 }
227
228 // Distribute the elements across the processes in the given
229 // communicator so that global IDs (GIDs) are
230 //
231 // - Nonoverlapping (only one process owns each GID)
232 // - Contiguous (the sequence of GIDs is nondecreasing, and no two
233 // adjacent GIDs differ by more than one)
234 // - As evenly distributed as possible (the numbers of GIDs on two
235 // different processes do not differ by more than one)
236
237 // All processes have the same numGlobalElements, but we still
238 // need to check that it is valid. numGlobalElements must be
239 // positive and not the "invalid" value (GSTI).
240 //
241 // This comparison looks funny, but it avoids compiler warnings
242 // for comparing unsigned integers (numGlobalElements_in is a
243 // GST, which is unsigned) while still working if we
244 // later decide to make GST signed.
247 std::invalid_argument, exPfx << "numGlobalElements (= " << numGlobalElements << ") must be nonnegative.");
248
249 TEUCHOS_TEST_FOR_EXCEPTION(numGlobalElements == GSTI, std::invalid_argument, exPfx << "You provided numGlobalElements = Teuchos::OrdinalTraits<"
250 "Tpetra::global_size_t>::invalid(). This version of the "
251 "constructor requires a valid value of numGlobalElements. "
252 "You probably mistook this constructor for the \"contiguous "
253 "nonuniform\" constructor, which can compute the global "
254 "number of elements for you if you set numGlobalElements to "
255 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid().");
256
257 size_t numLocalElements = 0; // will set below
258 if (lOrG == GloballyDistributed) {
259 // Compute numLocalElements:
260 //
261 // If numGlobalElements == numProcs * B + remainder,
262 // then Proc r gets B+1 elements if r < remainder,
263 // and B elements if r >= remainder.
264 //
265 // This strategy is valid for any value of numGlobalElements and
266 // numProcs, including the following border cases:
267 // - numProcs == 1
268 // - numLocalElements < numProcs
269 //
270 // In the former case, remainder == 0 && numGlobalElements ==
271 // numLocalElements. In the latter case, remainder ==
272 // numGlobalElements && numLocalElements is either 0 or 1.
273 const GST numProcs = static_cast<GST>(comm_->getSize());
274 const GST myRank = static_cast<GST>(comm_->getRank());
277
278 GO startIndex;
279 if (myRank < remainder) {
280 numLocalElements = static_cast<size_t>(1) + static_cast<size_t>(quotient);
281 // myRank was originally an int, so it should never overflow
282 // reasonable GO types.
284 } else {
288 }
289
290 minMyGID_ = indexBase + startIndex;
291 maxMyGID_ = indexBase + startIndex + numLocalElements - 1;
292 minAllGID_ = indexBase;
293 maxAllGID_ = indexBase + numGlobalElements - 1;
294 distributed_ = (numProcs > 1);
295 } else { // lOrG == LocallyReplicated
297 minMyGID_ = indexBase;
298 maxMyGID_ = indexBase + numGlobalElements - 1;
299 distributed_ = false;
300 }
301
302 minAllGID_ = indexBase;
303 maxAllGID_ = indexBase + numGlobalElements - 1;
304 indexBase_ = indexBase;
305 numGlobalElements_ = numGlobalElements;
306 numLocalElements_ = numLocalElements;
307 firstContiguousGID_ = minMyGID_;
308 lastContiguousGID_ = maxMyGID_;
309 contiguous_ = true;
310
311 // Create the Directory on demand in getRemoteIndexList().
312 // setupDirectory ();
313
314 if (verbose) {
315 std::ostringstream os;
316 os << *prefix << "Done" << endl;
317 std::cerr << os.str();
318 }
319}
320
321template <class LocalOrdinal, class GlobalOrdinal, class Node>
324 const size_t numLocalElements,
326 const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
327 : comm_(comm)
328 , uniform_(false)
329 , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
330 using std::endl;
331 using Teuchos::as;
332 using Teuchos::broadcast;
333 using Teuchos::outArg;
334 using Teuchos::REDUCE_MAX;
335 using Teuchos::REDUCE_MIN;
336 using Teuchos::REDUCE_SUM;
337 using Teuchos::reduceAll;
338 using Teuchos::scan;
339 using GO = global_ordinal_type;
340 using GST = global_size_t;
341 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
342 const char funcName[] =
343 "Map(gblNumInds,lclNumInds,indexBase,comm)";
344 const char exPfx[] =
345 "Tpetra::Map::Map(gblNumInds,lclNumInds,indexBase,comm): ";
346 const char suffix[] =
347 ". Please report this bug to the Tpetra developers.";
348
349 const bool debug = Details::Behavior::debug("Map");
350 const bool verbose = Details::Behavior::verbose("Map");
351 std::unique_ptr<std::string> prefix;
352 if (verbose) {
354 comm_.getRawPtr(), "Map", funcName);
355 std::ostringstream os;
356 os << *prefix << "Start" << endl;
357 std::cerr << os.str();
358 }
361
362 // Global sum of numLocalElements over all processes.
363 // Keep this for later debug checks.
365 if (debug) {
366 debugGlobalSum = initialNonuniformDebugCheck(exPfx,
367 numGlobalElements, numLocalElements, indexBase, comm);
368 }
369
370 // Distribute the elements across the nodes so that they are
371 // - non-overlapping
372 // - contiguous
373
374 // This differs from the first Map constructor (that only takes a
375 // global number of elements) in that the user has specified the
376 // number of local elements, so that the elements are not
377 // (necessarily) evenly distributed over the processes.
378
379 // Compute my local offset. This is an inclusive scan, so to get
380 // the final offset, we subtract off the input.
381 GO scanResult = 0;
384
385 if (numGlobalElements != GSTI) {
386 numGlobalElements_ = numGlobalElements; // Use the user's value.
387 } else {
388 // Inclusive scan means that the last process has the final sum.
389 // Rather than doing a reduceAll to get the sum of
390 // numLocalElements, we can just have the last process broadcast
391 // its result. That saves us a round of log(numProcs) messages.
392 const int numProcs = comm->getSize();
394 if (numProcs > 1) {
395 broadcast(*comm, numProcs - 1, outArg(globalSum));
396 }
397 numGlobalElements_ = globalSum;
398
399 if (debug) {
400 // No need for an all-reduce here; both come from collectives.
401 TEUCHOS_TEST_FOR_EXCEPTION(globalSum != debugGlobalSum, std::logic_error, exPfx << "globalSum = " << globalSum << " != debugGlobalSum = " << debugGlobalSum << suffix);
402 }
403 }
404 numLocalElements_ = numLocalElements;
405 indexBase_ = indexBase;
406 minAllGID_ = (numGlobalElements_ == 0) ? std::numeric_limits<GO>::max() : indexBase;
407 maxAllGID_ = (numGlobalElements_ == 0) ? std::numeric_limits<GO>::lowest() : indexBase + GO(numGlobalElements_) - GO(1);
408 minMyGID_ = (numLocalElements_ == 0) ? std::numeric_limits<GO>::max() : indexBase + GO(myOffset);
409 maxMyGID_ = (numLocalElements_ == 0) ? std::numeric_limits<GO>::lowest() : indexBase + myOffset + GO(numLocalElements) - GO(1);
410 firstContiguousGID_ = minMyGID_;
411 lastContiguousGID_ = maxMyGID_;
412 contiguous_ = true;
413 distributed_ = checkIsDist();
414
415 // Create the Directory on demand in getRemoteIndexList().
416 // setupDirectory ();
417
418 if (verbose) {
419 std::ostringstream os;
420 os << *prefix << "Done" << endl;
421 std::cerr << os.str();
422 }
423}
424
425template <class LocalOrdinal, class GlobalOrdinal, class Node>
429 const char errorMessagePrefix[],
431 const size_t numLocalElements,
433 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) const {
434 const bool debug = Details::Behavior::debug("Map");
435 if (!debug) {
436 return global_size_t(0);
437 }
438
439 using Teuchos::broadcast;
440 using Teuchos::outArg;
441 using Teuchos::ptr;
442 using Teuchos::REDUCE_MAX;
443 using Teuchos::REDUCE_MIN;
444 using Teuchos::REDUCE_SUM;
445 using Teuchos::reduceAll;
446 using GO = global_ordinal_type;
447 using GST = global_size_t;
448 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
449
450 // The user has specified the distribution of indices over the
451 // processes. The distribution is not necessarily contiguous or
452 // equally shared over the processes.
453 //
454 // We assume that the number of local elements can be stored in a
455 // size_t. The instance member numLocalElements_ is a size_t, so
456 // this variable and that should have the same type.
457
458 GST debugGlobalSum = 0; // Will be global sum of numLocalElements
461 // In debug mode only, check whether numGlobalElements and
462 // indexBase are the same over all processes in the communicator.
463 {
474 std::invalid_argument, errorMessagePrefix << "All processes "
475 "must provide the same number of global elements, even if "
476 "that argument is "
477 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
478 "(which signals that the Map should compute the global "
479 "number of elements). Process 0 set numGlobalElements"
480 "="
481 << proc0NumGlobalElements << ". The calling process " << comm->getRank() << " set numGlobalElements=" << numGlobalElements << ". The min and max values over all "
482 "processes are "
483 << minNumGlobalElements << " resp. " << maxNumGlobalElements << ".");
484
492 std::invalid_argument, errorMessagePrefix << "All processes must provide the same indexBase argument. "
493 "Process 0 set indexBase = "
494 << proc0IndexBase << ". The "
495 "calling process "
496 << comm->getRank() << " set indexBase=" << indexBase << ". The min and max values over all "
497 "processes are "
498 << minIndexBase << " resp. " << maxIndexBase << ".");
499
500 // Make sure that the sum of numLocalElements over all processes
501 // equals numGlobalElements.
504 std::invalid_argument,
505 errorMessagePrefix << "The sum of each process' number of "
506 "indices over all processes, "
507 << debugGlobalSum << ", != "
508 << "numGlobalElements=" << numGlobalElements << ". If you "
509 "would like this constructor to compute numGlobalElements "
510 "for you, you may set numGlobalElements="
511 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
512 "on input. Please note that this is NOT necessarily -1.");
513 }
514 return debugGlobalSum;
515}
517template <class LocalOrdinal, class GlobalOrdinal, class Node>
520 const char errorMessagePrefix[],
522 const Kokkos::View<const global_ordinal_type*,
523 Kokkos::LayoutLeft,
524 Kokkos::HostSpace,
525 Kokkos::MemoryUnmanaged>& entryList_host,
527 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
528 Tpetra::Details::ProfilingRegion pr("Map::initWithNonownedHostIndexList()");
529
530 using Kokkos::LayoutLeft;
531 using Kokkos::subview;
532 using Kokkos::View;
533 using Kokkos::view_alloc;
534 using Kokkos::WithoutInitializing;
535 using Teuchos::as;
536 using Teuchos::broadcast;
537 using Teuchos::outArg;
538 using Teuchos::ptr;
539 using Teuchos::REDUCE_MAX;
540 using Teuchos::REDUCE_MIN;
541 using Teuchos::REDUCE_SUM;
542 using Teuchos::reduceAll;
543 using LO = local_ordinal_type;
544 using GO = global_ordinal_type;
545 using GST = global_size_t;
546 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
547 // Make sure that Kokkos has been initialized (Github Issue #513).
548 TEUCHOS_TEST_FOR_EXCEPTION(!Kokkos::is_initialized(), std::runtime_error,
549 errorMessagePrefix << "The Kokkos execution space "
550 << Teuchos::TypeNameTraits<execution_space>::name()
551 << " has not been initialized. "
552 "Please initialize it before creating a Map.")
553
554 // The user has specified the distribution of indices over the
555 // processes, via the input array of global indices on each
556 // process. The distribution is not necessarily contiguous or
557 // equally shared over the processes.
558
559 // The length of the input array on this process is the number of
560 // local indices to associate with this process, even though the
561 // input array contains global indices. We assume that the number
562 // of local indices on a process can be stored in a size_t;
563 // numLocalElements_ is a size_t, so this variable and that should
564 // have the same type.
565 const size_t numLocalElements(entryList_host.size());
566
567 initialNonuniformDebugCheck(errorMessagePrefix, numGlobalElements,
569
570 // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
571 // reduction is redundant, since the directory Map will have to do
572 // the same thing. Thus, we could do the scan and broadcast for
573 // the directory Map here, and give the computed offsets to the
574 // directory Map's constructor. However, a reduction costs less
575 // than a scan and broadcast, so this still saves time if users of
576 // this Map don't ever need the Directory (i.e., if they never
577 // call getRemoteIndexList on this Map).
578 if (numGlobalElements != GSTI) {
579 numGlobalElements_ = numGlobalElements; // Use the user's value.
580 } else { // The user wants us to compute the sum.
581 reduceAll(*comm, REDUCE_SUM,
582 static_cast<GST>(numLocalElements),
583 outArg(numGlobalElements_));
584 }
585
586 // mfh 20 Feb 2013: We've never quite done the right thing for
587 // duplicate GIDs here. Duplicate GIDs have always been counted
588 // distinctly in numLocalElements_, and thus should get a
589 // different LID. However, we've always used std::map or a hash
590 // table for the GID -> LID lookup table, so distinct GIDs always
591 // map to the same LID. Furthermore, the order of the input GID
592 // list matters, so it's not desirable to sort for determining
593 // uniqueness.
594 //
595 // I've chosen for now to write this code as if the input GID list
596 // contains no duplicates. If this is not desired, we could use
597 // the lookup table itself to determine uniqueness: If we haven't
598 // seen the GID before, it gets a new LID and it's added to the
599 // LID -> GID and GID -> LID tables. If we have seen the GID
600 // before, it doesn't get added to either table. I would
601 // implement this, but it would cost more to do the double lookups
602 // in the table (one to check, and one to insert).
603 //
604 // More importantly, since we build the GID -> LID table in (a
605 // thread-) parallel (way), the order in which duplicate GIDs may
606 // get inserted is not defined. This would make the assignment of
607 // LID to GID nondeterministic.
608
609 numLocalElements_ = numLocalElements;
610 indexBase_ = indexBase;
611
612 minMyGID_ = indexBase_;
613 maxMyGID_ = indexBase_;
614
615 // NOTE (mfh 27 May 2015): While finding the initial contiguous
616 // GID range requires looking at all the GIDs in the range,
617 // dismissing an interval of GIDs only requires looking at the
618 // first and last GIDs. Thus, we could do binary search backwards
619 // from the end in order to catch the common case of a contiguous
620 // interval followed by noncontiguous entries. On the other hand,
621 // we could just expose this case explicitly as yet another Map
622 // constructor, and avoid the trouble of detecting it.
623 if (numLocalElements_ > 0) {
624 // Find contiguous GID range, with the restriction that the
625 // beginning of the range starts with the first entry. While
626 // doing so, fill in the LID -> GID table.
627 typename decltype(lgMap_)::non_const_type lgMap(view_alloc("lgMap", WithoutInitializing), numLocalElements_);
628 auto lgMap_host =
629 Kokkos::create_mirror_view(Kokkos::HostSpace(), lgMap);
631 // The input array entryList_host is already on host, so we
632 // don't need to take a host view of it.
633 // auto entryList_host =
634 // Kokkos::create_mirror_view (Kokkos::HostSpace (), entryList);
635 // Kokkos::deep_copy (entryList_host, entryList);
636
637 firstContiguousGID_ = entryList_host[0];
638 lastContiguousGID_ = firstContiguousGID_ + 1;
639
640 // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
641 // anyway, so we have to look at them all. The logical way to
642 // find the first noncontiguous entry would thus be to "reduce,"
643 // where the local reduction result is whether entryList[i] + 1
644 // == entryList[i+1].
645
646 lgMap_host[0] = firstContiguousGID_;
647 size_t i = 1;
648 for (; i < numLocalElements_; ++i) {
649 const GO curGid = entryList_host[i];
650 const LO curLid = as<LO>(i);
651
652 if (lastContiguousGID_ != curGid) break;
653
654 // Add the entry to the LID->GID table only after we know that
655 // the current GID is in the initial contiguous sequence, so
656 // that we don't repeat adding it in the first iteration of
657 // the loop below over the remaining noncontiguous GIDs.
659 ++lastContiguousGID_;
660 }
661 --lastContiguousGID_;
662 // NOTE: i is the first non-contiguous index.
663
664 // [firstContiguousGID_, lastContigousGID_] is the initial
665 // sequence of contiguous GIDs. We can start the min and max
666 // GID using this range.
667 minMyGID_ = firstContiguousGID_;
668 maxMyGID_ = lastContiguousGID_;
669
670 // Compute the GID -> LID lookup table, _not_ including the
671 // initial sequence of contiguous GIDs.
673 {
674 const std::pair<size_t, size_t> ncRange(i, entryList_host.extent(0));
676 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(nonContigGids_host.extent(0)) !=
677 static_cast<size_t>(entryList_host.extent(0) - i),
678 std::logic_error,
679 "Tpetra::Map noncontiguous constructor: "
680 "nonContigGids_host.extent(0) = "
681 << nonContigGids_host.extent(0)
682 << " != entryList_host.extent(0) - i = "
683 << (entryList_host.extent(0) - i) << " = "
684 << entryList_host.extent(0) << " - " << i
685 << ". Please report this bug to the Tpetra developers.");
686
687 // FixedHashTable's constructor expects an owned device View,
688 // so we must deep-copy the subview of the input indices.
691 nonContigGids_host.size());
692
693 // DEEP_COPY REVIEW - HOST-TO-DEVICE
694 Kokkos::deep_copy(execution_space(), nonContigGids, nonContigGids_host);
695 Kokkos::fence("Map::initWithNonownedHostIndexList"); // for UVM issues below - which will be refatored soon so FixedHashTable can build as pure CudaSpace - then I think remove this fence
696
699 // Make host version - when memory spaces match these just do trivial assignment
700 glMapHost_ = global_to_local_table_host_type(glMap_);
701 }
702
703 // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
704 // table above, we have to look at all the (noncontiguous) input
705 // indices anyway. Thus, why not have the constructor compute
706 // and return the min and max?
707
708 for (; i < numLocalElements_; ++i) {
709 const GO curGid = entryList_host[i];
710 const LO curLid = static_cast<LO>(i);
711 lgMap_host[curLid] = curGid; // LID -> GID table
712
713 // While iterating through entryList, we compute its
714 // (process-local) min and max elements.
715 if (curGid < minMyGID_) {
716 minMyGID_ = curGid;
717 }
718 if (curGid > maxMyGID_) {
719 maxMyGID_ = curGid;
720 }
721 }
722
723 // We filled lgMap on host above; now sync back to device.
724 // DEEP_COPY REVIEW - HOST-TO-DEVICE
725 Kokkos::deep_copy(execution_space(), lgMap, lgMap_host);
726
727 // "Commit" the local-to-global lookup table we filled in above.
728 lgMap_ = lgMap;
729 // We've already created this, so use it.
730 lgMapHost_ = lgMap_host;
731
732 } else {
733 minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
734 maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
735 // This insures tests for GIDs in the range
736 // [firstContiguousGID_, lastContiguousGID_] fail for processes
737 // with no local elements.
738 firstContiguousGID_ = indexBase_ + 1;
739 lastContiguousGID_ = indexBase_;
740 // glMap_ was default constructed, so it's already empty.
741 }
742
743 // Compute the min and max of all processes' GIDs. If
744 // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
745 // are both indexBase_. This is wrong, but fixing it would
746 // require either a fancy sparse all-reduce, or a custom reduction
747 // operator that ignores invalid values ("invalid" means
748 // Tpetra::Details::OrdinalTraits<GO>::invalid()).
749 //
750 // Also, while we're at it, use the same all-reduce to figure out
751 // if the Map is distributed. "Distributed" means that there is
752 // at least one process with a number of local elements less than
753 // the number of global elements.
754 //
755 // We're computing the min and max of all processes' GIDs using a
756 // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
757 // and y are signed). (This lets us combine the min and max into
758 // a single all-reduce.) If each process sets localDist=1 if its
759 // number of local elements is strictly less than the number of
760 // global elements, and localDist=0 otherwise, then a MAX
761 // all-reduce on localDist tells us if the Map is distributed (1
762 // if yes, 0 if no). Thus, we can append localDist onto the end
763 // of the data and get the global result from the all-reduce.
764 if (std::numeric_limits<GO>::is_signed) {
765 // Does my process NOT own all the elements?
766 const GO localDist =
767 (as<GST>(numLocalElements_) < numGlobalElements_) ? 1 : 0;
768
769 GO minMaxInput[3];
770 minMaxInput[0] = -minMyGID_;
771 minMaxInput[1] = maxMyGID_;
773
774 GO minMaxOutput[3];
775 minMaxOutput[0] = 0;
776 minMaxOutput[1] = 0;
779 minAllGID_ = -minMaxOutput[0];
780 maxAllGID_ = minMaxOutput[1];
781 const GO globalDist = minMaxOutput[2];
782 distributed_ = (comm_->getSize() > 1 && globalDist == 1);
783 } else { // unsigned; use two reductions
784 // This is always correct, no matter the signedness of GO.
785 reduceAll<int, GO>(*comm_, REDUCE_MIN, minMyGID_, outArg(minAllGID_));
786 reduceAll<int, GO>(*comm_, REDUCE_MAX, maxMyGID_, outArg(maxAllGID_));
787 distributed_ = checkIsDist();
788 }
789
790 contiguous_ = false; // "Contiguous" is conservative.
791
793 minAllGID_ < indexBase_,
794 std::invalid_argument,
795 "Tpetra::Map constructor (noncontiguous): "
796 "Minimum global ID = "
797 << minAllGID_ << " over all process(es) is "
798 "less than the given indexBase = "
799 << indexBase_ << ".");
800
801 // Create the Directory on demand in getRemoteIndexList().
802 // setupDirectory ();
803}
804
805template <class LocalOrdinal, class GlobalOrdinal, class Node>
808 const GlobalOrdinal indexList[],
811 const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
812 : comm_(comm)
813 , uniform_(false)
814 , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
815 using std::endl;
816 const char funcName[] =
817 "Map(gblNumInds,indexList,indexListSize,indexBase,comm)";
818
819 const bool verbose = Details::Behavior::verbose("Map");
820 std::unique_ptr<std::string> prefix;
821 if (verbose) {
822 prefix = Details::createPrefix(
823 comm_.getRawPtr(), "Map", funcName);
824 std::ostringstream os;
825 os << *prefix << "Start" << endl;
826 std::cerr << os.str();
827 }
831 checkMapInputArray("(GST, const GO[], LO, GO, comm)",
832 indexList, static_cast<size_t>(indexListSize),
833 comm.getRawPtr());
834 // Not quite sure if I trust all code to behave correctly if the
835 // pointer is nonnull but the array length is nonzero, so I'll
836 // make sure the raw pointer is null if the length is zero.
837 const GlobalOrdinal* const indsRaw = indexListSize == 0 ? NULL : indexList;
838 Kokkos::View<const GlobalOrdinal*,
839 Kokkos::LayoutLeft,
840 Kokkos::HostSpace,
841 Kokkos::MemoryUnmanaged>
843 initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
844 indexBase, comm);
845 if (verbose) {
846 std::ostringstream os;
847 os << *prefix << "Done" << endl;
848 std::cerr << os.str();
849 }
850}
851
852template <class LocalOrdinal, class GlobalOrdinal, class Node>
855 const Teuchos::ArrayView<const GlobalOrdinal>& entryList,
857 const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
858 : comm_(comm)
859 , uniform_(false)
860 , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
861 using std::endl;
862 const char* funcName = "Map(gblNumInds,entryList(Teuchos::ArrayView),indexBase,comm)";
863
864 const bool verbose = Details::Behavior::verbose("Map");
865 std::unique_ptr<std::string> prefix;
866 if (verbose) {
867 prefix = Details::createPrefix(
868 comm_.getRawPtr(), "Map", funcName);
869 std::ostringstream os;
870 os << *prefix << "Start" << endl;
871 std::cerr << os.str();
872 }
876 const size_t numLclInds = static_cast<size_t>(entryList.size());
877 checkMapInputArray("(GST, ArrayView, GO, comm)",
878 entryList.getRawPtr(), numLclInds,
879 comm.getRawPtr());
880 // Not quite sure if I trust both ArrayView and View to behave
881 // correctly if the pointer is nonnull but the array length is
882 // nonzero, so I'll make sure it's null if the length is zero.
883 const GlobalOrdinal* const indsRaw =
884 numLclInds == 0 ? NULL : entryList.getRawPtr();
885 Kokkos::View<const GlobalOrdinal*,
886 Kokkos::LayoutLeft,
887 Kokkos::HostSpace,
888 Kokkos::MemoryUnmanaged>
890 initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
891 indexBase, comm);
892 if (verbose) {
893 std::ostringstream os;
894 os << *prefix << "Done" << endl;
895 std::cerr << os.str();
896 }
897}
898
899template <class LocalOrdinal, class GlobalOrdinal, class Node>
902 const Kokkos::View<const GlobalOrdinal*, device_type>& entryList,
904 const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
905 : comm_(comm)
906 , uniform_(false)
907 , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
908 using Kokkos::LayoutLeft;
909 using Kokkos::subview;
910 using Kokkos::View;
911 using Kokkos::view_alloc;
912 using Kokkos::WithoutInitializing;
913 using std::endl;
914 using Teuchos::arcp;
915 using Teuchos::ArrayView;
916 using Teuchos::as;
917 using Teuchos::broadcast;
918 using Teuchos::outArg;
919 using Teuchos::ptr;
920 using Teuchos::REDUCE_MAX;
921 using Teuchos::REDUCE_MIN;
922 using Teuchos::REDUCE_SUM;
923 using Teuchos::reduceAll;
924 using Teuchos::typeName;
925 using LO = local_ordinal_type;
926 using GO = global_ordinal_type;
927 using GST = global_size_t;
928 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
929 const char funcName[] =
930 "Map(gblNumInds,entryList(Kokkos::View),indexBase,comm)";
931
932 const bool verbose = Details::Behavior::verbose("Map");
933 std::unique_ptr<std::string> prefix;
934 if (verbose) {
936 comm_.getRawPtr(), "Map", funcName);
937 std::ostringstream os;
938 os << *prefix << "Start" << endl;
939 std::cerr << os.str();
940 }
944 checkMapInputArray("(GST, Kokkos::View, GO, comm)",
945 entryList.data(),
946 static_cast<size_t>(entryList.extent(0)),
947 comm.getRawPtr());
948
949 // The user has specified the distribution of indices over the
950 // processes, via the input array of global indices on each
951 // process. The distribution is not necessarily contiguous or
952 // equally shared over the processes.
953
954 // The length of the input array on this process is the number of
955 // local indices to associate with this process, even though the
956 // input array contains global indices. We assume that the number
957 // of local indices on a process can be stored in a size_t;
958 // numLocalElements_ is a size_t, so this variable and that should
959 // have the same type.
960 const size_t numLocalElements(entryList.size());
961
962 initialNonuniformDebugCheck(funcName, numGlobalElements,
964
965 // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
966 // reduction is redundant, since the directory Map will have to do
967 // the same thing. Thus, we could do the scan and broadcast for
968 // the directory Map here, and give the computed offsets to the
969 // directory Map's constructor. However, a reduction costs less
970 // than a scan and broadcast, so this still saves time if users of
971 // this Map don't ever need the Directory (i.e., if they never
972 // call getRemoteIndexList on this Map).
973 if (numGlobalElements != GSTI) {
974 numGlobalElements_ = numGlobalElements; // Use the user's value.
975 } else { // The user wants us to compute the sum.
976 reduceAll(*comm, REDUCE_SUM,
977 static_cast<GST>(numLocalElements),
978 outArg(numGlobalElements_));
979 }
980
981 // mfh 20 Feb 2013: We've never quite done the right thing for
982 // duplicate GIDs here. Duplicate GIDs have always been counted
983 // distinctly in numLocalElements_, and thus should get a
984 // different LID. However, we've always used std::map or a hash
985 // table for the GID -> LID lookup table, so distinct GIDs always
986 // map to the same LID. Furthermore, the order of the input GID
987 // list matters, so it's not desirable to sort for determining
988 // uniqueness.
989 //
990 // I've chosen for now to write this code as if the input GID list
991 // contains no duplicates. If this is not desired, we could use
992 // the lookup table itself to determine uniqueness: If we haven't
993 // seen the GID before, it gets a new LID and it's added to the
994 // LID -> GID and GID -> LID tables. If we have seen the GID
995 // before, it doesn't get added to either table. I would
996 // implement this, but it would cost more to do the double lookups
997 // in the table (one to check, and one to insert).
998 //
999 // More importantly, since we build the GID -> LID table in (a
1000 // thread-) parallel (way), the order in which duplicate GIDs may
1001 // get inserted is not defined. This would make the assignment of
1002 // LID to GID nondeterministic.
1004 numLocalElements_ = numLocalElements;
1005 indexBase_ = indexBase;
1006
1007 minMyGID_ = indexBase_;
1008 maxMyGID_ = indexBase_;
1009
1010 // NOTE (mfh 27 May 2015): While finding the initial contiguous
1011 // GID range requires looking at all the GIDs in the range,
1012 // dismissing an interval of GIDs only requires looking at the
1013 // first and last GIDs. Thus, we could do binary search backwards
1014 // from the end in order to catch the common case of a contiguous
1015 // interval followed by noncontiguous entries. On the other hand,
1016 // we could just expose this case explicitly as yet another Map
1017 // constructor, and avoid the trouble of detecting it.
1018 if (numLocalElements_ > 0) {
1019 // Find contiguous GID range, with the restriction that the
1020 // beginning of the range starts with the first entry. While
1021 // doing so, fill in the LID -> GID table.
1022 typename decltype(lgMap_)::non_const_type lgMap(view_alloc("lgMap", WithoutInitializing), numLocalElements_);
1023
1024 // Because you can't use lambdas in constructors on CUDA. Or using private/protected data.
1025 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
1026 Kokkos::deep_copy(typename device_type::execution_space(), lgMap, entryList);
1028 computeConstantsOnDevice(entryList, minMyGID_, maxMyGID_, firstContiguousGID_, lastContiguousGID_, lastContiguousGID_loc);
1030 auto nonContigGids = Kokkos::subview(entryList, std::pair<size_t, size_t>(firstNonContiguous_loc, entryList.extent(0)));
1031
1032 // NOTE: We do not fill the glMapHost_ and lgMapHost_ views here. They will be filled lazily later
1035
1036 // "Commit" the local-to-global lookup table we filled in above.
1037 lgMap_ = lgMap;
1038
1039 } else {
1040 minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
1041 maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
1042 // This insures tests for GIDs in the range
1043 // [firstContiguousGID_, lastContiguousGID_] fail for processes
1044 // with no local elements.
1045 firstContiguousGID_ = indexBase_ + 1;
1046 lastContiguousGID_ = indexBase_;
1047 // glMap_ was default constructed, so it's already empty.
1048 }
1049
1050 // Compute the min and max of all processes' GIDs. If
1051 // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
1052 // are both indexBase_. This is wrong, but fixing it would
1053 // require either a fancy sparse all-reduce, or a custom reduction
1054 // operator that ignores invalid values ("invalid" means
1055 // Tpetra::Details::OrdinalTraits<GO>::invalid()).
1056 //
1057 // Also, while we're at it, use the same all-reduce to figure out
1058 // if the Map is distributed. "Distributed" means that there is
1059 // at least one process with a number of local elements less than
1060 // the number of global elements.
1061 //
1062 // We're computing the min and max of all processes' GIDs using a
1063 // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
1064 // and y are signed). (This lets us combine the min and max into
1065 // a single all-reduce.) If each process sets localDist=1 if its
1066 // number of local elements is strictly less than the number of
1067 // global elements, and localDist=0 otherwise, then a MAX
1068 // all-reduce on localDist tells us if the Map is distributed (1
1069 // if yes, 0 if no). Thus, we can append localDist onto the end
1070 // of the data and get the global result from the all-reduce.
1071 if (std::numeric_limits<GO>::is_signed) {
1072 // Does my process NOT own all the elements?
1073 const GO localDist =
1074 (as<GST>(numLocalElements_) < numGlobalElements_) ? 1 : 0;
1075
1076 GO minMaxInput[3];
1077 minMaxInput[0] = -minMyGID_;
1078 minMaxInput[1] = maxMyGID_;
1079 minMaxInput[2] = localDist;
1080
1081 GO minMaxOutput[3];
1082 minMaxOutput[0] = 0;
1083 minMaxOutput[1] = 0;
1084 minMaxOutput[2] = 0;
1085 reduceAll<int, GO>(*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
1086 minAllGID_ = -minMaxOutput[0];
1087 maxAllGID_ = minMaxOutput[1];
1088 const GO globalDist = minMaxOutput[2];
1089 distributed_ = (comm_->getSize() > 1 && globalDist == 1);
1090 } else { // unsigned; use two reductions
1091 // This is always correct, no matter the signedness of GO.
1092 reduceAll<int, GO>(*comm_, REDUCE_MIN, minMyGID_, outArg(minAllGID_));
1093 reduceAll<int, GO>(*comm_, REDUCE_MAX, maxMyGID_, outArg(maxAllGID_));
1094 distributed_ = checkIsDist();
1095 }
1096
1097 contiguous_ = false; // "Contiguous" is conservative.
1098
1099 TEUCHOS_TEST_FOR_EXCEPTION(
1100 minAllGID_ < indexBase_,
1101 std::invalid_argument,
1102 "Tpetra::Map constructor (noncontiguous): "
1103 "Minimum global ID = "
1104 << minAllGID_ << " over all process(es) is "
1105 "less than the given indexBase = "
1106 << indexBase_ << ".");
1107
1108 // Create the Directory on demand in getRemoteIndexList().
1109 // setupDirectory ();
1110
1111 if (verbose) {
1112 std::ostringstream os;
1113 os << *prefix << "Done" << endl;
1114 std::cerr << os.str();
1115 }
1116}
1117
1118template <class LocalOrdinal, class GlobalOrdinal, class Node>
1120 if (!Kokkos::is_initialized()) {
1121 std::ostringstream os;
1122 os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1123 "Kokkos::finalize() has been called. This is user error! There are "
1124 "two likely causes: "
1125 << std::endl
1126 << " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1127 << std::endl
1128 << " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1129 "of a Tpetra::Map) at the same scope in main() as Kokkos::finalize() "
1130 "or Tpetra::finalize()."
1131 << std::endl
1132 << std::endl
1133 << "Don't do either of these! Please refer to GitHib Issue #2372."
1134 << std::endl;
1135 ::Tpetra::Details::printOnce(std::cerr, os.str(),
1136 this->getComm().getRawPtr());
1137 } else {
1138 using ::Tpetra::Details::mpiIsFinalized;
1139 using ::Tpetra::Details::mpiIsInitialized;
1140 using ::Tpetra::Details::teuchosCommIsAnMpiComm;
1141
1142 Teuchos::RCP<const Teuchos::Comm<int>> comm = this->getComm();
1143 if (!comm.is_null() && teuchosCommIsAnMpiComm(*comm) &&
1144 mpiIsInitialized() && mpiIsFinalized()) {
1145 // Tpetra itself does not require MPI, even if building with
1146 // MPI. It is legal to create Tpetra objects that do not use
1147 // MPI, even in an MPI program. However, calling Tpetra stuff
1148 // after MPI_Finalize() has been called is a bad idea, since
1149 // some Tpetra defaults may use MPI if available.
1150 std::ostringstream os;
1151 os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1152 "MPI_Finalize() has been called. This is user error! There are "
1153 "two likely causes: "
1154 << std::endl
1155 << " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1156 << std::endl
1157 << " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1158 "of a Tpetra::Map) at the same scope in main() as MPI_finalize() or "
1159 "Tpetra::finalize()."
1160 << std::endl
1161 << std::endl
1162 << "Don't do either of these! Please refer to GitHib Issue #2372."
1163 << std::endl;
1164 ::Tpetra::Details::printOnce(std::cerr, os.str(), comm.getRawPtr());
1165 }
1166 }
1167 // mfh 20 Mar 2018: We can't check Tpetra::isInitialized() yet,
1168 // because Tpetra does not yet require Tpetra::initialize /
1169 // Tpetra::finalize.
1170}
1171
1172template <class LocalOrdinal, class GlobalOrdinal, class Node>
1175 getComm().is_null(), std::logic_error,
1176 "Tpetra::Map::isOneToOne: "
1177 "getComm() returns null. Please report this bug to the Tpetra "
1178 "developers.");
1179
1180 // This is a collective operation, if it hasn't been called before.
1181 setupDirectory();
1182 return directory_->isOneToOne(*this);
1183}
1184
1185template <class LocalOrdinal, class GlobalOrdinal, class Node>
1189 if (isContiguous()) {
1190 if (globalIndex < getMinGlobalIndex() ||
1191 globalIndex > getMaxGlobalIndex()) {
1192 return Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid();
1193 }
1194 return static_cast<LocalOrdinal>(globalIndex - getMinGlobalIndex());
1195 } else if (globalIndex >= firstContiguousGID_ &&
1196 globalIndex <= lastContiguousGID_) {
1197 return static_cast<LocalOrdinal>(globalIndex - firstContiguousGID_);
1198 } else {
1199 // If the given global index is not in the table, this returns
1200 // the same value as OrdinalTraits<LocalOrdinal>::invalid().
1201 // glMapHost_ is Host and does not assume UVM
1202 lazyPushToHost();
1203 return glMapHost_.get(globalIndex);
1204 }
1205}
1206
1207template <class LocalOrdinal, class GlobalOrdinal, class Node>
1211 if (localIndex < getMinLocalIndex() || localIndex > getMaxLocalIndex()) {
1212 return Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid();
1213 }
1214 if (isContiguous()) {
1215 return getMinGlobalIndex() + localIndex;
1216 } else {
1217 // This is a host Kokkos::View access, with no RCP or ArrayRCP
1218 // involvement. As a result, it is thread safe.
1219 //
1220 // lgMapHost_ is a host pointer; this does NOT assume UVM.
1221 lazyPushToHost();
1222 return lgMapHost_[localIndex];
1223 }
1224}
1225
1226template <class LocalOrdinal, class GlobalOrdinal, class Node>
1229 if (localIndex < getMinLocalIndex() || localIndex > getMaxLocalIndex()) {
1230 return false;
1231 } else {
1232 return true;
1233 }
1234}
1235
1236template <class LocalOrdinal, class GlobalOrdinal, class Node>
1239 return this->getLocalElement(globalIndex) !=
1240 Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid();
1241}
1242
1243template <class LocalOrdinal, class GlobalOrdinal, class Node>
1245 return uniform_;
1246}
1247
1248template <class LocalOrdinal, class GlobalOrdinal, class Node>
1250 return contiguous_;
1251}
1252
1253template <class LocalOrdinal, class GlobalOrdinal, class Node>
1256 getLocalMap() const {
1257 return local_map_type(glMap_, lgMap_, getIndexBase(),
1258 getMinGlobalIndex(), getMaxGlobalIndex(),
1259 firstContiguousGID_, lastContiguousGID_,
1260 getLocalNumElements(), isContiguous());
1261}
1262
1263template <class LocalOrdinal, class GlobalOrdinal, class Node>
1266 using Teuchos::outArg;
1267 using Teuchos::REDUCE_MIN;
1268 using Teuchos::reduceAll;
1269 //
1270 // Tests that avoid the Boolean all-reduce below by using
1271 // globally consistent quantities.
1272 //
1273 if (this == &map) {
1274 // Pointer equality on one process always implies pointer
1275 // equality on all processes, since Map is immutable.
1276 return true;
1277 } else if (getComm()->getSize() != map.getComm()->getSize()) {
1278 // The two communicators have different numbers of processes.
1279 // It's not correct to call isCompatible() in that case. This
1280 // may result in the all-reduce hanging below.
1281 return false;
1282 } else if (getGlobalNumElements() != map.getGlobalNumElements()) {
1283 // Two Maps are definitely NOT compatible if they have different
1284 // global numbers of indices.
1285 return false;
1286 } else if (isContiguous() && isUniform() &&
1287 map.isContiguous() && map.isUniform()) {
1288 // Contiguous uniform Maps with the same number of processes in
1289 // their communicators, and with the same global numbers of
1290 // indices, are always compatible.
1291 return true;
1292 } else if (!isContiguous() && !map.isContiguous() &&
1293 lgMap_.extent(0) != 0 && map.lgMap_.extent(0) != 0 &&
1294 lgMap_.data() == map.lgMap_.data()) {
1295 // Noncontiguous Maps whose global index lists are nonempty and
1296 // have the same pointer must be the same (and therefore
1297 // contiguous).
1298 //
1299 // Nonempty is important. For example, consider a communicator
1300 // with two processes, and two Maps that share this
1301 // communicator, with zero global indices on the first process,
1302 // and different nonzero numbers of global indices on the second
1303 // process. In that case, on the first process, the pointers
1304 // would both be NULL.
1305 return true;
1306 }
1307
1309 getGlobalNumElements() != map.getGlobalNumElements(), std::logic_error,
1310 "Tpetra::Map::isCompatible: There's a bug in this method. We've already "
1311 "checked that this condition is true above, but it's false here. "
1312 "Please report this bug to the Tpetra developers.");
1313
1314 // Do both Maps have the same number of indices on each process?
1315 const int locallyCompat =
1316 (getLocalNumElements() == map.getLocalNumElements()) ? 1 : 0;
1317
1318 int globallyCompat = 0;
1320 return (globallyCompat == 1);
1321}
1322
1323template <class LocalOrdinal, class GlobalOrdinal, class Node>
1326 using Teuchos::ArrayView;
1327 using GO = global_ordinal_type;
1328 using size_type = typename ArrayView<const GO>::size_type;
1329
1330 // If both Maps are contiguous, we can compare their GID ranges
1331 // easily by looking at the min and max GID on this process.
1332 // Otherwise, we'll compare their GID lists. If only one Map is
1333 // contiguous, then we only have to call getLocalElementList() on
1334 // the noncontiguous Map. (It's best to avoid calling it on a
1335 // contiguous Map, since it results in unnecessary storage that
1336 // persists for the lifetime of the Map.)
1337
1338 if (this == &map) {
1339 // Pointer equality on one process always implies pointer
1340 // equality on all processes, since Map is immutable.
1341 return true;
1342 } else if (getLocalNumElements() != map.getLocalNumElements()) {
1343 return false;
1344 } else if (getMinGlobalIndex() != map.getMinGlobalIndex() ||
1345 getMaxGlobalIndex() != map.getMaxGlobalIndex()) {
1346 return false;
1347 } else {
1348 if (isContiguous()) {
1349 if (map.isContiguous()) {
1350 return true; // min and max match, so the ranges match.
1351 } else { // *this is contiguous, but map is not contiguous
1353 !this->isContiguous() || map.isContiguous(), std::logic_error,
1354 "Tpetra::Map::locallySameAs: BUG");
1355 ArrayView<const GO> rhsElts = map.getLocalElementList();
1356 const GO minLhsGid = this->getMinGlobalIndex();
1357 const size_type numRhsElts = rhsElts.size();
1358 for (size_type k = 0; k < numRhsElts; ++k) {
1359 const GO curLhsGid = minLhsGid + static_cast<GO>(k);
1360 if (curLhsGid != rhsElts[k]) {
1361 return false; // stop on first mismatch
1362 }
1363 }
1364 return true;
1365 }
1366 } else if (map.isContiguous()) { // *this is not contiguous, but map is
1368 this->isContiguous() || !map.isContiguous(), std::logic_error,
1369 "Tpetra::Map::locallySameAs: BUG");
1370 ArrayView<const GO> lhsElts = this->getLocalElementList();
1371 const GO minRhsGid = map.getMinGlobalIndex();
1372 const size_type numLhsElts = lhsElts.size();
1373 for (size_type k = 0; k < numLhsElts; ++k) {
1374 const GO curRhsGid = minRhsGid + static_cast<GO>(k);
1375 if (curRhsGid != lhsElts[k]) {
1376 return false; // stop on first mismatch
1377 }
1378 }
1379 return true;
1380 } else if (this->lgMap_.data() == map.lgMap_.data()) {
1381 // Pointers to LID->GID "map" (actually just an array) are the
1382 // same, and the number of GIDs are the same.
1383 return this->getLocalNumElements() == map.getLocalNumElements();
1384 } else { // we actually have to compare the GIDs
1385 if (this->getLocalNumElements() != map.getLocalNumElements()) {
1386 return false; // We already checked above, but check just in case
1387 } else {
1388 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename node_type::execution_space>;
1389
1390 auto lhsLclMap = getLocalMap();
1391 auto rhsLclMap = map.getLocalMap();
1392
1394 Kokkos::parallel_reduce(
1395 "Tpetra::Map::locallySameAs",
1396 range_type(0, this->getLocalNumElements()),
1398 if (lhsLclMap.getGlobalElement(lid) != rhsLclMap.getGlobalElement(lid))
1399 ++numMismatches;
1400 },
1402
1403 return (numMismatchedElements == 0);
1404 }
1405 }
1406 }
1407}
1408
1409template <class LocalOrdinal, class GlobalOrdinal, class Node>
1412 if (this == &map)
1413 return true;
1414
1415 // We are going to check if lmap1 is fitted into lmap2:
1416 // Is lmap1 (map) a subset of lmap2 (this)?
1417 // And do the first lmap1.getLocalNumElements() global elements
1418 // of lmap1,lmap2 owned on each process exactly match?
1419 auto lmap1 = map.getLocalMap();
1420 auto lmap2 = this->getLocalMap();
1421
1422 auto numLocalElements1 = lmap1.getLocalNumElements();
1423 auto numLocalElements2 = lmap2.getLocalNumElements();
1424
1426 // There are more indices in the first map on this process than in second map.
1427 return false;
1428 }
1429
1430 if (lmap1.isContiguous() && lmap2.isContiguous()) {
1431 // When both Maps are contiguous, just check the interval inclusion.
1432 return ((lmap1.getMinGlobalIndex() == lmap2.getMinGlobalIndex()) &&
1433 (lmap1.getMaxGlobalIndex() <= lmap2.getMaxGlobalIndex()));
1434 }
1435
1436 if (lmap1.getMinGlobalIndex() < lmap2.getMinGlobalIndex() ||
1437 lmap1.getMaxGlobalIndex() > lmap2.getMaxGlobalIndex()) {
1438 // The second map does not include the first map bounds, and thus some of
1439 // the first map global indices are not in the second map.
1440 return false;
1441 }
1442
1443 using LO = local_ordinal_type;
1444 using range_type =
1445 Kokkos::RangePolicy<LO, typename node_type::execution_space>;
1446
1447 // Check all elements.
1448 LO numDiff = 0;
1449 Kokkos::parallel_reduce(
1450 "isLocallyFitted",
1451 range_type(0, numLocalElements1),
1452 KOKKOS_LAMBDA(const LO i, LO& diff) {
1453 diff += (lmap1.getGlobalElement(i) != lmap2.getGlobalElement(i));
1454 },
1455 numDiff);
1456
1457 return (numDiff == 0);
1458}
1459
1460template <class LocalOrdinal, class GlobalOrdinal, class Node>
1463 using Teuchos::outArg;
1464 using Teuchos::REDUCE_MIN;
1465 using Teuchos::reduceAll;
1466 //
1467 // Tests that avoid the Boolean all-reduce below by using
1468 // globally consistent quantities.
1469 //
1470 if (this == &map) {
1471 // Pointer equality on one process always implies pointer
1472 // equality on all processes, since Map is immutable.
1473 return true;
1474 } else if (getComm()->getSize() != map.getComm()->getSize()) {
1475 // The two communicators have different numbers of processes.
1476 // It's not correct to call isSameAs() in that case. This
1477 // may result in the all-reduce hanging below.
1478 return false;
1479 } else if (getGlobalNumElements() != map.getGlobalNumElements()) {
1480 // Two Maps are definitely NOT the same if they have different
1481 // global numbers of indices.
1482 return false;
1483 } else if (getMinAllGlobalIndex() != map.getMinAllGlobalIndex() ||
1484 getMaxAllGlobalIndex() != map.getMaxAllGlobalIndex() ||
1485 getIndexBase() != map.getIndexBase()) {
1486 // If the global min or max global index doesn't match, or if
1487 // the index base doesn't match, then the Maps aren't the same.
1488 return false;
1489 } else if (isDistributed() != map.isDistributed()) {
1490 // One Map is distributed and the other is not, which means that
1491 // the Maps aren't the same.
1492 return false;
1493 } else if (isContiguous() && isUniform() &&
1494 map.isContiguous() && map.isUniform()) {
1495 // Contiguous uniform Maps with the same number of processes in
1496 // their communicators, with the same global numbers of indices,
1497 // and with matching index bases and ranges, must be the same.
1498 return true;
1499 }
1500
1501 // The two communicators must have the same number of processes,
1502 // with process ranks occurring in the same order. This uses
1503 // MPI_COMM_COMPARE. The MPI 3.1 standard (Section 6.4) says:
1504 // "Operations that access communicators are local and their
1505 // execution does not require interprocess communication."
1506 // However, just to be sure, I'll put this call after the above
1507 // tests that don't communicate.
1508 if (!::Tpetra::Details::congruent(*comm_, *(map.getComm()))) {
1509 return false;
1510 }
1511
1512 // If we get this far, we need to check local properties and then
1513 // communicate local sameness across all processes.
1514 const int isSame_lcl = locallySameAs(map) ? 1 : 0;
1515
1516 // Return true if and only if all processes report local sameness.
1517 int isSame_gbl = 0;
1519 return isSame_gbl == 1;
1520}
1521
1522namespace { // (anonymous)
1523template <class LO, class GO, class DT>
1524class FillLgMap {
1525 public:
1526 FillLgMap(const Kokkos::View<GO*, DT>& lgMap,
1527 const GO startGid)
1528 : lgMap_(lgMap)
1529 , startGid_(startGid) {
1530 Kokkos::RangePolicy<LO, typename DT::execution_space>
1531 range(static_cast<LO>(0), static_cast<LO>(lgMap.size()));
1532 Kokkos::parallel_for(range, *this);
1533 }
1534
1535 KOKKOS_INLINE_FUNCTION void operator()(const LO& lid) const {
1536 lgMap_(lid) = startGid_ + static_cast<GO>(lid);
1537 }
1538
1539 private:
1540 const Kokkos::View<GO*, DT> lgMap_;
1541 const GO startGid_;
1542};
1543
1544} // namespace
1545
1546template <class LocalOrdinal, class GlobalOrdinal, class Node>
1547typename Map<LocalOrdinal, GlobalOrdinal, Node>::global_indices_array_type
1549 using std::endl;
1550 using LO = local_ordinal_type;
1551 using GO = global_ordinal_type;
1552 using const_lg_view_type = decltype(lgMap_);
1553 using lg_view_type = typename const_lg_view_type::non_const_type;
1554 const bool debug = Details::Behavior::debug("Map");
1555 const bool verbose = Details::Behavior::verbose("Map");
1556
1557 std::unique_ptr<std::string> prefix;
1558 if (verbose) {
1560 comm_.getRawPtr(), "Map", "getMyGlobalIndices");
1561 std::ostringstream os;
1562 os << *prefix << "Start" << endl;
1563 std::cerr << os.str();
1564 }
1565
1566 // If the local-to-global mapping doesn't exist yet, and if we
1567 // have local entries, then create and fill the local-to-global
1568 // mapping.
1570 lgMap_.extent(0) == 0 && numLocalElements_ > 0;
1571
1573 if (verbose) {
1574 std::ostringstream os;
1575 os << *prefix << "Need to create lgMap" << endl;
1576 std::cerr << os.str();
1577 }
1578 if (debug) {
1579 // The local-to-global mapping should have been set up already
1580 // for a noncontiguous map.
1581 TEUCHOS_TEST_FOR_EXCEPTION(!isContiguous(), std::logic_error,
1582 "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1583 "mapping (lgMap_) should have been set up already for a "
1584 "noncontiguous Map. Please report this bug to the Tpetra "
1585 "developers.");
1586 }
1587 const LO numElts = static_cast<LO>(getLocalNumElements());
1588
1589 using Kokkos::view_alloc;
1590 using Kokkos::WithoutInitializing;
1591 lg_view_type lgMap("lgMap", numElts);
1592 if (verbose) {
1593 std::ostringstream os;
1594 os << *prefix << "Fill lgMap" << endl;
1595 std::cerr << os.str();
1596 }
1598
1599 if (verbose) {
1600 std::ostringstream os;
1601 os << *prefix << "Copy lgMap to lgMapHost" << endl;
1602 std::cerr << os.str();
1603 }
1604
1605 auto lgMapHost = Kokkos::create_mirror_view(Kokkos::HostSpace(), lgMap);
1606 // DEEP_COPY REVIEW - DEVICE-TO-HOST
1608 Kokkos::deep_copy(exec_instance, lgMapHost, lgMap);
1609
1610 // There's a non-trivial chance we'll grab this on the host,
1611 // so let's make sure the copy finishes
1612 exec_instance.fence();
1613
1614 // "Commit" the local-to-global lookup table we filled in above.
1615 lgMap_ = lgMap;
1616 lgMapHost_ = lgMapHost;
1617 } else {
1618 lazyPushToHost();
1619 }
1620
1621 if (verbose) {
1622 std::ostringstream os;
1623 os << *prefix << "Done" << endl;
1624 std::cerr << os.str();
1625 }
1626 return lgMapHost_;
1627}
1628
1629template <class LocalOrdinal, class GlobalOrdinal, class Node>
1630typename Map<LocalOrdinal, GlobalOrdinal, Node>::global_indices_array_device_type
1632 using std::endl;
1633 using LO = local_ordinal_type;
1634 using GO = global_ordinal_type;
1635 using const_lg_view_type = decltype(lgMap_);
1636 using lg_view_type = typename const_lg_view_type::non_const_type;
1637 const bool debug = Details::Behavior::debug("Map");
1638 const bool verbose = Details::Behavior::verbose("Map");
1639
1640 std::unique_ptr<std::string> prefix;
1641 if (verbose) {
1643 comm_.getRawPtr(), "Map", "getMyGlobalIndicesDevice");
1644 std::ostringstream os;
1645 os << *prefix << "Start" << endl;
1646 std::cerr << os.str();
1647 }
1648
1649 // If the local-to-global mapping doesn't exist yet, and if we
1650 // have local entries, then create and fill the local-to-global
1651 // mapping.
1653 lgMap_.extent(0) == 0 && numLocalElements_ > 0;
1654
1656 if (verbose) {
1657 std::ostringstream os;
1658 os << *prefix << "Need to create lgMap" << endl;
1659 std::cerr << os.str();
1660 }
1661 if (debug) {
1662 // The local-to-global mapping should have been set up already
1663 // for a noncontiguous map.
1664 TEUCHOS_TEST_FOR_EXCEPTION(!isContiguous(), std::logic_error,
1665 "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1666 "mapping (lgMap_) should have been set up already for a "
1667 "noncontiguous Map. Please report this bug to the Tpetra "
1668 "developers.");
1669 }
1670 const LO numElts = static_cast<LO>(getLocalNumElements());
1671
1672 using Kokkos::view_alloc;
1673 using Kokkos::WithoutInitializing;
1674 lg_view_type lgMap("lgMap", numElts);
1675 if (verbose) {
1676 std::ostringstream os;
1677 os << *prefix << "Fill lgMap" << endl;
1678 std::cerr << os.str();
1679 }
1681
1682 // "Commit" the local-to-global lookup table we filled in above.
1683 lgMap_ = lgMap;
1684 }
1685
1686 if (verbose) {
1687 std::ostringstream os;
1688 os << *prefix << "Done" << endl;
1689 std::cerr << os.str();
1690 }
1691 return lgMap_;
1692}
1693
1694template <class LocalOrdinal, class GlobalOrdinal, class Node>
1695Teuchos::ArrayView<const GlobalOrdinal>
1697 using GO = global_ordinal_type;
1698
1699 // If the local-to-global mapping doesn't exist yet, and if we
1700 // have local entries, then create and fill the local-to-global
1701 // mapping.
1702 (void)this->getMyGlobalIndices();
1703
1704 // This does NOT assume UVM; lgMapHost_ is a host pointer.
1705 lazyPushToHost();
1706 const GO* lgMapHostRawPtr = lgMapHost_.data();
1707 // The third argument forces ArrayView not to try to track memory
1708 // in a debug build. We have to use it because the memory does
1709 // not belong to a Teuchos memory management class.
1710 return Teuchos::ArrayView<const GO>(
1712 lgMapHost_.extent(0),
1713 Teuchos::RCP_DISABLE_NODE_LOOKUP);
1714}
1715
1716template <class LocalOrdinal, class GlobalOrdinal, class Node>
1718 return distributed_;
1719}
1720
1721template <class LocalOrdinal, class GlobalOrdinal, class Node>
1723 using Teuchos::TypeNameTraits;
1724 std::ostringstream os;
1725
1726 os << "Tpetra::Map: {"
1727 << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name()
1728 << ", GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name()
1729 << ", NodeType: " << TypeNameTraits<Node>::name();
1730 if (this->getObjectLabel() != "") {
1731 os << ", Label: \"" << this->getObjectLabel() << "\"";
1732 }
1733 os << ", Global number of entries: " << getGlobalNumElements()
1734 << ", Number of processes: " << getComm()->getSize()
1735 << ", Uniform: " << (isUniform() ? "true" : "false")
1736 << ", Contiguous: " << (isContiguous() ? "true" : "false")
1737 << ", Distributed: " << (isDistributed() ? "true" : "false")
1738 << "}";
1739 return os.str();
1740}
1741
1746template <class LocalOrdinal, class GlobalOrdinal, class Node>
1747std::string
1749 localDescribeToString(const Teuchos::EVerbosityLevel vl) const {
1750 using LO = local_ordinal_type;
1751 using std::endl;
1752
1753 // This preserves current behavior of Map.
1754 if (vl < Teuchos::VERB_HIGH) {
1755 return std::string();
1756 }
1757 auto outStringP = Teuchos::rcp(new std::ostringstream());
1758 Teuchos::RCP<Teuchos::FancyOStream> outp =
1759 Teuchos::getFancyOStream(outStringP);
1760 Teuchos::FancyOStream& out = *outp;
1761
1762 auto comm = this->getComm();
1763 const int myRank = comm->getRank();
1764 const int numProcs = comm->getSize();
1765 out << "Process " << myRank << " of " << numProcs << ":" << endl;
1766 Teuchos::OSTab tab1(out);
1767
1768 const LO numEnt = static_cast<LO>(this->getLocalNumElements());
1769 out << "My number of entries: " << numEnt << endl
1770 << "My minimum global index: " << this->getMinGlobalIndex() << endl
1771 << "My maximum global index: " << this->getMaxGlobalIndex() << endl;
1772
1773 if (vl == Teuchos::VERB_EXTREME) {
1774 out << "My global indices: [";
1775 const LO minLclInd = this->getMinLocalIndex();
1776 for (LO k = 0; k < numEnt; ++k) {
1777 out << minLclInd + this->getGlobalElement(k);
1778 if (k + 1 < numEnt) {
1779 out << ", ";
1780 }
1781 }
1782 out << "]" << endl;
1783 }
1784
1785 out.flush(); // make sure the ostringstream got everything
1786 return outStringP->str();
1787}
1788
1789template <class LocalOrdinal, class GlobalOrdinal, class Node>
1791 describe(Teuchos::FancyOStream& out,
1792 const Teuchos::EVerbosityLevel verbLevel) const {
1793 using std::endl;
1794 using Teuchos::TypeNameTraits;
1795 using Teuchos::VERB_DEFAULT;
1796 using Teuchos::VERB_HIGH;
1797 using Teuchos::VERB_LOW;
1798 using Teuchos::VERB_NONE;
1799 using LO = local_ordinal_type;
1800 using GO = global_ordinal_type;
1801 const Teuchos::EVerbosityLevel vl =
1803
1804 if (vl == VERB_NONE) {
1805 return; // don't print anything
1806 }
1807 // If this Map's Comm is null, then the Map does not participate
1808 // in collective operations with the other processes. In that
1809 // case, it is not even legal to call this method. The reasonable
1810 // thing to do in that case is nothing.
1811 auto comm = this->getComm();
1812 if (comm.is_null()) {
1813 return;
1814 }
1815 const int myRank = comm->getRank();
1816 const int numProcs = comm->getSize();
1817
1818 // Only Process 0 should touch the output stream, but this method
1819 // in general may need to do communication. Thus, we may need to
1820 // preserve the current tab level across multiple "if (myRank ==
1821 // 0) { ... }" inner scopes. This is why we sometimes create
1822 // OSTab instances by pointer, instead of by value. We only need
1823 // to create them by pointer if the tab level must persist through
1824 // multiple inner scopes.
1825 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
1826
1827 if (myRank == 0) {
1828 // At every verbosity level but VERB_NONE, Process 0 prints.
1829 // By convention, describe() always begins with a tab before
1830 // printing.
1831 tab0 = Teuchos::rcp(new Teuchos::OSTab(out));
1832 out << "\"Tpetra::Map\":" << endl;
1833 tab1 = Teuchos::rcp(new Teuchos::OSTab(out));
1834 {
1835 out << "Template parameters:" << endl;
1836 Teuchos::OSTab tab2(out);
1837 out << "LocalOrdinal: " << TypeNameTraits<LO>::name() << endl
1838 << "GlobalOrdinal: " << TypeNameTraits<GO>::name() << endl
1839 << "Node: " << TypeNameTraits<Node>::name() << endl;
1840 }
1841 const std::string label = this->getObjectLabel();
1842 if (label != "") {
1843 out << "Label: \"" << label << "\"" << endl;
1844 }
1845 out << "Global number of entries: " << getGlobalNumElements() << endl
1846 << "Minimum global index: " << getMinAllGlobalIndex() << endl
1847 << "Maximum global index: " << getMaxAllGlobalIndex() << endl
1848 << "Index base: " << getIndexBase() << endl
1849 << "Number of processes: " << numProcs << endl
1850 << "Uniform: " << (isUniform() ? "true" : "false") << endl
1851 << "Contiguous: " << (isContiguous() ? "true" : "false") << endl
1852 << "Distributed: " << (isDistributed() ? "true" : "false") << endl;
1853 }
1854
1855 // This is collective over the Map's communicator.
1856 if (vl >= VERB_HIGH) { // VERB_HIGH or VERB_EXTREME
1857 const std::string lclStr = this->localDescribeToString(vl);
1859 }
1860}
1861
1862template <class LocalOrdinal, class GlobalOrdinal, class Node>
1863Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>>
1865 replaceCommWithSubset(const Teuchos::RCP<const Teuchos::Comm<int>>& newComm) const {
1866 using Teuchos::RCP;
1867 using Teuchos::rcp;
1868 using GST = global_size_t;
1869 using LO = local_ordinal_type;
1870 using GO = global_ordinal_type;
1871 using map_type = Map<LO, GO, Node>;
1872
1873 // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
1874 // the Map by calling its ordinary public constructor, using the
1875 // original Map's data. This only involves O(1) all-reduces over
1876 // the new communicator, which in the common case only includes a
1877 // small number of processes.
1878
1879 // Create the Map to return.
1880 if (newComm.is_null() || newComm->getSize() < 1) {
1881 return Teuchos::null; // my process does not participate in the new Map
1882 } else if (newComm->getSize() == 1) {
1883 lazyPushToHost();
1884
1885 // The case where the new communicator has only one process is
1886 // easy. We don't have to communicate to get all the
1887 // information we need. Use the default comm to create the new
1888 // Map, then fill in all the fields directly.
1889 RCP<map_type> newMap(new map_type());
1890
1891 newMap->comm_ = newComm;
1892 // mfh 07 Oct 2016: Preserve original behavior, even though the
1893 // original index base may no longer be the globally min global
1894 // index. See #616 for why this doesn't matter so much anymore.
1895 newMap->indexBase_ = this->indexBase_;
1896 newMap->numGlobalElements_ = this->numLocalElements_;
1897 newMap->numLocalElements_ = this->numLocalElements_;
1898 newMap->minMyGID_ = this->minMyGID_;
1899 newMap->maxMyGID_ = this->maxMyGID_;
1900 newMap->minAllGID_ = this->minMyGID_;
1901 newMap->maxAllGID_ = this->maxMyGID_;
1902 newMap->firstContiguousGID_ = this->firstContiguousGID_;
1903 newMap->lastContiguousGID_ = this->lastContiguousGID_;
1904 // Since the new communicator has only one process, neither
1905 // uniformity nor contiguity have changed.
1906 newMap->uniform_ = this->uniform_;
1907 newMap->contiguous_ = this->contiguous_;
1908 // The new communicator only has one process, so the new Map is
1909 // not distributed.
1910 newMap->distributed_ = false;
1911 newMap->lgMap_ = this->lgMap_;
1912 newMap->lgMapHost_ = this->lgMapHost_;
1913 newMap->glMap_ = this->glMap_;
1914 newMap->glMapHost_ = this->glMapHost_;
1915 // It's OK not to initialize the new Map's Directory.
1916 // This is initialized lazily, on first call to getRemoteIndexList.
1917
1918 return newMap;
1919 } else { // newComm->getSize() != 1
1920 // Even if the original Map is contiguous, the new Map might not
1921 // be, especially if the excluded processes have ranks != 0 or
1922 // newComm->getSize()-1. The common case for this method is to
1923 // exclude many (possibly even all but one) processes, so it
1924 // likely doesn't pay to do the global communication (over the
1925 // original communicator) to figure out whether we can optimize
1926 // the result Map. Thus, we just set up the result Map as
1927 // noncontiguous.
1928 //
1929 // TODO (mfh 07 Oct 2016) We don't actually need to reconstruct
1930 // the global-to-local table, etc. Optimize this code path to
1931 // avoid unnecessary local work.
1932
1933 // Make Map (re)compute the global number of elements.
1934 const GST RECOMPUTE = Tpetra::Details::OrdinalTraits<GST>::invalid();
1935 // TODO (mfh 07 Oct 2016) If we use any Map constructor, we have
1936 // to use the noncontiguous Map constructor, since the new Map
1937 // might not be contiguous. Even if the old Map was contiguous,
1938 // some process in the "middle" might have been excluded. If we
1939 // want to avoid local work, we either have to do the setup by
1940 // hand, or write a new Map constructor.
1941#if 1
1942 // The disabled code here throws the following exception in
1943 // Map's replaceCommWithSubset test:
1944 //
1945 // Throw test that evaluated to true: static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::ArithTraits<ValueType>::max ())
1946 // 10:
1947 // 10: Tpetra::Details::FixedHashTable: The number of keys -3 is greater than the maximum representable ValueType value 2147483647. This means that it is not possible to use this constructor.
1948 // 10: Process 3: origComm->replaceCommWithSubset(subsetComm) threw an exception: /scratch/prj/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Details_FixedHashTable_def.hpp:1044:
1949
1950 auto lgMap = this->getMyGlobalIndices();
1951 using size_type =
1952 typename std::decay<decltype(lgMap.extent(0))>::type;
1953 const size_type lclNumInds =
1954 static_cast<size_type>(this->getLocalNumElements());
1955 using Teuchos::TypeNameTraits;
1956 TEUCHOS_TEST_FOR_EXCEPTION(lgMap.extent(0) != lclNumInds, std::logic_error,
1957 "Tpetra::Map::replaceCommWithSubset: Result of getMyGlobalIndices() "
1958 "has length "
1959 << lgMap.extent(0) << " (of type " << TypeNameTraits<size_type>::name() << ") != this->getLocalNumElements()"
1960 " = "
1961 << this->getLocalNumElements() << ". The latter, upon being "
1962 "cast to size_type = "
1964 "becomes "
1965 << lclNumInds << ". Please report this bug to the Tpetra "
1966 "developers.");
1967#else
1968 Teuchos::ArrayView<const GO> lgMap = this->getLocalElementList();
1969#endif // 1
1970
1971 const GO indexBase = this->getIndexBase();
1972 // map stores HostSpace of CudaSpace but constructor is still CudaUVMSpace
1973 auto lgMap_device = Kokkos::create_mirror_view_and_copy(device_type(), lgMap);
1974 return rcp(new map_type(RECOMPUTE, lgMap_device, indexBase, newComm));
1975 }
1976}
1977
1978template <class LocalOrdinal, class GlobalOrdinal, class Node>
1979Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>>
1981 removeEmptyProcesses() const {
1982 using Teuchos::Comm;
1983 using Teuchos::null;
1984 using Teuchos::outArg;
1985 using Teuchos::RCP;
1986 using Teuchos::rcp;
1987 using Teuchos::REDUCE_MIN;
1988 using Teuchos::reduceAll;
1989
1990 // Create the new communicator. split() returns a valid
1991 // communicator on all processes. On processes where color == 0,
1992 // ignore the result. Passing key == 0 tells MPI to order the
1993 // processes in the new communicator by their rank in the old
1994 // communicator.
1995 const int color = (numLocalElements_ == 0) ? 0 : 1;
1996 // MPI_Comm_split must be called collectively over the original
1997 // communicator. We can't just call it on processes with color
1998 // one, even though we will ignore its result on processes with
1999 // color zero.
2000 RCP<const Comm<int>> newComm = comm_->split(color, 0);
2001 if (color == 0) {
2002 newComm = null;
2003 }
2004
2005 // Create the Map to return.
2006 if (newComm.is_null()) {
2007 return null; // my process does not participate in the new Map
2008 } else {
2009 RCP<Map> map = rcp(new Map());
2010
2011 map->comm_ = newComm;
2012 map->indexBase_ = indexBase_;
2013 map->numGlobalElements_ = numGlobalElements_;
2014 map->numLocalElements_ = numLocalElements_;
2015 map->minMyGID_ = minMyGID_;
2016 map->maxMyGID_ = maxMyGID_;
2017 map->minAllGID_ = minAllGID_;
2018 map->maxAllGID_ = maxAllGID_;
2019 map->firstContiguousGID_ = firstContiguousGID_;
2020 map->lastContiguousGID_ = lastContiguousGID_;
2021
2022 // Uniformity and contiguity have not changed. The directory
2023 // has changed, but we've taken care of that above.
2024 map->uniform_ = uniform_;
2025 map->contiguous_ = contiguous_;
2026
2027 // If the original Map was NOT distributed, then the new Map
2028 // cannot be distributed.
2029 //
2030 // If the number of processes in the new communicator is 1, then
2031 // the new Map is not distributed.
2032 //
2033 // Otherwise, we have to check the new Map using an all-reduce
2034 // (over the new communicator). For example, the original Map
2035 // may have had some processes with zero elements, and all other
2036 // processes with the same number of elements as in the whole
2037 // Map. That Map is technically distributed, because of the
2038 // processes with zero elements. Removing those processes would
2039 // make the new Map locally replicated.
2040 if (!distributed_ || newComm->getSize() == 1) {
2041 map->distributed_ = false;
2042 } else {
2043 const int iOwnAllGids = (numLocalElements_ == numGlobalElements_) ? 1 : 0;
2044 int allProcsOwnAllGids = 0;
2046 map->distributed_ = (allProcsOwnAllGids == 1) ? false : true;
2047 }
2048
2049 map->lgMap_ = lgMap_;
2050 map->lgMapHost_ = lgMapHost_;
2051 map->glMap_ = glMap_;
2052 map->glMapHost_ = glMapHost_;
2053
2054 // Map's default constructor creates an uninitialized Directory.
2055 // The Directory will be initialized on demand in
2056 // getRemoteIndexList().
2057 //
2058 // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the
2059 // directory more efficiently than just recreating it. If
2060 // directory recreation proves a bottleneck, we can always
2061 // revisit this. On the other hand, Directory creation is only
2062 // collective over the new, presumably much smaller
2063 // communicator, so it may not be worth the effort to optimize.
2064
2065 return map;
2066 }
2067}
2068
2069template <class LocalOrdinal, class GlobalOrdinal, class Node>
2072 directory_.is_null(), std::logic_error,
2073 "Tpetra::Map::setupDirectory: "
2074 "The Directory is null. "
2075 "Please report this bug to the Tpetra developers.");
2076
2077 // Only create the Directory if it hasn't been created yet.
2078 // This is a collective operation.
2079 if (!directory_->initialized()) {
2080 directory_->initialize(*this);
2081 }
2082}
2083
2084template <class LocalOrdinal, class GlobalOrdinal, class Node>
2087 getRemoteIndexList(const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
2088 const Teuchos::ArrayView<int>& PIDs,
2089 const Teuchos::ArrayView<LocalOrdinal>& LIDs) const {
2091 using std::endl;
2092 using Tpetra::Details::OrdinalTraits;
2093 using size_type = Teuchos::ArrayView<int>::size_type;
2094
2095 const bool verbose = Details::Behavior::verbose("Map");
2097 std::unique_ptr<std::string> prefix;
2098 if (verbose) {
2099 prefix = Details::createPrefix(comm_.getRawPtr(),
2100 "Map", "getRemoteIndexList(GIDs,PIDs,LIDs)");
2101 std::ostringstream os;
2102 os << *prefix << "Start: ";
2103 verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2104 os << endl;
2105 std::cerr << os.str();
2106 }
2107
2108 // Empty Maps (i.e., containing no indices on any processes in the
2109 // Map's communicator) are perfectly valid. In that case, if the
2110 // input GID list is nonempty, we fill the output arrays with
2111 // invalid values, and return IDNotPresent to notify the caller.
2112 // It's perfectly valid to give getRemoteIndexList GIDs that the
2113 // Map doesn't own. SubmapImport test 2 needs this functionality.
2114 if (getGlobalNumElements() == 0) {
2115 if (GIDs.size() == 0) {
2116 if (verbose) {
2117 std::ostringstream os;
2118 os << *prefix << "Done; both Map & input are empty" << endl;
2119 std::cerr << os.str();
2120 }
2121 return AllIDsPresent; // trivially
2122 } else {
2123 if (verbose) {
2124 std::ostringstream os;
2125 os << *prefix << "Done: Map is empty on all processes, "
2126 "so all output PIDs & LIDs are invalid (-1)."
2127 << endl;
2128 std::cerr << os.str();
2129 }
2130 for (size_type k = 0; k < PIDs.size(); ++k) {
2132 }
2133 for (size_type k = 0; k < LIDs.size(); ++k) {
2135 }
2136 return IDNotPresent;
2137 }
2138 }
2139
2140 // getRemoteIndexList must be called collectively, and Directory
2141 // initialization is collective too, so it's OK to initialize the
2142 // Directory on demand.
2143
2144 if (verbose) {
2145 std::ostringstream os;
2146 os << *prefix << "Call setupDirectory" << endl;
2147 std::cerr << os.str();
2148 }
2149 setupDirectory();
2150 if (verbose) {
2151 std::ostringstream os;
2152 os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2153 std::cerr << os.str();
2154 }
2156 directory_->getDirectoryEntries(*this, GIDs, PIDs, LIDs);
2157 if (verbose) {
2158 std::ostringstream os;
2159 os << *prefix << "Done; getDirectoryEntries returned "
2160 << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2161 << "; ";
2162 verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2163 os << ", ";
2164 verbosePrintArray(os, LIDs, "LIDs", maxNumToPrint);
2165 os << endl;
2166 std::cerr << os.str();
2167 }
2168 return retVal;
2169}
2170
2171template <class LocalOrdinal, class GlobalOrdinal, class Node>
2174 getRemoteIndexList(const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
2175 const Teuchos::ArrayView<int>& PIDs) const {
2177 using std::endl;
2178
2179 const bool verbose = Details::Behavior::verbose("Map");
2181 std::unique_ptr<std::string> prefix;
2182 if (verbose) {
2183 prefix = Details::createPrefix(comm_.getRawPtr(),
2184 "Map", "getRemoteIndexList(GIDs,PIDs)");
2185 std::ostringstream os;
2186 os << *prefix << "Start: ";
2187 verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2188 os << endl;
2189 std::cerr << os.str();
2190 }
2191
2192 if (getGlobalNumElements() == 0) {
2193 if (GIDs.size() == 0) {
2194 if (verbose) {
2195 std::ostringstream os;
2196 os << *prefix << "Done; both Map & input are empty" << endl;
2197 std::cerr << os.str();
2198 }
2199 return AllIDsPresent; // trivially
2200 } else {
2201 if (verbose) {
2202 std::ostringstream os;
2203 os << *prefix << "Done: Map is empty on all processes, "
2204 "so all output PIDs are invalid (-1)."
2205 << endl;
2206 std::cerr << os.str();
2207 }
2208 for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size(); ++k) {
2209 PIDs[k] = Tpetra::Details::OrdinalTraits<int>::invalid();
2210 }
2211 return IDNotPresent;
2212 }
2213 }
2214
2215 // getRemoteIndexList must be called collectively, and Directory
2216 // initialization is collective too, so it's OK to initialize the
2217 // Directory on demand.
2218
2219 if (verbose) {
2220 std::ostringstream os;
2221 os << *prefix << "Call setupDirectory" << endl;
2222 std::cerr << os.str();
2223 }
2224 setupDirectory();
2225 if (verbose) {
2226 std::ostringstream os;
2227 os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2228 std::cerr << os.str();
2229 }
2231 directory_->getDirectoryEntries(*this, GIDs, PIDs);
2232 if (verbose) {
2233 std::ostringstream os;
2234 os << *prefix << "Done; getDirectoryEntries returned "
2235 << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2236 << "; ";
2237 verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2238 os << endl;
2239 std::cerr << os.str();
2240 }
2241 return retVal;
2242}
2243
2244template <class LocalOrdinal, class GlobalOrdinal, class Node>
2246 using exec_space = typename Node::device_type::execution_space;
2247 if (lgMap_.extent(0) != lgMapHost_.extent(0)) {
2248 Tpetra::Details::ProfilingRegion pr("Map::lazyPushToHost() - pushing data");
2249 // NOTE: We check lgMap_ and not glMap_, since the latter can
2250 // be somewhat error prone for contiguous maps
2251
2252 // create_mirror_view preserves const-ness. create_mirror does not
2253 auto lgMap_host = Kokkos::create_mirror(Kokkos::HostSpace(), lgMap_);
2254
2255 // Since this was computed on the default stream, we can copy on the stream and then fence
2256 // the stream
2257 Kokkos::deep_copy(exec_space(), lgMap_host, lgMap_);
2258 exec_space().fence();
2259 lgMapHost_ = lgMap_host;
2260
2261 // Make host version - when memory spaces match these just do trivial assignment
2262 glMapHost_ = global_to_local_table_host_type(glMap_);
2263 }
2264}
2265
2266template <class LocalOrdinal, class GlobalOrdinal, class Node>
2267Teuchos::RCP<const Teuchos::Comm<int>>
2269 return comm_;
2270}
2271
2272template <class LocalOrdinal, class GlobalOrdinal, class Node>
2274 checkIsDist() const {
2275 using std::endl;
2276 using Teuchos::as;
2277 using Teuchos::outArg;
2278 using Teuchos::REDUCE_MIN;
2279 using Teuchos::reduceAll;
2280
2281 const bool verbose = Details::Behavior::verbose("Map");
2282 std::unique_ptr<std::string> prefix;
2283 if (verbose) {
2285 comm_.getRawPtr(), "Map", "checkIsDist");
2286 std::ostringstream os;
2287 os << *prefix << "Start" << endl;
2288 std::cerr << os.str();
2289 }
2290
2291 bool global = false;
2292 if (comm_->getSize() > 1) {
2293 // The communicator has more than one process, but that doesn't
2294 // necessarily mean the Map is distributed.
2295 int localRep = 0;
2296 if (numGlobalElements_ == as<global_size_t>(numLocalElements_)) {
2297 // The number of local elements on this process equals the
2298 // number of global elements.
2299 //
2300 // NOTE (mfh 22 Nov 2011) Does this still work if there were
2301 // duplicates in the global ID list on input (the third Map
2302 // constructor), so that the number of local elements (which
2303 // are not duplicated) on this process could be less than the
2304 // number of global elements, even if this process owns all
2305 // the elements?
2306 localRep = 1;
2307 }
2308 int allLocalRep;
2309 reduceAll<int, int>(*comm_, REDUCE_MIN, localRep, outArg(allLocalRep));
2310 if (allLocalRep != 1) {
2311 // At least one process does not own all the elements.
2312 // This makes the Map a distributed Map.
2313 global = true;
2314 }
2315 }
2316 // If the communicator has only one process, then the Map is not
2317 // distributed.
2318
2319 if (verbose) {
2320 std::ostringstream os;
2321 os << *prefix << "Done; global=" << (global ? "true" : "false")
2322 << endl;
2323 std::cerr << os.str();
2324 }
2325 return global;
2326}
2327
2328} // namespace Tpetra
2329
2330template <class LocalOrdinal, class GlobalOrdinal>
2331Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal>>
2332Tpetra::createLocalMap(const size_t numElements,
2333 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2334 typedef LocalOrdinal LO;
2335 typedef GlobalOrdinal GO;
2336 using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2337 return createLocalMapWithNode<LO, GO, NT>(numElements, comm);
2338}
2339
2340template <class LocalOrdinal, class GlobalOrdinal>
2341Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal>>
2342Tpetra::createUniformContigMap(const global_size_t numElements,
2343 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2344 typedef LocalOrdinal LO;
2345 typedef GlobalOrdinal GO;
2346 using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2347 return createUniformContigMapWithNode<LO, GO, NT>(numElements, comm);
2348}
2349
2350template <class LocalOrdinal, class GlobalOrdinal, class Node>
2351Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2352Tpetra::createUniformContigMapWithNode(const global_size_t numElements,
2353 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2354 using Teuchos::rcp;
2356 const GlobalOrdinal indexBase = static_cast<GlobalOrdinal>(0);
2357
2358 return rcp(new map_type(numElements, indexBase, comm, GloballyDistributed));
2359}
2360
2361template <class LocalOrdinal, class GlobalOrdinal, class Node>
2362Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2363Tpetra::createLocalMapWithNode(const size_t numElements,
2364 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2365 using Teuchos::rcp;
2368 const GlobalOrdinal indexBase = 0;
2369 const global_size_t globalNumElts = static_cast<global_size_t>(numElements);
2370
2371 return rcp(new map_type(globalNumElts, indexBase, comm, LocallyReplicated));
2372}
2373
2374template <class LocalOrdinal, class GlobalOrdinal, class Node>
2375Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2377 const size_t localNumElements,
2378 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2379 using Teuchos::rcp;
2381 const GlobalOrdinal indexBase = 0;
2382
2383 return rcp(new map_type(numElements, localNumElements, indexBase, comm));
2384}
2385
2386template <class LocalOrdinal, class GlobalOrdinal>
2387Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal>>
2389 const size_t localNumElements,
2390 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2391 typedef LocalOrdinal LO;
2392 typedef GlobalOrdinal GO;
2393 using NT = typename Tpetra::Map<LO, GO>::node_type;
2394
2395 return Tpetra::createContigMapWithNode<LO, GO, NT>(numElements, localNumElements, comm);
2396}
2397
2398template <class LocalOrdinal, class GlobalOrdinal>
2399Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal>>
2400Tpetra::createNonContigMap(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2401 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2402 typedef LocalOrdinal LO;
2403 typedef GlobalOrdinal GO;
2404 using NT = typename Tpetra::Map<LO, GO>::node_type;
2405
2406 return Tpetra::createNonContigMapWithNode<LO, GO, NT>(elementList, comm);
2407}
2408
2409template <class LocalOrdinal, class GlobalOrdinal, class Node>
2410Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2411Tpetra::createNonContigMapWithNode(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2412 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2413 using Teuchos::rcp;
2415 using GST = Tpetra::global_size_t;
2416 const GST INV = Tpetra::Details::OrdinalTraits<GST>::invalid();
2417 // FIXME (mfh 22 Jul 2016) This is what I found here, but maybe this
2418 // shouldn't be zero, given that the index base is supposed to equal
2419 // the globally min global index?
2420 const GlobalOrdinal indexBase = 0;
2421
2422 return rcp(new map_type(INV, elementList, indexBase, comm));
2423}
2424
2425template <class LO, class GO, class NT>
2426Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>
2427Tpetra::createOneToOne(const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& M) {
2428 using Details::verbosePrintArray;
2429 using std::cerr;
2430 using std::endl;
2431 using Teuchos::Array;
2432 using Teuchos::ArrayView;
2433 using Teuchos::as;
2434 using Teuchos::rcp;
2435 using map_type = Tpetra::Map<LO, GO, NT>;
2436 using GST = global_size_t;
2437
2438 const bool verbose = Details::Behavior::verbose("Map");
2439 std::unique_ptr<std::string> prefix;
2440 if (verbose) {
2441 auto comm = M.is_null() ? Teuchos::null : M->getComm();
2442 prefix = Details::createPrefix(
2443 comm.getRawPtr(), "createOneToOne(Map)");
2444 std::ostringstream os;
2445 os << *prefix << "Start" << endl;
2446 cerr << os.str();
2447 }
2448 const size_t maxNumToPrint = verbose ? Details::Behavior::verbosePrintCountThreshold() : size_t(0);
2449 const GST GINV = Tpetra::Details::OrdinalTraits<GST>::invalid();
2450 const int myRank = M->getComm()->getRank();
2451
2452 // Bypasses for special cases where either M is known to be
2453 // one-to-one, or the one-to-one version of M is easy to compute.
2454 // This is why we take M as an RCP, not as a const reference -- so
2455 // that we can return M itself if it is 1-to-1.
2456 if (!M->isDistributed()) {
2457 // For a locally replicated Map, we assume that users want to push
2458 // all the GIDs to Process 0.
2459
2460 // mfh 05 Nov 2013: getGlobalNumElements() does indeed return what
2461 // you think it should return, in this special case of a locally
2462 // replicated contiguous Map.
2463 const GST numGlobalEntries = M->getGlobalNumElements();
2464 if (M->isContiguous()) {
2465 const size_t numLocalEntries =
2466 (myRank == 0) ? as<size_t>(numGlobalEntries) : size_t(0);
2467 if (verbose) {
2468 std::ostringstream os;
2469 os << *prefix << "Input is locally replicated & contiguous; "
2470 "numLocalEntries="
2471 << numLocalEntries << endl;
2472 cerr << os.str();
2473 }
2474 auto retMap =
2475 rcp(new map_type(numGlobalEntries, numLocalEntries,
2476 M->getIndexBase(), M->getComm()));
2477 if (verbose) {
2478 std::ostringstream os;
2479 os << *prefix << "Done" << endl;
2480 cerr << os.str();
2481 }
2482 return retMap;
2483 } else {
2484 if (verbose) {
2485 std::ostringstream os;
2486 os << *prefix << "Input is locally replicated & noncontiguous"
2487 << endl;
2488 cerr << os.str();
2489 }
2490 ArrayView<const GO> myGids =
2491 (myRank == 0) ? M->getLocalElementList() : Teuchos::null;
2492 auto retMap =
2493 rcp(new map_type(GINV, myGids(), M->getIndexBase(),
2494 M->getComm()));
2495 if (verbose) {
2496 std::ostringstream os;
2497 os << *prefix << "Done" << endl;
2498 cerr << os.str();
2499 }
2500 return retMap;
2501 }
2502 } else if (M->isContiguous()) {
2503 if (verbose) {
2504 std::ostringstream os;
2505 os << *prefix << "Input is distributed & contiguous" << endl;
2506 cerr << os.str();
2507 }
2508 // Contiguous, distributed Maps are one-to-one by construction.
2509 // (Locally replicated Maps can be contiguous.)
2510 return M;
2511 } else {
2512 if (verbose) {
2513 std::ostringstream os;
2514 os << *prefix << "Input is distributed & noncontiguous" << endl;
2515 cerr << os.str();
2516 }
2518 const size_t numMyElems = M->getLocalNumElements();
2519 ArrayView<const GO> myElems = M->getLocalElementList();
2520 Array<int> owner_procs_vec(numMyElems);
2521
2522 if (verbose) {
2523 std::ostringstream os;
2524 os << *prefix << "Call Directory::getDirectoryEntries: ";
2525 verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2526 os << endl;
2527 cerr << os.str();
2528 }
2529 directory.getDirectoryEntries(*M, myElems, owner_procs_vec());
2530 if (verbose) {
2531 std::ostringstream os;
2532 os << *prefix << "getDirectoryEntries result: ";
2533 verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2534 os << endl;
2535 cerr << os.str();
2536 }
2537
2538 Array<GO> myOwned_vec(numMyElems);
2539 size_t numMyOwnedElems = 0;
2540 for (size_t i = 0; i < numMyElems; ++i) {
2541 const GO GID = myElems[i];
2542 const int owner = owner_procs_vec[i];
2543
2544 if (myRank == owner) {
2545 myOwned_vec[numMyOwnedElems++] = GID;
2546 }
2547 }
2548 myOwned_vec.resize(numMyOwnedElems);
2549
2550 if (verbose) {
2551 std::ostringstream os;
2552 os << *prefix << "Create Map: ";
2553 verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2554 os << endl;
2555 cerr << os.str();
2556 }
2557 auto retMap = rcp(new map_type(GINV, myOwned_vec(),
2558 M->getIndexBase(), M->getComm()));
2559 if (verbose) {
2560 std::ostringstream os;
2561 os << *prefix << "Done" << endl;
2562 cerr << os.str();
2563 }
2564 return retMap;
2565 }
2566}
2567
2568template <class LocalOrdinal, class GlobalOrdinal, class Node>
2569Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2572 using Details::Behavior;
2573 using Details::verbosePrintArray;
2574 using std::cerr;
2575 using std::endl;
2576 using Teuchos::Array;
2577 using Teuchos::ArrayView;
2578 using Teuchos::RCP;
2579 using Teuchos::rcp;
2580 using Teuchos::toString;
2581 using LO = LocalOrdinal;
2582 using GO = GlobalOrdinal;
2583 using map_type = Tpetra::Map<LO, GO, Node>;
2584
2585 const bool verbose = Behavior::verbose("Map");
2586 std::unique_ptr<std::string> prefix;
2587 if (verbose) {
2588 auto comm = M.is_null() ? Teuchos::null : M->getComm();
2589 prefix = Details::createPrefix(
2590 comm.getRawPtr(), "createOneToOne(Map,TieBreak)");
2591 std::ostringstream os;
2592 os << *prefix << "Start" << endl;
2593 cerr << os.str();
2594 }
2595 const size_t maxNumToPrint = verbose ? Behavior::verbosePrintCountThreshold() : size_t(0);
2596
2597 // FIXME (mfh 20 Feb 2013) We should have a bypass for contiguous
2598 // Maps (which are 1-to-1 by construction).
2599
2601 if (verbose) {
2602 std::ostringstream os;
2603 os << *prefix << "Initialize Directory" << endl;
2604 cerr << os.str();
2605 }
2606 directory.initialize(*M, tie_break);
2607 if (verbose) {
2608 std::ostringstream os;
2609 os << *prefix << "Done initializing Directory" << endl;
2610 cerr << os.str();
2611 }
2612 size_t numMyElems = M->getLocalNumElements();
2613 ArrayView<const GO> myElems = M->getLocalElementList();
2614 Array<int> owner_procs_vec(numMyElems);
2615 if (verbose) {
2616 std::ostringstream os;
2617 os << *prefix << "Call Directory::getDirectoryEntries: ";
2618 verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2619 os << endl;
2620 cerr << os.str();
2621 }
2622 directory.getDirectoryEntries(*M, myElems, owner_procs_vec());
2623 if (verbose) {
2624 std::ostringstream os;
2625 os << *prefix << "getDirectoryEntries result: ";
2626 verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2627 os << endl;
2628 cerr << os.str();
2629 }
2630
2631 const int myRank = M->getComm()->getRank();
2632 Array<GO> myOwned_vec(numMyElems);
2633 size_t numMyOwnedElems = 0;
2634 for (size_t i = 0; i < numMyElems; ++i) {
2635 const GO GID = myElems[i];
2636 const int owner = owner_procs_vec[i];
2637 if (myRank == owner) {
2638 myOwned_vec[numMyOwnedElems++] = GID;
2639 }
2640 }
2641 myOwned_vec.resize(numMyOwnedElems);
2642
2643 // FIXME (mfh 08 May 2014) The above Directory should be perfectly
2644 // valid for the new Map. Why can't we reuse it?
2645 const global_size_t GINV =
2646 Tpetra::Details::OrdinalTraits<global_size_t>::invalid();
2647 if (verbose) {
2648 std::ostringstream os;
2649 os << *prefix << "Create Map: ";
2650 verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2651 os << endl;
2652 cerr << os.str();
2653 }
2654 RCP<const map_type> retMap(new map_type(GINV, myOwned_vec(), M->getIndexBase(),
2655 M->getComm()));
2656 if (verbose) {
2657 std::ostringstream os;
2658 os << *prefix << "Done" << endl;
2659 cerr << os.str();
2660 }
2661 return retMap;
2662}
2663
2664//
2665// Explicit instantiation macro
2666//
2667// Must be expanded from within the Tpetra namespace!
2668//
2669
2671
2672#define TPETRA_MAP_INSTANT(LO, GO, NODE) \
2673 \
2674 template class Map<LO, GO, NODE>; \
2675 \
2676 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2677 createLocalMapWithNode<LO, GO, NODE>(const size_t numElements, \
2678 const Teuchos::RCP<const Teuchos::Comm<int>>& comm); \
2679 \
2680 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2681 createContigMapWithNode<LO, GO, NODE>(const global_size_t numElements, \
2682 const size_t localNumElements, \
2683 const Teuchos::RCP<const Teuchos::Comm<int>>& comm); \
2684 \
2685 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2686 createNonContigMapWithNode(const Teuchos::ArrayView<const GO>& elementList, \
2687 const Teuchos::RCP<const Teuchos::Comm<int>>& comm); \
2688 \
2689 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2690 createUniformContigMapWithNode<LO, GO, NODE>(const global_size_t numElements, \
2691 const Teuchos::RCP<const Teuchos::Comm<int>>& comm); \
2692 \
2693 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2694 createOneToOne(const Teuchos::RCP<const Map<LO, GO, NODE>>& M); \
2695 \
2696 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2697 createOneToOne(const Teuchos::RCP<const Map<LO, GO, NODE>>& M, \
2698 const Tpetra::Details::TieBreak<LO, GO>& tie_break);
2699
2701#define TPETRA_MAP_INSTANT_DEFAULTNODE(LO, GO) \
2702 template Teuchos::RCP<const Map<LO, GO>> \
2703 createLocalMap<LO, GO>(const size_t, const Teuchos::RCP<const Teuchos::Comm<int>>&); \
2704 \
2705 template Teuchos::RCP<const Map<LO, GO>> \
2706 createContigMap<LO, GO>(global_size_t, size_t, \
2707 const Teuchos::RCP<const Teuchos::Comm<int>>&); \
2708 \
2709 template Teuchos::RCP<const Map<LO, GO>> \
2710 createNonContigMap(const Teuchos::ArrayView<const GO>&, \
2711 const Teuchos::RCP<const Teuchos::Comm<int>>&); \
2712 \
2713 template Teuchos::RCP<const Map<LO, GO>> \
2714 createUniformContigMap<LO, GO>(const global_size_t, \
2715 const Teuchos::RCP<const Teuchos::Comm<int>>&);
2716
2717#endif // TPETRA_MAP_DEF_HPP
Functions for initializing and finalizing Tpetra.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration of Tpetra::Details::extractMpiCommFromTeuchos.
Declaration of a function that prints strings from each process.
Declaration of Tpetra::Details::initializeKokkos.
Declaration of Tpetra::Details::printOnce.
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
Description of Tpetra's behavior.
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
"Local" part of Map suitable for Kokkos kernels.
Interface for breaking ties in ownership.
Implement mapping from global ID to process ID and local ID.
A parallel distribution of indices over processes.
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
bool isOneToOne() const
Whether the Map is one to one.
std::string description() const
Implementation of Teuchos::Describable.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
Node node_type
Legacy typedef that will go away at some point.
Map()
Default constructor (that does nothing).
GlobalOrdinal global_ordinal_type
The type of global indices.
LookupStatus getRemoteIndexList(const Teuchos::ArrayView< const global_ordinal_type > &GIDList, const Teuchos::ArrayView< int > &nodeIDList, const Teuchos::ArrayView< local_ordinal_type > &LIDList) const
Return the process ranks and corresponding local indices for the given global indices.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
bool isUniform() const
Whether the range of global indices is uniform.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
typename device_type::execution_space execution_space
The Kokkos execution space.
LO local_ordinal_type
The type of local indices.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > removeEmptyProcesses() const
Advanced methods.
void lazyPushToHost() const
Push the device data to host, if needed.
bool isCompatible(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is compatible with this Map.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
bool isLocallyFitted(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is locally fitted to this Map.
virtual ~Map()
Destructor (virtual for memory safety of derived classes).
global_indices_array_device_type getMyGlobalIndicesDevice() const
Return a view of the global indices owned by this process on the Map's device.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > replaceCommWithSubset(const Teuchos::RCP< const Teuchos::Comm< int > > &newComm) const
Replace this Map's communicator with a subset communicator.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
bool isSameAs(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is identical to this Map.
local_map_type getLocalMap() const
Get the LocalMap for Kokkos-Kernels.
global_indices_array_type getMyGlobalIndices() const
Return a view of the global indices owned by this process.
typename Node::device_type device_type
This class' Kokkos::Device specialization.
Implementation details of Tpetra.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void printOnce(std::ostream &out, const std::string &s, const Teuchos::Comm< int > *comm)
Print on one process of the given communicator, or at least try to do so (if MPI is not initialized).
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
bool congruent(const Teuchos::Comm< int > &comm1, const Teuchos::Comm< int > &comm2)
Whether the two communicators are congruent.
void initializeKokkos()
Initialize Kokkos, using command-line arguments (if any) given to Teuchos::GlobalMPISession.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createLocalMap(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with the default Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createUniformContigMap(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with the default Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createNonContigMap(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a non-contiguous Map using the default Kokkos::Device type.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createUniformContigMapWithNode(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with a user-specified Kokkos Node.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createContigMap(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map using the defaul...
size_t global_size_t
Global size_t object.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified,...
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createLocalMapWithNode(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with a specified Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createNonContigMapWithNode(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a noncontiguous Map with a user-specified, possibly nondefault Kokkos Node ...
LocalGlobal
Enum for local versus global allocation of Map entries.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a (potentially) nonuniformly distributed, contiguous Map for a user-specifi...