Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Map_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
14
15#ifndef TPETRA_MAP_DEF_HPP
16#define TPETRA_MAP_DEF_HPP
17
18#include <memory>
19#include <sstream>
20#include <stdexcept>
21#include <typeinfo>
22
23#include "Teuchos_as.hpp"
24#include "Teuchos_TypeNameTraits.hpp"
25#include "Teuchos_CommHelpers.hpp"
26
27#include "Tpetra_Directory.hpp" // must include for implicit instantiation to work
29#include "Tpetra_Details_FixedHashTable.hpp"
32#include "Tpetra_Core.hpp"
33#include "Tpetra_Util.hpp"
34#include "Tpetra_Details_mpiIsInitialized.hpp"
35#include "Tpetra_Details_extractMpiCommFromTeuchos.hpp" // teuchosCommIsAnMpiComm
38
39namespace { // (anonymous)
40
41void checkMapInputArray(const char ctorName[],
42 const void* indexList,
43 const size_t indexListSize,
44 const Teuchos::Comm<int>* const comm) {
46
47 const bool debug = Behavior::debug("Map");
48 if (debug) {
49 using std::endl;
50 using Teuchos::outArg;
51 using Teuchos::REDUCE_MIN;
52 using Teuchos::reduceAll;
53
54 const int myRank = comm == nullptr ? 0 : comm->getRank();
55 const bool verbose = Behavior::verbose("Map");
56 std::ostringstream lclErrStrm;
57 int lclSuccess = 1;
58
59 if (indexListSize != 0 && indexList == nullptr) {
60 lclSuccess = 0;
61 if (verbose) {
62 lclErrStrm << "Proc " << myRank << ": indexList is null, "
63 "but indexListSize="
64 << indexListSize << " != 0." << endl;
65 }
66 }
67 int gblSuccess = 0; // output argument
68 reduceAll(*comm, REDUCE_MIN, lclSuccess, outArg(gblSuccess));
69 if (gblSuccess != 1) {
70 std::ostringstream gblErrStrm;
71 gblErrStrm << "Tpetra::Map constructor " << ctorName << " detected a problem with the input array "
72 "(raw array, Teuchos::ArrayView, or Kokkos::View) "
73 "of global indices."
74 << endl;
75 if (verbose) {
76 using ::Tpetra::Details::gathervPrint;
77 gathervPrint(gblErrStrm, lclErrStrm.str(), *comm);
78 }
79 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, gblErrStrm.str());
80 }
81 }
82}
83
84template <class LocalOrdinal, class GlobalOrdinal, class ViewType>
85void computeConstantsOnDevice(const ViewType& entryList, GlobalOrdinal& minMyGID, GlobalOrdinal& maxMyGID, GlobalOrdinal& firstContiguousGID, GlobalOrdinal& lastContiguousGID_val, LocalOrdinal& lastContiguousGID_loc) {
86 using LO = LocalOrdinal;
87 using GO = GlobalOrdinal;
88 using exec_space = typename ViewType::device_type::execution_space;
89 using range_policy = Kokkos::RangePolicy<exec_space, Kokkos::IndexType<LO>>;
90 const LO numLocalElements = entryList.extent(0);
91
92 // We're going to use the minloc backwards because we need to have it sort on the "location" and have the "value" along for the
93 // ride, rather than the other way around
94 typedef typename Kokkos::MinLoc<LO, GO>::value_type minloc_type;
95 minloc_type myMinLoc;
96
97 // Find the initial sequence of parallel gids
98 // To find the lastContiguousGID_, we find the first guy where entryList[i] - entryList[0] != i-0. That's the first non-contiguous guy.
99 // We want the one *before* that guy.
100 Kokkos::parallel_reduce(
101 range_policy(0, numLocalElements), KOKKOS_LAMBDA(const LO& i, GO& l_myMin, GO& l_myMax, GO& l_firstCont, minloc_type& l_lastCont) {
102 GO entry_0 = entryList[0];
103 GO entry_i = entryList[i];
104
105 // Easy stuff
106 l_myMin = (l_myMin < entry_i) ? l_myMin : entry_i;
107 l_myMax = (l_myMax > entry_i) ? l_myMax : entry_i;
108 l_firstCont = entry_0;
109
110 if (entry_i - entry_0 != i && l_lastCont.val >= i) {
111 // We're non-contiguous, so the guy before us could be the last contiguous guy
112 l_lastCont.val = i - 1;
113 l_lastCont.loc = entryList[i - 1];
114 } else if (i == numLocalElements - 1 && i < l_lastCont.val) {
115 // If we're last, we always think we're the last contiguous guy, unless someone non-contiguous is already here
116 l_lastCont.val = i;
117 l_lastCont.loc = entry_i;
118 }
119 },
120 Kokkos::Min<GO>(minMyGID), Kokkos::Max<GO>(maxMyGID), Kokkos::Min<GO>(firstContiguousGID), Kokkos::MinLoc<LO, GO>(myMinLoc));
121
122 // This switch is intentional, since we're using MinLoc backwards
123 lastContiguousGID_val = myMinLoc.loc;
124 lastContiguousGID_loc = myMinLoc.val;
125}
126
127} // namespace
128
129namespace Tpetra {
130
131template <class LocalOrdinal, class GlobalOrdinal, class Node>
133 Map()
134 : comm_(new Teuchos::SerialComm<int>())
135 , indexBase_(0)
136 , numGlobalElements_(0)
137 , numLocalElements_(0)
142 , firstContiguousGID_(Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid())
143 , lastContiguousGID_(Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid())
144 , uniform_(false)
145 , // trivially
146 contiguous_(false)
147 , distributed_(false)
148 , // no communicator yet
149 directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
152}
153
154template <class LocalOrdinal, class GlobalOrdinal, class Node>
158 const Teuchos::RCP<const Teuchos::Comm<int>>& comm,
159 const LocalGlobal lOrG)
160 : comm_(comm)
161 , uniform_(true)
162 , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
163 using std::endl;
164 using Teuchos::as;
165 using Teuchos::broadcast;
166 using Teuchos::outArg;
167 using Teuchos::REDUCE_MAX;
168 using Teuchos::REDUCE_MIN;
169 using Teuchos::reduceAll;
170 using Teuchos::typeName;
171 using GO = global_ordinal_type;
172 using GST = global_size_t;
173 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
174 const char funcName[] = "Map(gblNumInds,indexBase,comm,LG)";
175 const char exPfx[] =
176 "Tpetra::Map::Map(gblNumInds,indexBase,comm,LG): ";
177
178 const bool debug = Details::Behavior::debug("Map");
179 const bool verbose = Details::Behavior::verbose("Map");
180 std::unique_ptr<std::string> prefix;
181 if (verbose) {
183 comm_.getRawPtr(), "Map", funcName);
184 std::ostringstream os;
185 os << *prefix << "Start" << endl;
186 std::cerr << os.str();
187 }
190
191 // In debug mode only, check whether numGlobalElements and
192 // indexBase are the same over all processes in the communicator.
193 if (debug) {
204 std::invalid_argument, exPfx << "All processes must "
205 "provide the same number of global elements. Process 0 set "
206 "numGlobalElements="
207 << proc0NumGlobalElements << ". The "
208 "calling process "
209 << comm->getRank() << " set "
210 "numGlobalElements="
211 << numGlobalElements << ". The min "
212 "and max values over all processes are "
213 << minNumGlobalElements << " resp. " << maxNumGlobalElements << ".");
214
222 std::invalid_argument, exPfx << "All processes must "
223 "provide the same indexBase argument. Process 0 set "
224 "indexBase="
225 << proc0IndexBase << ". The calling process " << comm->getRank() << " set indexBase=" << indexBase << ". The min and max values over all processes are " << minIndexBase << " resp. " << maxIndexBase << ".");
226 }
227
228 // Distribute the elements across the processes in the given
229 // communicator so that global IDs (GIDs) are
230 //
231 // - Nonoverlapping (only one process owns each GID)
232 // - Contiguous (the sequence of GIDs is nondecreasing, and no two
233 // adjacent GIDs differ by more than one)
234 // - As evenly distributed as possible (the numbers of GIDs on two
235 // different processes do not differ by more than one)
236
237 // All processes have the same numGlobalElements, but we still
238 // need to check that it is valid. numGlobalElements must be
239 // positive and not the "invalid" value (GSTI).
240 //
241 // This comparison looks funny, but it avoids compiler warnings
242 // for comparing unsigned integers (numGlobalElements_in is a
243 // GST, which is unsigned) while still working if we
244 // later decide to make GST signed.
247 std::invalid_argument, exPfx << "numGlobalElements (= " << numGlobalElements << ") must be nonnegative.");
248
249 TEUCHOS_TEST_FOR_EXCEPTION(numGlobalElements == GSTI, std::invalid_argument, exPfx << "You provided numGlobalElements = Teuchos::OrdinalTraits<"
250 "Tpetra::global_size_t>::invalid(). This version of the "
251 "constructor requires a valid value of numGlobalElements. "
252 "You probably mistook this constructor for the \"contiguous "
253 "nonuniform\" constructor, which can compute the global "
254 "number of elements for you if you set numGlobalElements to "
255 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid().");
256
257 size_t numLocalElements = 0; // will set below
258 if (lOrG == GloballyDistributed) {
259 // Compute numLocalElements:
260 //
261 // If numGlobalElements == numProcs * B + remainder,
262 // then Proc r gets B+1 elements if r < remainder,
263 // and B elements if r >= remainder.
264 //
265 // This strategy is valid for any value of numGlobalElements and
266 // numProcs, including the following border cases:
267 // - numProcs == 1
268 // - numLocalElements < numProcs
269 //
270 // In the former case, remainder == 0 && numGlobalElements ==
271 // numLocalElements. In the latter case, remainder ==
272 // numGlobalElements && numLocalElements is either 0 or 1.
273 const GST numProcs = static_cast<GST>(comm_->getSize());
274 const GST myRank = static_cast<GST>(comm_->getRank());
277
278 GO startIndex;
279 if (myRank < remainder) {
280 numLocalElements = static_cast<size_t>(1) + static_cast<size_t>(quotient);
281 // myRank was originally an int, so it should never overflow
282 // reasonable GO types.
284 } else {
288 }
289
290 minMyGID_ = indexBase + startIndex;
291 maxMyGID_ = indexBase + startIndex + numLocalElements - 1;
292 minAllGID_ = indexBase;
293 maxAllGID_ = indexBase + numGlobalElements - 1;
294 distributed_ = (numProcs > 1);
295 } else { // lOrG == LocallyReplicated
297 minMyGID_ = indexBase;
298 maxMyGID_ = indexBase + numGlobalElements - 1;
299 distributed_ = false;
300 }
301
302 minAllGID_ = indexBase;
303 maxAllGID_ = indexBase + numGlobalElements - 1;
304 indexBase_ = indexBase;
305 numGlobalElements_ = numGlobalElements;
306 numLocalElements_ = numLocalElements;
307 firstContiguousGID_ = minMyGID_;
308 lastContiguousGID_ = maxMyGID_;
309 contiguous_ = true;
310
311 // Create the Directory on demand in getRemoteIndexList().
312 // setupDirectory ();
313
314 if (verbose) {
315 std::ostringstream os;
316 os << *prefix << "Done" << endl;
317 std::cerr << os.str();
318 }
319}
320
321template <class LocalOrdinal, class GlobalOrdinal, class Node>
324 const size_t numLocalElements,
326 const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
327 : comm_(comm)
328 , uniform_(false)
329 , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
330 using std::endl;
331 using Teuchos::as;
332 using Teuchos::broadcast;
333 using Teuchos::outArg;
334 using Teuchos::REDUCE_MAX;
335 using Teuchos::REDUCE_MIN;
336 using Teuchos::REDUCE_SUM;
337 using Teuchos::reduceAll;
338 using Teuchos::scan;
339 using GO = global_ordinal_type;
340 using GST = global_size_t;
341 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
342 const char funcName[] =
343 "Map(gblNumInds,lclNumInds,indexBase,comm)";
344 const char exPfx[] =
345 "Tpetra::Map::Map(gblNumInds,lclNumInds,indexBase,comm): ";
346 const char suffix[] =
347 ". Please report this bug to the Tpetra developers.";
348
349 const bool debug = Details::Behavior::debug("Map");
350 const bool verbose = Details::Behavior::verbose("Map");
351 std::unique_ptr<std::string> prefix;
352 if (verbose) {
354 comm_.getRawPtr(), "Map", funcName);
355 std::ostringstream os;
356 os << *prefix << "Start" << endl;
357 std::cerr << os.str();
358 }
361
362 // Global sum of numLocalElements over all processes.
363 // Keep this for later debug checks.
365 if (debug) {
366 debugGlobalSum = initialNonuniformDebugCheck(exPfx,
367 numGlobalElements, numLocalElements, indexBase, comm);
368 }
369
370 // Distribute the elements across the nodes so that they are
371 // - non-overlapping
372 // - contiguous
373
374 // This differs from the first Map constructor (that only takes a
375 // global number of elements) in that the user has specified the
376 // number of local elements, so that the elements are not
377 // (necessarily) evenly distributed over the processes.
378
379 // Compute my local offset. This is an inclusive scan, so to get
380 // the final offset, we subtract off the input.
381 GO scanResult = 0;
384
385 if (numGlobalElements != GSTI) {
386 numGlobalElements_ = numGlobalElements; // Use the user's value.
387 } else {
388 // Inclusive scan means that the last process has the final sum.
389 // Rather than doing a reduceAll to get the sum of
390 // numLocalElements, we can just have the last process broadcast
391 // its result. That saves us a round of log(numProcs) messages.
392 const int numProcs = comm->getSize();
394 if (numProcs > 1) {
395 broadcast(*comm, numProcs - 1, outArg(globalSum));
396 }
397 numGlobalElements_ = globalSum;
398
399 if (debug) {
400 // No need for an all-reduce here; both come from collectives.
401 TEUCHOS_TEST_FOR_EXCEPTION(globalSum != debugGlobalSum, std::logic_error, exPfx << "globalSum = " << globalSum << " != debugGlobalSum = " << debugGlobalSum << suffix);
402 }
403 }
404 numLocalElements_ = numLocalElements;
405 indexBase_ = indexBase;
406 minAllGID_ = (numGlobalElements_ == 0) ? std::numeric_limits<GO>::max() : indexBase;
407 maxAllGID_ = (numGlobalElements_ == 0) ? std::numeric_limits<GO>::lowest() : indexBase + GO(numGlobalElements_) - GO(1);
408 minMyGID_ = (numLocalElements_ == 0) ? std::numeric_limits<GO>::max() : indexBase + GO(myOffset);
409 maxMyGID_ = (numLocalElements_ == 0) ? std::numeric_limits<GO>::lowest() : indexBase + myOffset + GO(numLocalElements) - GO(1);
410 firstContiguousGID_ = minMyGID_;
411 lastContiguousGID_ = maxMyGID_;
412 contiguous_ = true;
413 distributed_ = checkIsDist();
414
415 // Create the Directory on demand in getRemoteIndexList().
416 // setupDirectory ();
417
418 if (verbose) {
419 std::ostringstream os;
420 os << *prefix << "Done" << endl;
421 std::cerr << os.str();
422 }
423}
424
425template <class LocalOrdinal, class GlobalOrdinal, class Node>
429 const char errorMessagePrefix[],
431 const size_t numLocalElements,
433 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) const {
434 const bool debug = Details::Behavior::debug("Map");
435 if (!debug) {
436 return global_size_t(0);
437 }
438
439 using Teuchos::broadcast;
440 using Teuchos::outArg;
441 using Teuchos::ptr;
442 using Teuchos::REDUCE_MAX;
443 using Teuchos::REDUCE_MIN;
444 using Teuchos::REDUCE_SUM;
445 using Teuchos::reduceAll;
446 using GO = global_ordinal_type;
447 using GST = global_size_t;
448 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
449
450 // The user has specified the distribution of indices over the
451 // processes. The distribution is not necessarily contiguous or
452 // equally shared over the processes.
453 //
454 // We assume that the number of local elements can be stored in a
455 // size_t. The instance member numLocalElements_ is a size_t, so
456 // this variable and that should have the same type.
457
458 GST debugGlobalSum = 0; // Will be global sum of numLocalElements
461 // In debug mode only, check whether numGlobalElements and
462 // indexBase are the same over all processes in the communicator.
463 {
474 std::invalid_argument, errorMessagePrefix << "All processes "
475 "must provide the same number of global elements, even if "
476 "that argument is "
477 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
478 "(which signals that the Map should compute the global "
479 "number of elements). Process 0 set numGlobalElements"
480 "="
481 << proc0NumGlobalElements << ". The calling process " << comm->getRank() << " set numGlobalElements=" << numGlobalElements << ". The min and max values over all "
482 "processes are "
483 << minNumGlobalElements << " resp. " << maxNumGlobalElements << ".");
484
492 std::invalid_argument, errorMessagePrefix << "All processes must provide the same indexBase argument. "
493 "Process 0 set indexBase = "
494 << proc0IndexBase << ". The "
495 "calling process "
496 << comm->getRank() << " set indexBase=" << indexBase << ". The min and max values over all "
497 "processes are "
498 << minIndexBase << " resp. " << maxIndexBase << ".");
499
500 // Make sure that the sum of numLocalElements over all processes
501 // equals numGlobalElements.
504 std::invalid_argument,
505 errorMessagePrefix << "The sum of each process' number of "
506 "indices over all processes, "
507 << debugGlobalSum << ", != "
508 << "numGlobalElements=" << numGlobalElements << ". If you "
509 "would like this constructor to compute numGlobalElements "
510 "for you, you may set numGlobalElements="
511 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
512 "on input. Please note that this is NOT necessarily -1.");
513 }
514 return debugGlobalSum;
515}
517template <class LocalOrdinal, class GlobalOrdinal, class Node>
520 const char errorMessagePrefix[],
522 const Kokkos::View<const global_ordinal_type*,
523 Kokkos::LayoutLeft,
524 Kokkos::HostSpace,
525 Kokkos::MemoryUnmanaged>& entryList_host,
527 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
528 Tpetra::Details::ProfilingRegion pr("Map::initWithNonownedHostIndexList()");
529
530 using Kokkos::LayoutLeft;
531 using Kokkos::subview;
532 using Kokkos::View;
533 using Kokkos::view_alloc;
534 using Kokkos::WithoutInitializing;
535 using Teuchos::as;
536 using Teuchos::broadcast;
537 using Teuchos::outArg;
538 using Teuchos::ptr;
539 using Teuchos::REDUCE_MAX;
540 using Teuchos::REDUCE_MIN;
541 using Teuchos::REDUCE_SUM;
542 using Teuchos::reduceAll;
543 using LO = local_ordinal_type;
544 using GO = global_ordinal_type;
545 using GST = global_size_t;
546 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
547 // Make sure that Kokkos has been initialized (Github Issue #513).
548 TEUCHOS_TEST_FOR_EXCEPTION(!Kokkos::is_initialized(), std::runtime_error,
549 errorMessagePrefix << "The Kokkos execution space "
550 << Teuchos::TypeNameTraits<execution_space>::name()
551 << " has not been initialized. "
552 "Please initialize it before creating a Map.")
553
554 // The user has specified the distribution of indices over the
555 // processes, via the input array of global indices on each
556 // process. The distribution is not necessarily contiguous or
557 // equally shared over the processes.
558
559 // The length of the input array on this process is the number of
560 // local indices to associate with this process, even though the
561 // input array contains global indices. We assume that the number
562 // of local indices on a process can be stored in a size_t;
563 // numLocalElements_ is a size_t, so this variable and that should
564 // have the same type.
565 const size_t numLocalElements(entryList_host.size());
566
567 initialNonuniformDebugCheck(errorMessagePrefix, numGlobalElements,
569
570 // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
571 // reduction is redundant, since the directory Map will have to do
572 // the same thing. Thus, we could do the scan and broadcast for
573 // the directory Map here, and give the computed offsets to the
574 // directory Map's constructor. However, a reduction costs less
575 // than a scan and broadcast, so this still saves time if users of
576 // this Map don't ever need the Directory (i.e., if they never
577 // call getRemoteIndexList on this Map).
578 if (numGlobalElements != GSTI) {
579 numGlobalElements_ = numGlobalElements; // Use the user's value.
580 } else { // The user wants us to compute the sum.
581 reduceAll(*comm, REDUCE_SUM,
582 static_cast<GST>(numLocalElements),
583 outArg(numGlobalElements_));
584 }
585
586 // mfh 20 Feb 2013: We've never quite done the right thing for
587 // duplicate GIDs here. Duplicate GIDs have always been counted
588 // distinctly in numLocalElements_, and thus should get a
589 // different LID. However, we've always used std::map or a hash
590 // table for the GID -> LID lookup table, so distinct GIDs always
591 // map to the same LID. Furthermore, the order of the input GID
592 // list matters, so it's not desirable to sort for determining
593 // uniqueness.
594 //
595 // I've chosen for now to write this code as if the input GID list
596 // contains no duplicates. If this is not desired, we could use
597 // the lookup table itself to determine uniqueness: If we haven't
598 // seen the GID before, it gets a new LID and it's added to the
599 // LID -> GID and GID -> LID tables. If we have seen the GID
600 // before, it doesn't get added to either table. I would
601 // implement this, but it would cost more to do the double lookups
602 // in the table (one to check, and one to insert).
603 //
604 // More importantly, since we build the GID -> LID table in (a
605 // thread-) parallel (way), the order in which duplicate GIDs may
606 // get inserted is not defined. This would make the assignment of
607 // LID to GID nondeterministic.
608
609 numLocalElements_ = numLocalElements;
610 indexBase_ = indexBase;
611
612 minMyGID_ = indexBase_;
613 maxMyGID_ = indexBase_;
614
615 // NOTE (mfh 27 May 2015): While finding the initial contiguous
616 // GID range requires looking at all the GIDs in the range,
617 // dismissing an interval of GIDs only requires looking at the
618 // first and last GIDs. Thus, we could do binary search backwards
619 // from the end in order to catch the common case of a contiguous
620 // interval followed by noncontiguous entries. On the other hand,
621 // we could just expose this case explicitly as yet another Map
622 // constructor, and avoid the trouble of detecting it.
623 if (numLocalElements_ > 0) {
624 // Find contiguous GID range, with the restriction that the
625 // beginning of the range starts with the first entry. While
626 // doing so, fill in the LID -> GID table.
627 typename decltype(lgMap_)::non_const_type lgMap(view_alloc("lgMap", WithoutInitializing), numLocalElements_);
628 auto lgMap_host =
629 Kokkos::create_mirror_view(Kokkos::HostSpace(), lgMap);
631 // The input array entryList_host is already on host, so we
632 // don't need to take a host view of it.
633 // auto entryList_host =
634 // Kokkos::create_mirror_view (Kokkos::HostSpace (), entryList);
635 // Kokkos::deep_copy (entryList_host, entryList);
636
637 firstContiguousGID_ = entryList_host[0];
638 lastContiguousGID_ = firstContiguousGID_ + 1;
639
640 // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
641 // anyway, so we have to look at them all. The logical way to
642 // find the first noncontiguous entry would thus be to "reduce,"
643 // where the local reduction result is whether entryList[i] + 1
644 // == entryList[i+1].
645
646 lgMap_host[0] = firstContiguousGID_;
647 size_t i = 1;
648 for (; i < numLocalElements_; ++i) {
650 const LO curLid = as<LO>(i);
651
652 if (lastContiguousGID_ != curGid) break;
653
654 // Add the entry to the LID->GID table only after we know that
655 // the current GID is in the initial contiguous sequence, so
656 // that we don't repeat adding it in the first iteration of
657 // the loop below over the remaining noncontiguous GIDs.
659 ++lastContiguousGID_;
660 }
661 --lastContiguousGID_;
662 // NOTE: i is the first non-contiguous index.
663
664 // [firstContiguousGID_, lastContigousGID_] is the initial
665 // sequence of contiguous GIDs. We can start the min and max
666 // GID using this range.
667 minMyGID_ = firstContiguousGID_;
668 maxMyGID_ = lastContiguousGID_;
669
670 // Compute the GID -> LID lookup table, _not_ including the
671 // initial sequence of contiguous GIDs.
673 {
674 const std::pair<size_t, size_t> ncRange(i, entryList_host.extent(0));
676 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(nonContigGids_host.extent(0)) !=
677 static_cast<size_t>(entryList_host.extent(0) - i),
678 std::logic_error,
679 "Tpetra::Map noncontiguous constructor: "
680 "nonContigGids_host.extent(0) = "
682 << " != entryList_host.extent(0) - i = "
683 << (entryList_host.extent(0) - i) << " = "
684 << entryList_host.extent(0) << " - " << i
685 << ". Please report this bug to the Tpetra developers.");
686
687 // FixedHashTable's constructor expects an owned device View,
688 // so we must deep-copy the subview of the input indices.
691 nonContigGids_host.size());
692
693 // DEEP_COPY REVIEW - HOST-TO-DEVICE
694 Kokkos::deep_copy(execution_space(), nonContigGids, nonContigGids_host);
695 Kokkos::fence("Map::initWithNonownedHostIndexList"); // for UVM issues below - which will be refatored soon so FixedHashTable can build as pure CudaSpace - then I think remove this fence
696
699 // Make host version - when memory spaces match these just do trivial assignment
700 glMapHost_ = global_to_local_table_host_type(glMap_);
701 }
702
703 // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
704 // table above, we have to look at all the (noncontiguous) input
705 // indices anyway. Thus, why not have the constructor compute
706 // and return the min and max?
707
708 for (; i < numLocalElements_; ++i) {
710 const LO curLid = static_cast<LO>(i);
711 lgMap_host[curLid] = curGid; // LID -> GID table
712
713 // While iterating through entryList, we compute its
714 // (process-local) min and max elements.
715 if (curGid < minMyGID_) {
716 minMyGID_ = curGid;
717 }
718 if (curGid > maxMyGID_) {
719 maxMyGID_ = curGid;
720 }
721 }
722
723 // We filled lgMap on host above; now sync back to device.
724 // DEEP_COPY REVIEW - HOST-TO-DEVICE
725 Kokkos::deep_copy(execution_space(), lgMap, lgMap_host);
726
727 // "Commit" the local-to-global lookup table we filled in above.
728 lgMap_ = lgMap;
729 // We've already created this, so use it.
730 lgMapHost_ = lgMap_host;
731
732 } else {
733 minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
734 maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
735 // This insures tests for GIDs in the range
736 // [firstContiguousGID_, lastContiguousGID_] fail for processes
737 // with no local elements.
738 firstContiguousGID_ = indexBase_ + 1;
739 lastContiguousGID_ = indexBase_;
740 // glMap_ was default constructed, so it's already empty.
741 }
742
743 // Compute the min and max of all processes' GIDs. If
744 // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
745 // are both indexBase_. This is wrong, but fixing it would
746 // require either a fancy sparse all-reduce, or a custom reduction
747 // operator that ignores invalid values ("invalid" means
748 // Tpetra::Details::OrdinalTraits<GO>::invalid()).
749 //
750 // Also, while we're at it, use the same all-reduce to figure out
751 // if the Map is distributed. "Distributed" means that there is
752 // at least one process with a number of local elements less than
753 // the number of global elements.
754 //
755 // We're computing the min and max of all processes' GIDs using a
756 // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
757 // and y are signed). (This lets us combine the min and max into
758 // a single all-reduce.) If each process sets localDist=1 if its
759 // number of local elements is strictly less than the number of
760 // global elements, and localDist=0 otherwise, then a MAX
761 // all-reduce on localDist tells us if the Map is distributed (1
762 // if yes, 0 if no). Thus, we can append localDist onto the end
763 // of the data and get the global result from the all-reduce.
764 if (std::numeric_limits<GO>::is_signed) {
765 // Does my process NOT own all the elements?
766 const GO localDist =
767 (as<GST>(numLocalElements_) < numGlobalElements_) ? 1 : 0;
769 GO minMaxInput[3];
770 minMaxInput[0] = -minMyGID_;
771 minMaxInput[1] = maxMyGID_;
773
774 GO minMaxOutput[3];
775 minMaxOutput[0] = 0;
776 minMaxOutput[1] = 0;
777 minMaxOutput[2] = 0;
779 minAllGID_ = -minMaxOutput[0];
780 maxAllGID_ = minMaxOutput[1];
781 const GO globalDist = minMaxOutput[2];
782 distributed_ = (comm_->getSize() > 1 && globalDist == 1);
783 } else { // unsigned; use two reductions
784 // This is always correct, no matter the signedness of GO.
785 reduceAll<int, GO>(*comm_, REDUCE_MIN, minMyGID_, outArg(minAllGID_));
786 reduceAll<int, GO>(*comm_, REDUCE_MAX, maxMyGID_, outArg(maxAllGID_));
787 distributed_ = checkIsDist();
789
790 contiguous_ = false; // "Contiguous" is conservative.
791
793 minAllGID_ < indexBase_,
794 std::invalid_argument,
795 "Tpetra::Map constructor (noncontiguous): "
796 "Minimum global ID = "
797 << minAllGID_ << " over all process(es) is "
798 "less than the given indexBase = "
799 << indexBase_ << ".");
800
801 // Create the Directory on demand in getRemoteIndexList().
802 // setupDirectory ();
803}
804
805template <class LocalOrdinal, class GlobalOrdinal, class Node>
808 const GlobalOrdinal indexList[],
811 const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
812 : comm_(comm)
813 , uniform_(false)
814 , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
815 using std::endl;
816 const char funcName[] =
817 "Map(gblNumInds,indexList,indexListSize,indexBase,comm)";
818
819 const bool verbose = Details::Behavior::verbose("Map");
820 std::unique_ptr<std::string> prefix;
821 if (verbose) {
822 prefix = Details::createPrefix(
823 comm_.getRawPtr(), "Map", funcName);
824 std::ostringstream os;
825 os << *prefix << "Start" << endl;
826 std::cerr << os.str();
827 }
831 checkMapInputArray("(GST, const GO[], LO, GO, comm)",
832 indexList, static_cast<size_t>(indexListSize),
833 comm.getRawPtr());
834 // Not quite sure if I trust all code to behave correctly if the
835 // pointer is nonnull but the array length is nonzero, so I'll
836 // make sure the raw pointer is null if the length is zero.
837 const GlobalOrdinal* const indsRaw = indexListSize == 0 ? NULL : indexList;
838 Kokkos::View<const GlobalOrdinal*,
839 Kokkos::LayoutLeft,
840 Kokkos::HostSpace,
841 Kokkos::MemoryUnmanaged>
843 initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
844 indexBase, comm);
845 if (verbose) {
846 std::ostringstream os;
847 os << *prefix << "Done" << endl;
848 std::cerr << os.str();
849 }
850}
851
852template <class LocalOrdinal, class GlobalOrdinal, class Node>
855 const Teuchos::ArrayView<const GlobalOrdinal>& entryList,
857 const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
858 : comm_(comm)
859 , uniform_(false)
860 , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
861 using std::endl;
862 const char* funcName = "Map(gblNumInds,entryList(Teuchos::ArrayView),indexBase,comm)";
863
864 const bool verbose = Details::Behavior::verbose("Map");
865 std::unique_ptr<std::string> prefix;
866 if (verbose) {
867 prefix = Details::createPrefix(
868 comm_.getRawPtr(), "Map", funcName);
869 std::ostringstream os;
870 os << *prefix << "Start" << endl;
871 std::cerr << os.str();
872 }
876 const size_t numLclInds = static_cast<size_t>(entryList.size());
877 checkMapInputArray("(GST, ArrayView, GO, comm)",
878 entryList.getRawPtr(), numLclInds,
879 comm.getRawPtr());
880 // Not quite sure if I trust both ArrayView and View to behave
881 // correctly if the pointer is nonnull but the array length is
882 // nonzero, so I'll make sure it's null if the length is zero.
883 const GlobalOrdinal* const indsRaw =
884 numLclInds == 0 ? NULL : entryList.getRawPtr();
885 Kokkos::View<const GlobalOrdinal*,
886 Kokkos::LayoutLeft,
887 Kokkos::HostSpace,
888 Kokkos::MemoryUnmanaged>
890 initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
891 indexBase, comm);
892 if (verbose) {
893 std::ostringstream os;
894 os << *prefix << "Done" << endl;
895 std::cerr << os.str();
896 }
897}
898
899template <class LocalOrdinal, class GlobalOrdinal, class Node>
902 const Kokkos::View<const GlobalOrdinal*, device_type>& entryList,
904 const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
905 : comm_(comm)
906 , uniform_(false)
907 , directory_(new Directory<LocalOrdinal, GlobalOrdinal, Node>()) {
908 using Kokkos::LayoutLeft;
909 using Kokkos::subview;
910 using Kokkos::View;
911 using Kokkos::view_alloc;
912 using Kokkos::WithoutInitializing;
913 using std::endl;
914 using Teuchos::arcp;
915 using Teuchos::ArrayView;
916 using Teuchos::as;
917 using Teuchos::broadcast;
918 using Teuchos::outArg;
919 using Teuchos::ptr;
920 using Teuchos::REDUCE_MAX;
921 using Teuchos::REDUCE_MIN;
922 using Teuchos::REDUCE_SUM;
923 using Teuchos::reduceAll;
924 using Teuchos::typeName;
925 using LO = local_ordinal_type;
927 using GST = global_size_t;
928 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid();
929 const char funcName[] =
930 "Map(gblNumInds,entryList(Kokkos::View),indexBase,comm)";
931
932 const bool verbose = Details::Behavior::verbose("Map");
933 std::unique_ptr<std::string> prefix;
934 if (verbose) {
936 comm_.getRawPtr(), "Map", funcName);
937 std::ostringstream os;
938 os << *prefix << "Start" << endl;
939 std::cerr << os.str();
940 }
944 checkMapInputArray("(GST, Kokkos::View, GO, comm)",
945 entryList.data(),
946 static_cast<size_t>(entryList.extent(0)),
947 comm.getRawPtr());
948
949 // The user has specified the distribution of indices over the
950 // processes, via the input array of global indices on each
951 // process. The distribution is not necessarily contiguous or
952 // equally shared over the processes.
953
954 // The length of the input array on this process is the number of
955 // local indices to associate with this process, even though the
956 // input array contains global indices. We assume that the number
957 // of local indices on a process can be stored in a size_t;
958 // numLocalElements_ is a size_t, so this variable and that should
959 // have the same type.
960 const size_t numLocalElements(entryList.size());
961
962 initialNonuniformDebugCheck(funcName, numGlobalElements,
964
965 // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
966 // reduction is redundant, since the directory Map will have to do
967 // the same thing. Thus, we could do the scan and broadcast for
968 // the directory Map here, and give the computed offsets to the
969 // directory Map's constructor. However, a reduction costs less
970 // than a scan and broadcast, so this still saves time if users of
971 // this Map don't ever need the Directory (i.e., if they never
972 // call getRemoteIndexList on this Map).
973 if (numGlobalElements != GSTI) {
974 numGlobalElements_ = numGlobalElements; // Use the user's value.
975 } else { // The user wants us to compute the sum.
976 reduceAll(*comm, REDUCE_SUM,
977 static_cast<GST>(numLocalElements),
978 outArg(numGlobalElements_));
979 }
980
981 // mfh 20 Feb 2013: We've never quite done the right thing for
982 // duplicate GIDs here. Duplicate GIDs have always been counted
983 // distinctly in numLocalElements_, and thus should get a
984 // different LID. However, we've always used std::map or a hash
985 // table for the GID -> LID lookup table, so distinct GIDs always
986 // map to the same LID. Furthermore, the order of the input GID
987 // list matters, so it's not desirable to sort for determining
988 // uniqueness.
989 //
990 // I've chosen for now to write this code as if the input GID list
991 // contains no duplicates. If this is not desired, we could use
992 // the lookup table itself to determine uniqueness: If we haven't
993 // seen the GID before, it gets a new LID and it's added to the
994 // LID -> GID and GID -> LID tables. If we have seen the GID
995 // before, it doesn't get added to either table. I would
996 // implement this, but it would cost more to do the double lookups
997 // in the table (one to check, and one to insert).
998 //
999 // More importantly, since we build the GID -> LID table in (a
1000 // thread-) parallel (way), the order in which duplicate GIDs may
1001 // get inserted is not defined. This would make the assignment of
1002 // LID to GID nondeterministic.
1003
1004 numLocalElements_ = numLocalElements;
1005 indexBase_ = indexBase;
1007 minMyGID_ = indexBase_;
1008 maxMyGID_ = indexBase_;
1009
1010 // NOTE (mfh 27 May 2015): While finding the initial contiguous
1011 // GID range requires looking at all the GIDs in the range,
1012 // dismissing an interval of GIDs only requires looking at the
1013 // first and last GIDs. Thus, we could do binary search backwards
1014 // from the end in order to catch the common case of a contiguous
1015 // interval followed by noncontiguous entries. On the other hand,
1016 // we could just expose this case explicitly as yet another Map
1017 // constructor, and avoid the trouble of detecting it.
1018 if (numLocalElements_ > 0) {
1019 // Find contiguous GID range, with the restriction that the
1020 // beginning of the range starts with the first entry. While
1021 // doing so, fill in the LID -> GID table.
1022 typename decltype(lgMap_)::non_const_type lgMap(view_alloc("lgMap2", WithoutInitializing), numLocalElements_);
1023
1024 // Because you can't use lambdas in constructors on CUDA. Or using private/protected data.
1025 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
1026 Kokkos::deep_copy(typename device_type::execution_space(), lgMap, entryList);
1028 computeConstantsOnDevice(entryList, minMyGID_, maxMyGID_, firstContiguousGID_, lastContiguousGID_, lastContiguousGID_loc);
1030 auto nonContigGids = Kokkos::subview(entryList, std::pair<size_t, size_t>(firstNonContiguous_loc, entryList.extent(0)));
1031
1032 // NOTE: We do not fill the glMapHost_ and lgMapHost_ views here. They will be filled lazily later
1035
1036 // "Commit" the local-to-global lookup table we filled in above.
1037 lgMap_ = lgMap;
1038
1039 } else {
1040 minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
1041 maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
1042 // This insures tests for GIDs in the range
1043 // [firstContiguousGID_, lastContiguousGID_] fail for processes
1044 // with no local elements.
1045 firstContiguousGID_ = indexBase_ + 1;
1046 lastContiguousGID_ = indexBase_;
1047 // glMap_ was default constructed, so it's already empty.
1048 }
1049
1050 // Compute the min and max of all processes' GIDs. If
1051 // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
1052 // are both indexBase_. This is wrong, but fixing it would
1053 // require either a fancy sparse all-reduce, or a custom reduction
1054 // operator that ignores invalid values ("invalid" means
1055 // Tpetra::Details::OrdinalTraits<GO>::invalid()).
1056 //
1057 // Also, while we're at it, use the same all-reduce to figure out
1058 // if the Map is distributed. "Distributed" means that there is
1059 // at least one process with a number of local elements less than
1060 // the number of global elements.
1061 //
1062 // We're computing the min and max of all processes' GIDs using a
1063 // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
1064 // and y are signed). (This lets us combine the min and max into
1065 // a single all-reduce.) If each process sets localDist=1 if its
1066 // number of local elements is strictly less than the number of
1067 // global elements, and localDist=0 otherwise, then a MAX
1068 // all-reduce on localDist tells us if the Map is distributed (1
1069 // if yes, 0 if no). Thus, we can append localDist onto the end
1070 // of the data and get the global result from the all-reduce.
1071 if (std::numeric_limits<GO>::is_signed) {
1072 // Does my process NOT own all the elements?
1073 const GO localDist =
1074 (as<GST>(numLocalElements_) < numGlobalElements_) ? 1 : 0;
1075
1076 GO minMaxInput[3];
1077 minMaxInput[0] = -minMyGID_;
1078 minMaxInput[1] = maxMyGID_;
1079 minMaxInput[2] = localDist;
1080
1081 GO minMaxOutput[3];
1082 minMaxOutput[0] = 0;
1083 minMaxOutput[1] = 0;
1084 minMaxOutput[2] = 0;
1085 reduceAll<int, GO>(*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
1086 minAllGID_ = -minMaxOutput[0];
1087 maxAllGID_ = minMaxOutput[1];
1088 const GO globalDist = minMaxOutput[2];
1089 distributed_ = (comm_->getSize() > 1 && globalDist == 1);
1090 } else { // unsigned; use two reductions
1091 // This is always correct, no matter the signedness of GO.
1092 reduceAll<int, GO>(*comm_, REDUCE_MIN, minMyGID_, outArg(minAllGID_));
1093 reduceAll<int, GO>(*comm_, REDUCE_MAX, maxMyGID_, outArg(maxAllGID_));
1094 distributed_ = checkIsDist();
1095 }
1096
1097 contiguous_ = false; // "Contiguous" is conservative.
1098
1099 TEUCHOS_TEST_FOR_EXCEPTION(
1100 minAllGID_ < indexBase_,
1101 std::invalid_argument,
1102 "Tpetra::Map constructor (noncontiguous): "
1103 "Minimum global ID = "
1104 << minAllGID_ << " over all process(es) is "
1105 "less than the given indexBase = "
1106 << indexBase_ << ".");
1107
1108 // Create the Directory on demand in getRemoteIndexList().
1109 // setupDirectory ();
1110
1111 if (verbose) {
1112 std::ostringstream os;
1113 os << *prefix << "Done" << endl;
1114 std::cerr << os.str();
1115 }
1116}
1117
1118template <class LocalOrdinal, class GlobalOrdinal, class Node>
1120 if (!Kokkos::is_initialized()) {
1121 std::ostringstream os;
1122 os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1123 "Kokkos::finalize() has been called. This is user error! There are "
1124 "two likely causes: "
1125 << std::endl
1126 << " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1127 << std::endl
1128 << " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1129 "of a Tpetra::Map) at the same scope in main() as Kokkos::finalize() "
1130 "or Tpetra::finalize()."
1131 << std::endl
1132 << std::endl
1133 << "Don't do either of these! Please refer to GitHib Issue #2372."
1134 << std::endl;
1135 ::Tpetra::Details::printOnce(std::cerr, os.str(),
1136 this->getComm().getRawPtr());
1137 } else {
1138 using ::Tpetra::Details::mpiIsFinalized;
1139 using ::Tpetra::Details::mpiIsInitialized;
1140 using ::Tpetra::Details::teuchosCommIsAnMpiComm;
1141
1142 Teuchos::RCP<const Teuchos::Comm<int>> comm = this->getComm();
1143 if (!comm.is_null() && teuchosCommIsAnMpiComm(*comm) &&
1144 mpiIsInitialized() && mpiIsFinalized()) {
1145 // Tpetra itself does not require MPI, even if building with
1146 // MPI. It is legal to create Tpetra objects that do not use
1147 // MPI, even in an MPI program. However, calling Tpetra stuff
1148 // after MPI_Finalize() has been called is a bad idea, since
1149 // some Tpetra defaults may use MPI if available.
1150 std::ostringstream os;
1151 os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1152 "MPI_Finalize() has been called. This is user error! There are "
1153 "two likely causes: "
1154 << std::endl
1155 << " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1156 << std::endl
1157 << " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1158 "of a Tpetra::Map) at the same scope in main() as MPI_finalize() or "
1159 "Tpetra::finalize()."
1160 << std::endl
1161 << std::endl
1162 << "Don't do either of these! Please refer to GitHib Issue #2372."
1163 << std::endl;
1164 ::Tpetra::Details::printOnce(std::cerr, os.str(), comm.getRawPtr());
1165 }
1166 }
1167 // mfh 20 Mar 2018: We can't check Tpetra::isInitialized() yet,
1168 // because Tpetra does not yet require Tpetra::initialize /
1169 // Tpetra::finalize.
1170}
1171
1172template <class LocalOrdinal, class GlobalOrdinal, class Node>
1175 getComm().is_null(), std::logic_error,
1176 "Tpetra::Map::isOneToOne: "
1177 "getComm() returns null. Please report this bug to the Tpetra "
1178 "developers.");
1179
1180 // This is a collective operation, if it hasn't been called before.
1181 setupDirectory();
1182 return directory_->isOneToOne(*this);
1183}
1184
1185template <class LocalOrdinal, class GlobalOrdinal, class Node>
1189 if (isContiguous()) {
1190 if (globalIndex < getMinGlobalIndex() ||
1191 globalIndex > getMaxGlobalIndex()) {
1192 return Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid();
1193 }
1194 return static_cast<LocalOrdinal>(globalIndex - getMinGlobalIndex());
1195 } else if (globalIndex >= firstContiguousGID_ &&
1196 globalIndex <= lastContiguousGID_) {
1197 return static_cast<LocalOrdinal>(globalIndex - firstContiguousGID_);
1198 } else {
1199 // If the given global index is not in the table, this returns
1200 // the same value as OrdinalTraits<LocalOrdinal>::invalid().
1201 // glMapHost_ is Host and does not assume UVM
1202 lazyPushToHost();
1203 return glMapHost_.get(globalIndex);
1204 }
1205}
1206
1207template <class LocalOrdinal, class GlobalOrdinal, class Node>
1211 if (localIndex < getMinLocalIndex() || localIndex > getMaxLocalIndex()) {
1212 return Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid();
1213 }
1214 if (isContiguous()) {
1215 return getMinGlobalIndex() + localIndex;
1216 } else {
1217 // This is a host Kokkos::View access, with no RCP or ArrayRCP
1218 // involvement. As a result, it is thread safe.
1219 //
1220 // lgMapHost_ is a host pointer; this does NOT assume UVM.
1221 lazyPushToHost();
1222 return lgMapHost_[localIndex];
1223 }
1224}
1225
1226template <class LocalOrdinal, class GlobalOrdinal, class Node>
1228 const local_ordinal_type localIndices[], size_t numEntries, global_ordinal_type globalIndices[]) const {
1229 auto const minGI = getMinGlobalIndex();
1230 auto const minLI = getMinLocalIndex();
1231 auto const maxLI = getMaxLocalIndex();
1232 if (isContiguous()) {
1233 for (size_t i = 0; i < numEntries; i++) {
1234 auto lclInd = localIndices[i];
1236 return true;
1237 }
1238 globalIndices[i] = minGI + lclInd;
1239 }
1240 } else {
1241 // This is a host Kokkos::View access, with no RCP or ArrayRCP
1242 // involvement. As a result, it is thread safe.
1243 //
1244 // lgMapHost_ is a host pointer; this does NOT assume UVM.
1245 lazyPushToHost();
1246 for (size_t i = 0; i < numEntries; i++) {
1247 auto lclInd = localIndices[i];
1248 if (lclInd < minLI || lclInd > maxLI) {
1249 return true;
1250 }
1251 globalIndices[i] = lgMapHost_[lclInd];
1252 }
1253 }
1254 return false;
1255}
1256
1257template <class LocalOrdinal, class GlobalOrdinal, class Node>
1260 if (localIndex < getMinLocalIndex() || localIndex > getMaxLocalIndex()) {
1261 return false;
1262 } else {
1263 return true;
1264 }
1265}
1266
1267template <class LocalOrdinal, class GlobalOrdinal, class Node>
1270 return this->getLocalElement(globalIndex) !=
1271 Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid();
1272}
1273
1274template <class LocalOrdinal, class GlobalOrdinal, class Node>
1276 return uniform_;
1277}
1278
1279template <class LocalOrdinal, class GlobalOrdinal, class Node>
1281 return contiguous_;
1282}
1283
1284template <class LocalOrdinal, class GlobalOrdinal, class Node>
1287 getLocalMap() const {
1288 return local_map_type(glMap_, lgMap_, getIndexBase(),
1289 getMinGlobalIndex(), getMaxGlobalIndex(),
1290 firstContiguousGID_, lastContiguousGID_,
1291 getLocalNumElements(), isContiguous());
1292}
1293
1294template <class LocalOrdinal, class GlobalOrdinal, class Node>
1297 using Teuchos::outArg;
1298 using Teuchos::REDUCE_MIN;
1299 using Teuchos::reduceAll;
1300 //
1301 // Tests that avoid the Boolean all-reduce below by using
1302 // globally consistent quantities.
1303 //
1304 if (this == &map) {
1305 // Pointer equality on one process always implies pointer
1306 // equality on all processes, since Map is immutable.
1307 return true;
1308 } else if (getComm()->getSize() != map.getComm()->getSize()) {
1309 // The two communicators have different numbers of processes.
1310 // It's not correct to call isCompatible() in that case. This
1311 // may result in the all-reduce hanging below.
1312 return false;
1313 } else if (getGlobalNumElements() != map.getGlobalNumElements()) {
1314 // Two Maps are definitely NOT compatible if they have different
1315 // global numbers of indices.
1316 return false;
1317 } else if (isContiguous() && isUniform() &&
1318 map.isContiguous() && map.isUniform()) {
1319 // Contiguous uniform Maps with the same number of processes in
1320 // their communicators, and with the same global numbers of
1321 // indices, are always compatible.
1322 return true;
1323 } else if (!isContiguous() && !map.isContiguous() &&
1324 lgMap_.extent(0) != 0 && map.lgMap_.extent(0) != 0 &&
1325 lgMap_.data() == map.lgMap_.data()) {
1326 // Noncontiguous Maps whose global index lists are nonempty and
1327 // have the same pointer must be the same (and therefore
1328 // contiguous).
1329 //
1330 // Nonempty is important. For example, consider a communicator
1331 // with two processes, and two Maps that share this
1332 // communicator, with zero global indices on the first process,
1333 // and different nonzero numbers of global indices on the second
1334 // process. In that case, on the first process, the pointers
1335 // would both be NULL.
1336 return true;
1337 }
1338
1340 getGlobalNumElements() != map.getGlobalNumElements(), std::logic_error,
1341 "Tpetra::Map::isCompatible: There's a bug in this method. We've already "
1342 "checked that this condition is true above, but it's false here. "
1343 "Please report this bug to the Tpetra developers.");
1344
1345 // Do both Maps have the same number of indices on each process?
1346 const int locallyCompat =
1347 (getLocalNumElements() == map.getLocalNumElements()) ? 1 : 0;
1348
1349 int globallyCompat = 0;
1351 return (globallyCompat == 1);
1352}
1353
1354template <class LocalOrdinal, class GlobalOrdinal, class Node>
1357 using Teuchos::ArrayView;
1358 using GO = global_ordinal_type;
1359 using size_type = typename ArrayView<const GO>::size_type;
1360
1361 // If both Maps are contiguous, we can compare their GID ranges
1362 // easily by looking at the min and max GID on this process.
1363 // Otherwise, we'll compare their GID lists. If only one Map is
1364 // contiguous, then we only have to call getLocalElementList() on
1365 // the noncontiguous Map. (It's best to avoid calling it on a
1366 // contiguous Map, since it results in unnecessary storage that
1367 // persists for the lifetime of the Map.)
1368
1369 if (this == &map) {
1370 // Pointer equality on one process always implies pointer
1371 // equality on all processes, since Map is immutable.
1372 return true;
1373 } else if (getLocalNumElements() != map.getLocalNumElements()) {
1374 return false;
1375 } else if (getMinGlobalIndex() != map.getMinGlobalIndex() ||
1376 getMaxGlobalIndex() != map.getMaxGlobalIndex()) {
1377 return false;
1378 } else {
1379 if (isContiguous()) {
1380 if (map.isContiguous()) {
1381 return true; // min and max match, so the ranges match.
1382 } else { // *this is contiguous, but map is not contiguous
1384 !this->isContiguous() || map.isContiguous(), std::logic_error,
1385 "Tpetra::Map::locallySameAs: BUG");
1386 ArrayView<const GO> rhsElts = map.getLocalElementList();
1387 const GO minLhsGid = this->getMinGlobalIndex();
1388 const size_type numRhsElts = rhsElts.size();
1389 for (size_type k = 0; k < numRhsElts; ++k) {
1390 const GO curLhsGid = minLhsGid + static_cast<GO>(k);
1391 if (curLhsGid != rhsElts[k]) {
1392 return false; // stop on first mismatch
1393 }
1394 }
1395 return true;
1396 }
1397 } else if (map.isContiguous()) { // *this is not contiguous, but map is
1399 this->isContiguous() || !map.isContiguous(), std::logic_error,
1400 "Tpetra::Map::locallySameAs: BUG");
1401 ArrayView<const GO> lhsElts = this->getLocalElementList();
1402 const GO minRhsGid = map.getMinGlobalIndex();
1403 const size_type numLhsElts = lhsElts.size();
1404 for (size_type k = 0; k < numLhsElts; ++k) {
1405 const GO curRhsGid = minRhsGid + static_cast<GO>(k);
1406 if (curRhsGid != lhsElts[k]) {
1407 return false; // stop on first mismatch
1408 }
1409 }
1410 return true;
1411 } else if (this->lgMap_.data() == map.lgMap_.data()) {
1412 // Pointers to LID->GID "map" (actually just an array) are the
1413 // same, and the number of GIDs are the same.
1414 return this->getLocalNumElements() == map.getLocalNumElements();
1415 } else { // we actually have to compare the GIDs
1416 if (this->getLocalNumElements() != map.getLocalNumElements()) {
1417 return false; // We already checked above, but check just in case
1418 } else {
1419 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename node_type::execution_space>;
1420
1421 auto lhsLclMap = getLocalMap();
1422 auto rhsLclMap = map.getLocalMap();
1423
1425 Kokkos::parallel_reduce(
1426 "Tpetra::Map::locallySameAs",
1427 range_type(0, this->getLocalNumElements()),
1429 if (lhsLclMap.getGlobalElement(lid) != rhsLclMap.getGlobalElement(lid))
1430 ++numMismatches;
1431 },
1433
1434 return (numMismatchedElements == 0);
1435 }
1436 }
1437 }
1438}
1439
1440template <class LocalOrdinal, class GlobalOrdinal, class Node>
1443 if (this == &map)
1444 return true;
1445
1446 // We are going to check if lmap1 is fitted into lmap2:
1447 // Is lmap1 (map) a subset of lmap2 (this)?
1448 // And do the first lmap1.getLocalNumElements() global elements
1449 // of lmap1,lmap2 owned on each process exactly match?
1450 auto lmap1 = map.getLocalMap();
1451 auto lmap2 = this->getLocalMap();
1452
1453 auto numLocalElements1 = lmap1.getLocalNumElements();
1454 auto numLocalElements2 = lmap2.getLocalNumElements();
1455
1457 // There are more indices in the first map on this process than in second map.
1458 return false;
1459 }
1460
1461 if (lmap1.isContiguous() && lmap2.isContiguous()) {
1462 // When both Maps are contiguous, just check the interval inclusion.
1463 return ((lmap1.getMinGlobalIndex() == lmap2.getMinGlobalIndex()) &&
1464 (lmap1.getMaxGlobalIndex() <= lmap2.getMaxGlobalIndex()));
1465 }
1466
1467 if (lmap1.getMinGlobalIndex() < lmap2.getMinGlobalIndex() ||
1468 lmap1.getMaxGlobalIndex() > lmap2.getMaxGlobalIndex()) {
1469 // The second map does not include the first map bounds, and thus some of
1470 // the first map global indices are not in the second map.
1471 return false;
1472 }
1473
1474 using LO = local_ordinal_type;
1475 using range_type =
1476 Kokkos::RangePolicy<LO, typename node_type::execution_space>;
1477
1478 // Check all elements.
1479 LO numDiff = 0;
1480 Kokkos::parallel_reduce(
1481 "isLocallyFitted",
1482 range_type(0, numLocalElements1),
1483 KOKKOS_LAMBDA(const LO i, LO& diff) {
1484 diff += (lmap1.getGlobalElement(i) != lmap2.getGlobalElement(i));
1485 },
1486 numDiff);
1487
1488 return (numDiff == 0);
1489}
1490
1491template <class LocalOrdinal, class GlobalOrdinal, class Node>
1494 using Teuchos::outArg;
1495 using Teuchos::REDUCE_MIN;
1496 using Teuchos::reduceAll;
1497 //
1498 // Tests that avoid the Boolean all-reduce below by using
1499 // globally consistent quantities.
1500 //
1501 if (this == &map) {
1502 // Pointer equality on one process always implies pointer
1503 // equality on all processes, since Map is immutable.
1504 return true;
1505 } else if (getComm()->getSize() != map.getComm()->getSize()) {
1506 // The two communicators have different numbers of processes.
1507 // It's not correct to call isSameAs() in that case. This
1508 // may result in the all-reduce hanging below.
1509 return false;
1510 } else if (getGlobalNumElements() != map.getGlobalNumElements()) {
1511 // Two Maps are definitely NOT the same if they have different
1512 // global numbers of indices.
1513 return false;
1514 } else if (getMinAllGlobalIndex() != map.getMinAllGlobalIndex() ||
1515 getMaxAllGlobalIndex() != map.getMaxAllGlobalIndex() ||
1516 getIndexBase() != map.getIndexBase()) {
1517 // If the global min or max global index doesn't match, or if
1518 // the index base doesn't match, then the Maps aren't the same.
1519 return false;
1520 } else if (isDistributed() != map.isDistributed()) {
1521 // One Map is distributed and the other is not, which means that
1522 // the Maps aren't the same.
1523 return false;
1524 } else if (isContiguous() && isUniform() &&
1525 map.isContiguous() && map.isUniform()) {
1526 // Contiguous uniform Maps with the same number of processes in
1527 // their communicators, with the same global numbers of indices,
1528 // and with matching index bases and ranges, must be the same.
1529 return true;
1530 }
1531
1532 // The two communicators must have the same number of processes,
1533 // with process ranks occurring in the same order. This uses
1534 // MPI_COMM_COMPARE. The MPI 3.1 standard (Section 6.4) says:
1535 // "Operations that access communicators are local and their
1536 // execution does not require interprocess communication."
1537 // However, just to be sure, I'll put this call after the above
1538 // tests that don't communicate.
1539 if (!::Tpetra::Details::congruent(*comm_, *(map.getComm()))) {
1540 return false;
1541 }
1542
1543 // If we get this far, we need to check local properties and then
1544 // communicate local sameness across all processes.
1545 const int isSame_lcl = locallySameAs(map) ? 1 : 0;
1546
1547 // Return true if and only if all processes report local sameness.
1548 int isSame_gbl = 0;
1550 return isSame_gbl == 1;
1551}
1552
1553namespace { // (anonymous)
1554template <class LO, class GO, class DT>
1555class FillLgMap {
1556 public:
1557 FillLgMap(const Kokkos::View<GO*, DT>& lgMap,
1558 const GO startGid)
1559 : lgMap_(lgMap)
1560 , startGid_(startGid) {
1561 Kokkos::RangePolicy<LO, typename DT::execution_space>
1562 range(static_cast<LO>(0), static_cast<LO>(lgMap.size()));
1563 Kokkos::parallel_for(range, *this);
1564 }
1565
1566 KOKKOS_INLINE_FUNCTION void operator()(const LO& lid) const {
1567 lgMap_(lid) = startGid_ + static_cast<GO>(lid);
1568 }
1569
1570 private:
1571 const Kokkos::View<GO*, DT> lgMap_;
1572 const GO startGid_;
1573};
1574
1575} // namespace
1576
1577template <class LocalOrdinal, class GlobalOrdinal, class Node>
1578typename Map<LocalOrdinal, GlobalOrdinal, Node>::global_indices_array_type
1580 using std::endl;
1581 using LO = local_ordinal_type;
1582 using GO = global_ordinal_type;
1583 using const_lg_view_type = decltype(lgMap_);
1584 using lg_view_type = typename const_lg_view_type::non_const_type;
1585 const bool debug = Details::Behavior::debug("Map");
1586 const bool verbose = Details::Behavior::verbose("Map");
1587
1588 std::unique_ptr<std::string> prefix;
1589 if (verbose) {
1591 comm_.getRawPtr(), "Map", "getMyGlobalIndices");
1592 std::ostringstream os;
1593 os << *prefix << "Start" << endl;
1594 std::cerr << os.str();
1595 }
1596
1597 // If the local-to-global mapping doesn't exist yet, and if we
1598 // have local entries, then create and fill the local-to-global
1599 // mapping.
1601 lgMap_.extent(0) == 0 && numLocalElements_ > 0;
1602
1604 if (verbose) {
1605 std::ostringstream os;
1606 os << *prefix << "Need to create lgMap" << endl;
1607 std::cerr << os.str();
1608 }
1609 if (debug) {
1610 // The local-to-global mapping should have been set up already
1611 // for a noncontiguous map.
1612 TEUCHOS_TEST_FOR_EXCEPTION(!isContiguous(), std::logic_error,
1613 "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1614 "mapping (lgMap_) should have been set up already for a "
1615 "noncontiguous Map. Please report this bug to the Tpetra "
1616 "developers.");
1617 }
1618 const LO numElts = static_cast<LO>(getLocalNumElements());
1619
1620 using Kokkos::view_alloc;
1621 using Kokkos::WithoutInitializing;
1622 lg_view_type lgMap("lgMap3", numElts);
1623 if (verbose) {
1624 std::ostringstream os;
1625 os << *prefix << "Fill lgMap" << endl;
1626 std::cerr << os.str();
1627 }
1629
1630 if (verbose) {
1631 std::ostringstream os;
1632 os << *prefix << "Copy lgMap to lgMapHost" << endl;
1633 std::cerr << os.str();
1634 }
1635
1636 auto lgMapHost = Kokkos::create_mirror_view(Kokkos::HostSpace(), lgMap);
1637 // DEEP_COPY REVIEW - DEVICE-TO-HOST
1639 Kokkos::deep_copy(exec_instance, lgMapHost, lgMap);
1640
1641 // There's a non-trivial chance we'll grab this on the host,
1642 // so let's make sure the copy finishes
1643 exec_instance.fence();
1644
1645 // "Commit" the local-to-global lookup table we filled in above.
1646 lgMap_ = lgMap;
1647 lgMapHost_ = lgMapHost;
1648 } else {
1649 lazyPushToHost();
1650 }
1651
1652 if (verbose) {
1653 std::ostringstream os;
1654 os << *prefix << "Done" << endl;
1655 std::cerr << os.str();
1656 }
1657 return lgMapHost_;
1658}
1659
1660template <class LocalOrdinal, class GlobalOrdinal, class Node>
1661typename Map<LocalOrdinal, GlobalOrdinal, Node>::global_indices_array_device_type
1663 using std::endl;
1664 using LO = local_ordinal_type;
1665 using GO = global_ordinal_type;
1666 using const_lg_view_type = decltype(lgMap_);
1667 using lg_view_type = typename const_lg_view_type::non_const_type;
1668 const bool debug = Details::Behavior::debug("Map");
1669 const bool verbose = Details::Behavior::verbose("Map");
1670
1671 std::unique_ptr<std::string> prefix;
1672 if (verbose) {
1674 comm_.getRawPtr(), "Map", "getMyGlobalIndicesDevice");
1675 std::ostringstream os;
1676 os << *prefix << "Start" << endl;
1677 std::cerr << os.str();
1678 }
1679
1680 // If the local-to-global mapping doesn't exist yet, and if we
1681 // have local entries, then create and fill the local-to-global
1682 // mapping.
1684 lgMap_.extent(0) == 0 && numLocalElements_ > 0;
1685
1687 if (verbose) {
1688 std::ostringstream os;
1689 os << *prefix << "Need to create lgMap" << endl;
1690 std::cerr << os.str();
1691 }
1692 if (debug) {
1693 // The local-to-global mapping should have been set up already
1694 // for a noncontiguous map.
1695 TEUCHOS_TEST_FOR_EXCEPTION(!isContiguous(), std::logic_error,
1696 "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1697 "mapping (lgMap_) should have been set up already for a "
1698 "noncontiguous Map. Please report this bug to the Tpetra "
1699 "developers.");
1700 }
1701 const LO numElts = static_cast<LO>(getLocalNumElements());
1702
1703 using Kokkos::view_alloc;
1704 using Kokkos::WithoutInitializing;
1705 lg_view_type lgMap("lgMap4", numElts);
1706 if (verbose) {
1707 std::ostringstream os;
1708 os << *prefix << "Fill lgMap" << endl;
1709 std::cerr << os.str();
1710 }
1712
1713 // "Commit" the local-to-global lookup table we filled in above.
1714 lgMap_ = lgMap;
1715 }
1716
1717 if (verbose) {
1718 std::ostringstream os;
1719 os << *prefix << "Done" << endl;
1720 std::cerr << os.str();
1721 }
1722 return lgMap_;
1723}
1724
1725template <class LocalOrdinal, class GlobalOrdinal, class Node>
1726Teuchos::ArrayView<const GlobalOrdinal>
1728 using GO = global_ordinal_type;
1729
1730 // If the local-to-global mapping doesn't exist yet, and if we
1731 // have local entries, then create and fill the local-to-global
1732 // mapping.
1733 (void)this->getMyGlobalIndices();
1734
1735 // This does NOT assume UVM; lgMapHost_ is a host pointer.
1736 lazyPushToHost();
1737 const GO* lgMapHostRawPtr = lgMapHost_.data();
1738 // The third argument forces ArrayView not to try to track memory
1739 // in a debug build. We have to use it because the memory does
1740 // not belong to a Teuchos memory management class.
1741 return Teuchos::ArrayView<const GO>(
1743 lgMapHost_.extent(0),
1744 Teuchos::RCP_DISABLE_NODE_LOOKUP);
1745}
1746
1747template <class LocalOrdinal, class GlobalOrdinal, class Node>
1749 return distributed_;
1750}
1751
1752template <class LocalOrdinal, class GlobalOrdinal, class Node>
1754 using Teuchos::TypeNameTraits;
1755 std::ostringstream os;
1756
1757 os << "Tpetra::Map: {"
1758 << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name()
1759 << ", GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name()
1760 << ", NodeType: " << TypeNameTraits<Node>::name();
1761 if (this->getObjectLabel() != "") {
1762 os << ", Label: \"" << this->getObjectLabel() << "\"";
1763 }
1764 os << ", Global number of entries: " << getGlobalNumElements()
1765 << ", Number of processes: " << getComm()->getSize()
1766 << ", Uniform: " << (isUniform() ? "true" : "false")
1767 << ", Contiguous: " << (isContiguous() ? "true" : "false")
1768 << ", Distributed: " << (isDistributed() ? "true" : "false")
1769 << "}";
1770 return os.str();
1771}
1772
1777template <class LocalOrdinal, class GlobalOrdinal, class Node>
1778std::string
1780 localDescribeToString(const Teuchos::EVerbosityLevel vl) const {
1781 using LO = local_ordinal_type;
1782 using std::endl;
1783
1784 // This preserves current behavior of Map.
1785 if (vl < Teuchos::VERB_HIGH) {
1786 return std::string();
1787 }
1788 auto outStringP = Teuchos::rcp(new std::ostringstream());
1789 Teuchos::RCP<Teuchos::FancyOStream> outp =
1790 Teuchos::getFancyOStream(outStringP);
1791 Teuchos::FancyOStream& out = *outp;
1792
1793 auto comm = this->getComm();
1794 const int myRank = comm->getRank();
1795 const int numProcs = comm->getSize();
1796 out << "Process " << myRank << " of " << numProcs << ":" << endl;
1797 Teuchos::OSTab tab1(out);
1798
1799 const LO numEnt = static_cast<LO>(this->getLocalNumElements());
1800 out << "My number of entries: " << numEnt << endl
1801 << "My minimum global index: " << this->getMinGlobalIndex() << endl
1802 << "My maximum global index: " << this->getMaxGlobalIndex() << endl;
1803
1804 if (vl == Teuchos::VERB_EXTREME) {
1805 out << "My global indices: [";
1806 const LO minLclInd = this->getMinLocalIndex();
1807 for (LO k = 0; k < numEnt; ++k) {
1808 out << minLclInd + this->getGlobalElement(k);
1809 if (k + 1 < numEnt) {
1810 out << ", ";
1811 }
1812 }
1813 out << "]" << endl;
1814 }
1815
1816 out.flush(); // make sure the ostringstream got everything
1817 return outStringP->str();
1818}
1819
1820template <class LocalOrdinal, class GlobalOrdinal, class Node>
1822 describe(Teuchos::FancyOStream& out,
1823 const Teuchos::EVerbosityLevel verbLevel) const {
1824 using std::endl;
1825 using Teuchos::TypeNameTraits;
1826 using Teuchos::VERB_DEFAULT;
1827 using Teuchos::VERB_HIGH;
1828 using Teuchos::VERB_LOW;
1829 using Teuchos::VERB_NONE;
1830 using LO = local_ordinal_type;
1831 using GO = global_ordinal_type;
1832 const Teuchos::EVerbosityLevel vl =
1834
1835 if (vl == VERB_NONE) {
1836 return; // don't print anything
1837 }
1838 // If this Map's Comm is null, then the Map does not participate
1839 // in collective operations with the other processes. In that
1840 // case, it is not even legal to call this method. The reasonable
1841 // thing to do in that case is nothing.
1842 auto comm = this->getComm();
1843 if (comm.is_null()) {
1844 return;
1845 }
1846 const int myRank = comm->getRank();
1847 const int numProcs = comm->getSize();
1848
1849 // Only Process 0 should touch the output stream, but this method
1850 // in general may need to do communication. Thus, we may need to
1851 // preserve the current tab level across multiple "if (myRank ==
1852 // 0) { ... }" inner scopes. This is why we sometimes create
1853 // OSTab instances by pointer, instead of by value. We only need
1854 // to create them by pointer if the tab level must persist through
1855 // multiple inner scopes.
1856 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
1857
1858 if (myRank == 0) {
1859 // At every verbosity level but VERB_NONE, Process 0 prints.
1860 // By convention, describe() always begins with a tab before
1861 // printing.
1862 tab0 = Teuchos::rcp(new Teuchos::OSTab(out));
1863 out << "\"Tpetra::Map\":" << endl;
1864 tab1 = Teuchos::rcp(new Teuchos::OSTab(out));
1865 {
1866 out << "Template parameters:" << endl;
1867 Teuchos::OSTab tab2(out);
1868 out << "LocalOrdinal: " << TypeNameTraits<LO>::name() << endl
1869 << "GlobalOrdinal: " << TypeNameTraits<GO>::name() << endl
1870 << "Node: " << TypeNameTraits<Node>::name() << endl;
1871 }
1872 const std::string label = this->getObjectLabel();
1873 if (label != "") {
1874 out << "Label: \"" << label << "\"" << endl;
1875 }
1876 out << "Global number of entries: " << getGlobalNumElements() << endl
1877 << "Minimum global index: " << getMinAllGlobalIndex() << endl
1878 << "Maximum global index: " << getMaxAllGlobalIndex() << endl
1879 << "Index base: " << getIndexBase() << endl
1880 << "Number of processes: " << numProcs << endl
1881 << "Uniform: " << (isUniform() ? "true" : "false") << endl
1882 << "Contiguous: " << (isContiguous() ? "true" : "false") << endl
1883 << "Distributed: " << (isDistributed() ? "true" : "false") << endl;
1884 }
1885
1886 // This is collective over the Map's communicator.
1887 if (vl >= VERB_HIGH) { // VERB_HIGH or VERB_EXTREME
1888 const std::string lclStr = this->localDescribeToString(vl);
1890 }
1891}
1892
1893template <class LocalOrdinal, class GlobalOrdinal, class Node>
1894Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>>
1896 replaceCommWithSubset(const Teuchos::RCP<const Teuchos::Comm<int>>& newComm) const {
1897 using Teuchos::RCP;
1898 using Teuchos::rcp;
1899 using GST = global_size_t;
1900 using LO = local_ordinal_type;
1901 using GO = global_ordinal_type;
1902 using map_type = Map<LO, GO, Node>;
1903
1904 // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
1905 // the Map by calling its ordinary public constructor, using the
1906 // original Map's data. This only involves O(1) all-reduces over
1907 // the new communicator, which in the common case only includes a
1908 // small number of processes.
1909
1910 // Create the Map to return.
1911 if (newComm.is_null() || newComm->getSize() < 1) {
1912 return Teuchos::null; // my process does not participate in the new Map
1913 } else if (newComm->getSize() == 1) {
1914 lazyPushToHost();
1915
1916 // The case where the new communicator has only one process is
1917 // easy. We don't have to communicate to get all the
1918 // information we need. Use the default comm to create the new
1919 // Map, then fill in all the fields directly.
1920 RCP<map_type> newMap(new map_type());
1921
1922 newMap->comm_ = newComm;
1923 // mfh 07 Oct 2016: Preserve original behavior, even though the
1924 // original index base may no longer be the globally min global
1925 // index. See #616 for why this doesn't matter so much anymore.
1926 newMap->indexBase_ = this->indexBase_;
1927 newMap->numGlobalElements_ = this->numLocalElements_;
1928 newMap->numLocalElements_ = this->numLocalElements_;
1929 newMap->minMyGID_ = this->minMyGID_;
1930 newMap->maxMyGID_ = this->maxMyGID_;
1931 newMap->minAllGID_ = this->minMyGID_;
1932 newMap->maxAllGID_ = this->maxMyGID_;
1933 newMap->firstContiguousGID_ = this->firstContiguousGID_;
1934 newMap->lastContiguousGID_ = this->lastContiguousGID_;
1935 // Since the new communicator has only one process, neither
1936 // uniformity nor contiguity have changed.
1937 newMap->uniform_ = this->uniform_;
1938 newMap->contiguous_ = this->contiguous_;
1939 // The new communicator only has one process, so the new Map is
1940 // not distributed.
1941 newMap->distributed_ = false;
1942 newMap->lgMap_ = this->lgMap_;
1943 newMap->lgMapHost_ = this->lgMapHost_;
1944 newMap->glMap_ = this->glMap_;
1945 newMap->glMapHost_ = this->glMapHost_;
1946 // It's OK not to initialize the new Map's Directory.
1947 // This is initialized lazily, on first call to getRemoteIndexList.
1948
1949 return newMap;
1950 } else { // newComm->getSize() != 1
1951 // Even if the original Map is contiguous, the new Map might not
1952 // be, especially if the excluded processes have ranks != 0 or
1953 // newComm->getSize()-1. The common case for this method is to
1954 // exclude many (possibly even all but one) processes, so it
1955 // likely doesn't pay to do the global communication (over the
1956 // original communicator) to figure out whether we can optimize
1957 // the result Map. Thus, we just set up the result Map as
1958 // noncontiguous.
1959 //
1960 // TODO (mfh 07 Oct 2016) We don't actually need to reconstruct
1961 // the global-to-local table, etc. Optimize this code path to
1962 // avoid unnecessary local work.
1963
1964 // Make Map (re)compute the global number of elements.
1965 const GST RECOMPUTE = Tpetra::Details::OrdinalTraits<GST>::invalid();
1966 // TODO (mfh 07 Oct 2016) If we use any Map constructor, we have
1967 // to use the noncontiguous Map constructor, since the new Map
1968 // might not be contiguous. Even if the old Map was contiguous,
1969 // some process in the "middle" might have been excluded. If we
1970 // want to avoid local work, we either have to do the setup by
1971 // hand, or write a new Map constructor.
1972#if 1
1973 // The disabled code here throws the following exception in
1974 // Map's replaceCommWithSubset test:
1975 //
1976 // Throw test that evaluated to true: static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::ArithTraits<ValueType>::max ())
1977 // 10:
1978 // 10: Tpetra::Details::FixedHashTable: The number of keys -3 is greater than the maximum representable ValueType value 2147483647. This means that it is not possible to use this constructor.
1979 // 10: Process 3: origComm->replaceCommWithSubset(subsetComm) threw an exception: /scratch/prj/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Details_FixedHashTable_def.hpp:1044:
1980
1981 auto lgMap = this->getMyGlobalIndices();
1982 using size_type =
1983 typename std::decay<decltype(lgMap.extent(0))>::type;
1984 const size_type lclNumInds =
1985 static_cast<size_type>(this->getLocalNumElements());
1986 using Teuchos::TypeNameTraits;
1987 TEUCHOS_TEST_FOR_EXCEPTION(lgMap.extent(0) != lclNumInds, std::logic_error,
1988 "Tpetra::Map::replaceCommWithSubset: Result of getMyGlobalIndices() "
1989 "has length "
1990 << lgMap.extent(0) << " (of type " << TypeNameTraits<size_type>::name() << ") != this->getLocalNumElements()"
1991 " = "
1992 << this->getLocalNumElements() << ". The latter, upon being "
1993 "cast to size_type = "
1995 "becomes "
1996 << lclNumInds << ". Please report this bug to the Tpetra "
1997 "developers.");
1998#else
1999 Teuchos::ArrayView<const GO> lgMap = this->getLocalElementList();
2000#endif // 1
2001
2002 const GO indexBase = this->getIndexBase();
2003 // map stores HostSpace of CudaSpace but constructor is still CudaUVMSpace
2004 auto lgMap_device = Kokkos::create_mirror_view_and_copy(device_type(), lgMap);
2005 return rcp(new map_type(RECOMPUTE, lgMap_device, indexBase, newComm));
2006 }
2007}
2008
2009template <class LocalOrdinal, class GlobalOrdinal, class Node>
2010Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>>
2012 removeEmptyProcesses() const {
2013 using Teuchos::Comm;
2014 using Teuchos::null;
2015 using Teuchos::outArg;
2016 using Teuchos::RCP;
2017 using Teuchos::rcp;
2018 using Teuchos::REDUCE_MIN;
2019 using Teuchos::reduceAll;
2020
2021 // Create the new communicator. split() returns a valid
2022 // communicator on all processes. On processes where color == 0,
2023 // ignore the result. Passing key == 0 tells MPI to order the
2024 // processes in the new communicator by their rank in the old
2025 // communicator.
2026 const int color = (numLocalElements_ == 0) ? 0 : 1;
2027 // MPI_Comm_split must be called collectively over the original
2028 // communicator. We can't just call it on processes with color
2029 // one, even though we will ignore its result on processes with
2030 // color zero.
2031 RCP<const Comm<int>> newComm = comm_->split(color, 0);
2032 if (color == 0) {
2033 newComm = null;
2034 }
2035
2036 // Create the Map to return.
2037 if (newComm.is_null()) {
2038 return null; // my process does not participate in the new Map
2039 } else {
2040 RCP<Map> map = rcp(new Map());
2041
2042 map->comm_ = newComm;
2043 map->indexBase_ = indexBase_;
2044 map->numGlobalElements_ = numGlobalElements_;
2045 map->numLocalElements_ = numLocalElements_;
2046 map->minMyGID_ = minMyGID_;
2047 map->maxMyGID_ = maxMyGID_;
2048 map->minAllGID_ = minAllGID_;
2049 map->maxAllGID_ = maxAllGID_;
2050 map->firstContiguousGID_ = firstContiguousGID_;
2051 map->lastContiguousGID_ = lastContiguousGID_;
2052
2053 // Uniformity and contiguity have not changed. The directory
2054 // has changed, but we've taken care of that above.
2055 map->uniform_ = uniform_;
2056 map->contiguous_ = contiguous_;
2057
2058 // If the original Map was NOT distributed, then the new Map
2059 // cannot be distributed.
2060 //
2061 // If the number of processes in the new communicator is 1, then
2062 // the new Map is not distributed.
2063 //
2064 // Otherwise, we have to check the new Map using an all-reduce
2065 // (over the new communicator). For example, the original Map
2066 // may have had some processes with zero elements, and all other
2067 // processes with the same number of elements as in the whole
2068 // Map. That Map is technically distributed, because of the
2069 // processes with zero elements. Removing those processes would
2070 // make the new Map locally replicated.
2071 if (!distributed_ || newComm->getSize() == 1) {
2072 map->distributed_ = false;
2073 } else {
2074 const int iOwnAllGids = (numLocalElements_ == numGlobalElements_) ? 1 : 0;
2075 int allProcsOwnAllGids = 0;
2077 map->distributed_ = (allProcsOwnAllGids == 1) ? false : true;
2078 }
2079
2080 map->lgMap_ = lgMap_;
2081 map->lgMapHost_ = lgMapHost_;
2082 map->glMap_ = glMap_;
2083 map->glMapHost_ = glMapHost_;
2084
2085 // Map's default constructor creates an uninitialized Directory.
2086 // The Directory will be initialized on demand in
2087 // getRemoteIndexList().
2088 //
2089 // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the
2090 // directory more efficiently than just recreating it. If
2091 // directory recreation proves a bottleneck, we can always
2092 // revisit this. On the other hand, Directory creation is only
2093 // collective over the new, presumably much smaller
2094 // communicator, so it may not be worth the effort to optimize.
2095
2096 return map;
2097 }
2098}
2099
2100template <class LocalOrdinal, class GlobalOrdinal, class Node>
2103 directory_.is_null(), std::logic_error,
2104 "Tpetra::Map::setupDirectory: "
2105 "The Directory is null. "
2106 "Please report this bug to the Tpetra developers.");
2107
2108 // Only create the Directory if it hasn't been created yet.
2109 // This is a collective operation.
2110 if (!directory_->initialized()) {
2111 directory_->initialize(*this);
2112 }
2113}
2114
2115template <class LocalOrdinal, class GlobalOrdinal, class Node>
2118 getRemoteIndexList(const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
2119 const Teuchos::ArrayView<int>& PIDs,
2120 const Teuchos::ArrayView<LocalOrdinal>& LIDs) const {
2122 using std::endl;
2123 using Tpetra::Details::OrdinalTraits;
2124 using size_type = Teuchos::ArrayView<int>::size_type;
2125
2126 const bool verbose = Details::Behavior::verbose("Map");
2128 std::unique_ptr<std::string> prefix;
2129 if (verbose) {
2130 prefix = Details::createPrefix(comm_.getRawPtr(),
2131 "Map", "getRemoteIndexList(GIDs,PIDs,LIDs)");
2132 std::ostringstream os;
2133 os << *prefix << "Start: ";
2134 verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2135 os << endl;
2136 std::cerr << os.str();
2137 }
2138
2139 // Empty Maps (i.e., containing no indices on any processes in the
2140 // Map's communicator) are perfectly valid. In that case, if the
2141 // input GID list is nonempty, we fill the output arrays with
2142 // invalid values, and return IDNotPresent to notify the caller.
2143 // It's perfectly valid to give getRemoteIndexList GIDs that the
2144 // Map doesn't own. SubmapImport test 2 needs this functionality.
2145 if (getGlobalNumElements() == 0) {
2146 if (GIDs.size() == 0) {
2147 if (verbose) {
2148 std::ostringstream os;
2149 os << *prefix << "Done; both Map & input are empty" << endl;
2150 std::cerr << os.str();
2151 }
2152 return AllIDsPresent; // trivially
2153 } else {
2154 if (verbose) {
2155 std::ostringstream os;
2156 os << *prefix << "Done: Map is empty on all processes, "
2157 "so all output PIDs & LIDs are invalid (-1)."
2158 << endl;
2159 std::cerr << os.str();
2160 }
2161 for (size_type k = 0; k < PIDs.size(); ++k) {
2163 }
2164 for (size_type k = 0; k < LIDs.size(); ++k) {
2166 }
2167 return IDNotPresent;
2168 }
2169 }
2170
2171 // getRemoteIndexList must be called collectively, and Directory
2172 // initialization is collective too, so it's OK to initialize the
2173 // Directory on demand.
2174
2175 if (verbose) {
2176 std::ostringstream os;
2177 os << *prefix << "Call setupDirectory" << endl;
2178 std::cerr << os.str();
2179 }
2180 setupDirectory();
2181 if (verbose) {
2182 std::ostringstream os;
2183 os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2184 std::cerr << os.str();
2185 }
2187 directory_->getDirectoryEntries(*this, GIDs, PIDs, LIDs);
2188 if (verbose) {
2189 std::ostringstream os;
2190 os << *prefix << "Done; getDirectoryEntries returned "
2191 << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2192 << "; ";
2193 verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2194 os << ", ";
2195 verbosePrintArray(os, LIDs, "LIDs", maxNumToPrint);
2196 os << endl;
2197 std::cerr << os.str();
2198 }
2199 return retVal;
2200}
2201
2202template <class LocalOrdinal, class GlobalOrdinal, class Node>
2205 getRemoteIndexList(const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
2206 const Teuchos::ArrayView<int>& PIDs) const {
2208 using std::endl;
2209
2210 const bool verbose = Details::Behavior::verbose("Map");
2212 std::unique_ptr<std::string> prefix;
2213 if (verbose) {
2214 prefix = Details::createPrefix(comm_.getRawPtr(),
2215 "Map", "getRemoteIndexList(GIDs,PIDs)");
2216 std::ostringstream os;
2217 os << *prefix << "Start: ";
2218 verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2219 os << endl;
2220 std::cerr << os.str();
2221 }
2222
2223 if (getGlobalNumElements() == 0) {
2224 if (GIDs.size() == 0) {
2225 if (verbose) {
2226 std::ostringstream os;
2227 os << *prefix << "Done; both Map & input are empty" << endl;
2228 std::cerr << os.str();
2229 }
2230 return AllIDsPresent; // trivially
2231 } else {
2232 if (verbose) {
2233 std::ostringstream os;
2234 os << *prefix << "Done: Map is empty on all processes, "
2235 "so all output PIDs are invalid (-1)."
2236 << endl;
2237 std::cerr << os.str();
2238 }
2239 for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size(); ++k) {
2240 PIDs[k] = Tpetra::Details::OrdinalTraits<int>::invalid();
2241 }
2242 return IDNotPresent;
2243 }
2244 }
2245
2246 // getRemoteIndexList must be called collectively, and Directory
2247 // initialization is collective too, so it's OK to initialize the
2248 // Directory on demand.
2249
2250 if (verbose) {
2251 std::ostringstream os;
2252 os << *prefix << "Call setupDirectory" << endl;
2253 std::cerr << os.str();
2254 }
2255 setupDirectory();
2256 if (verbose) {
2257 std::ostringstream os;
2258 os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2259 std::cerr << os.str();
2260 }
2262 directory_->getDirectoryEntries(*this, GIDs, PIDs);
2263 if (verbose) {
2264 std::ostringstream os;
2265 os << *prefix << "Done; getDirectoryEntries returned "
2266 << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2267 << "; ";
2268 verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2269 os << endl;
2270 std::cerr << os.str();
2271 }
2272 return retVal;
2273}
2274
2275template <class LocalOrdinal, class GlobalOrdinal, class Node>
2277 using exec_space = typename Node::device_type::execution_space;
2278 if (lgMap_.extent(0) != lgMapHost_.extent(0)) {
2279 Tpetra::Details::ProfilingRegion pr("Map::lazyPushToHost() - pushing data");
2280 // NOTE: We check lgMap_ and not glMap_, since the latter can
2281 // be somewhat error prone for contiguous maps
2282
2283 // create_mirror_view preserves const-ness. create_mirror does not
2284 auto lgMap_host = Kokkos::create_mirror(Kokkos::HostSpace(), lgMap_);
2285
2286 // Since this was computed on the default stream, we can copy on the stream and then fence
2287 // the stream
2288 Kokkos::deep_copy(exec_space(), lgMap_host, lgMap_);
2289 exec_space().fence();
2290 lgMapHost_ = lgMap_host;
2291
2292 // Make host version - when memory spaces match these just do trivial assignment
2293 glMapHost_ = global_to_local_table_host_type(glMap_);
2294 }
2295}
2296
2297template <class LocalOrdinal, class GlobalOrdinal, class Node>
2298Teuchos::RCP<const Teuchos::Comm<int>>
2300 return comm_;
2301}
2302
2303template <class LocalOrdinal, class GlobalOrdinal, class Node>
2305 checkIsDist() const {
2306 using std::endl;
2307 using Teuchos::as;
2308 using Teuchos::outArg;
2309 using Teuchos::REDUCE_MIN;
2310 using Teuchos::reduceAll;
2311
2312 const bool verbose = Details::Behavior::verbose("Map");
2313 std::unique_ptr<std::string> prefix;
2314 if (verbose) {
2316 comm_.getRawPtr(), "Map", "checkIsDist");
2317 std::ostringstream os;
2318 os << *prefix << "Start" << endl;
2319 std::cerr << os.str();
2320 }
2321
2322 bool global = false;
2323 if (comm_->getSize() > 1) {
2324 // The communicator has more than one process, but that doesn't
2325 // necessarily mean the Map is distributed.
2326 int localRep = 0;
2327 if (numGlobalElements_ == as<global_size_t>(numLocalElements_)) {
2328 // The number of local elements on this process equals the
2329 // number of global elements.
2330 //
2331 // NOTE (mfh 22 Nov 2011) Does this still work if there were
2332 // duplicates in the global ID list on input (the third Map
2333 // constructor), so that the number of local elements (which
2334 // are not duplicated) on this process could be less than the
2335 // number of global elements, even if this process owns all
2336 // the elements?
2337 localRep = 1;
2338 }
2339 int allLocalRep;
2340 reduceAll<int, int>(*comm_, REDUCE_MIN, localRep, outArg(allLocalRep));
2341 if (allLocalRep != 1) {
2342 // At least one process does not own all the elements.
2343 // This makes the Map a distributed Map.
2344 global = true;
2345 }
2346 }
2347 // If the communicator has only one process, then the Map is not
2348 // distributed.
2349
2350 if (verbose) {
2351 std::ostringstream os;
2352 os << *prefix << "Done; global=" << (global ? "true" : "false")
2353 << endl;
2354 std::cerr << os.str();
2355 }
2356 return global;
2357}
2358
2359} // namespace Tpetra
2360
2361template <class LocalOrdinal, class GlobalOrdinal>
2362Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal>>
2363Tpetra::createLocalMap(const size_t numElements,
2364 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2365 typedef LocalOrdinal LO;
2366 typedef GlobalOrdinal GO;
2367 using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2368 return createLocalMapWithNode<LO, GO, NT>(numElements, comm);
2369}
2370
2371template <class LocalOrdinal, class GlobalOrdinal>
2372Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal>>
2373Tpetra::createUniformContigMap(const global_size_t numElements,
2374 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2375 typedef LocalOrdinal LO;
2376 typedef GlobalOrdinal GO;
2377 using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2378 return createUniformContigMapWithNode<LO, GO, NT>(numElements, comm);
2379}
2380
2381template <class LocalOrdinal, class GlobalOrdinal, class Node>
2382Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2383Tpetra::createUniformContigMapWithNode(const global_size_t numElements,
2384 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2385 using Teuchos::rcp;
2387 const GlobalOrdinal indexBase = static_cast<GlobalOrdinal>(0);
2388
2389 return rcp(new map_type(numElements, indexBase, comm, GloballyDistributed));
2390}
2391
2392template <class LocalOrdinal, class GlobalOrdinal, class Node>
2393Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2394Tpetra::createLocalMapWithNode(const size_t numElements,
2395 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2396 using Teuchos::rcp;
2399 const GlobalOrdinal indexBase = 0;
2400 const global_size_t globalNumElts = static_cast<global_size_t>(numElements);
2401
2402 return rcp(new map_type(globalNumElts, indexBase, comm, LocallyReplicated));
2403}
2404
2405template <class LocalOrdinal, class GlobalOrdinal, class Node>
2406Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2408 const size_t localNumElements,
2409 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2410 using Teuchos::rcp;
2412 const GlobalOrdinal indexBase = 0;
2413
2414 return rcp(new map_type(numElements, localNumElements, indexBase, comm));
2415}
2416
2417template <class LocalOrdinal, class GlobalOrdinal>
2418Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal>>
2420 const size_t localNumElements,
2421 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2422 typedef LocalOrdinal LO;
2423 typedef GlobalOrdinal GO;
2424 using NT = typename Tpetra::Map<LO, GO>::node_type;
2425
2426 return Tpetra::createContigMapWithNode<LO, GO, NT>(numElements, localNumElements, comm);
2427}
2428
2429template <class LocalOrdinal, class GlobalOrdinal>
2430Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal>>
2431Tpetra::createNonContigMap(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2432 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2433 typedef LocalOrdinal LO;
2434 typedef GlobalOrdinal GO;
2435 using NT = typename Tpetra::Map<LO, GO>::node_type;
2436
2437 return Tpetra::createNonContigMapWithNode<LO, GO, NT>(elementList, comm);
2438}
2439
2440template <class LocalOrdinal, class GlobalOrdinal, class Node>
2441Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2442Tpetra::createNonContigMapWithNode(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2443 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) {
2444 using Teuchos::rcp;
2446 using GST = Tpetra::global_size_t;
2447 const GST INV = Tpetra::Details::OrdinalTraits<GST>::invalid();
2448 // FIXME (mfh 22 Jul 2016) This is what I found here, but maybe this
2449 // shouldn't be zero, given that the index base is supposed to equal
2450 // the globally min global index?
2451 const GlobalOrdinal indexBase = 0;
2452
2453 return rcp(new map_type(INV, elementList, indexBase, comm));
2454}
2455
2456template <class LO, class GO, class NT>
2457Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>
2458Tpetra::createOneToOne(const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& M) {
2459 using Details::verbosePrintArray;
2460 using std::cerr;
2461 using std::endl;
2462 using Teuchos::Array;
2463 using Teuchos::ArrayView;
2464 using Teuchos::as;
2465 using Teuchos::rcp;
2466 using map_type = Tpetra::Map<LO, GO, NT>;
2467 using GST = global_size_t;
2468
2469 const bool verbose = Details::Behavior::verbose("Map");
2470 std::unique_ptr<std::string> prefix;
2471 if (verbose) {
2472 auto comm = M.is_null() ? Teuchos::null : M->getComm();
2473 prefix = Details::createPrefix(
2474 comm.getRawPtr(), "createOneToOne(Map)");
2475 std::ostringstream os;
2476 os << *prefix << "Start" << endl;
2477 cerr << os.str();
2478 }
2479 const size_t maxNumToPrint = verbose ? Details::Behavior::verbosePrintCountThreshold() : size_t(0);
2480 const GST GINV = Tpetra::Details::OrdinalTraits<GST>::invalid();
2481 const int myRank = M->getComm()->getRank();
2482
2483 // Bypasses for special cases where either M is known to be
2484 // one-to-one, or the one-to-one version of M is easy to compute.
2485 // This is why we take M as an RCP, not as a const reference -- so
2486 // that we can return M itself if it is 1-to-1.
2487 if (!M->isDistributed()) {
2488 // For a locally replicated Map, we assume that users want to push
2489 // all the GIDs to Process 0.
2490
2491 // mfh 05 Nov 2013: getGlobalNumElements() does indeed return what
2492 // you think it should return, in this special case of a locally
2493 // replicated contiguous Map.
2494 const GST numGlobalEntries = M->getGlobalNumElements();
2495 if (M->isContiguous()) {
2496 const size_t numLocalEntries =
2497 (myRank == 0) ? as<size_t>(numGlobalEntries) : size_t(0);
2498 if (verbose) {
2499 std::ostringstream os;
2500 os << *prefix << "Input is locally replicated & contiguous; "
2501 "numLocalEntries="
2502 << numLocalEntries << endl;
2503 cerr << os.str();
2504 }
2505 auto retMap =
2506 rcp(new map_type(numGlobalEntries, numLocalEntries,
2507 M->getIndexBase(), M->getComm()));
2508 if (verbose) {
2509 std::ostringstream os;
2510 os << *prefix << "Done" << endl;
2511 cerr << os.str();
2512 }
2513 return retMap;
2514 } else {
2515 if (verbose) {
2516 std::ostringstream os;
2517 os << *prefix << "Input is locally replicated & noncontiguous"
2518 << endl;
2519 cerr << os.str();
2520 }
2521 ArrayView<const GO> myGids =
2522 (myRank == 0) ? M->getLocalElementList() : Teuchos::null;
2523 auto retMap =
2524 rcp(new map_type(GINV, myGids(), M->getIndexBase(),
2525 M->getComm()));
2526 if (verbose) {
2527 std::ostringstream os;
2528 os << *prefix << "Done" << endl;
2529 cerr << os.str();
2530 }
2531 return retMap;
2532 }
2533 } else if (M->isContiguous()) {
2534 if (verbose) {
2535 std::ostringstream os;
2536 os << *prefix << "Input is distributed & contiguous" << endl;
2537 cerr << os.str();
2538 }
2539 // Contiguous, distributed Maps are one-to-one by construction.
2540 // (Locally replicated Maps can be contiguous.)
2541 return M;
2542 } else {
2543 if (verbose) {
2544 std::ostringstream os;
2545 os << *prefix << "Input is distributed & noncontiguous" << endl;
2546 cerr << os.str();
2547 }
2549 const size_t numMyElems = M->getLocalNumElements();
2550 ArrayView<const GO> myElems = M->getLocalElementList();
2551 Array<int> owner_procs_vec(numMyElems);
2552
2553 if (verbose) {
2554 std::ostringstream os;
2555 os << *prefix << "Call Directory::getDirectoryEntries: ";
2556 verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2557 os << endl;
2558 cerr << os.str();
2559 }
2560 directory.getDirectoryEntries(*M, myElems, owner_procs_vec());
2561 if (verbose) {
2562 std::ostringstream os;
2563 os << *prefix << "getDirectoryEntries result: ";
2564 verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2565 os << endl;
2566 cerr << os.str();
2567 }
2568
2569 Array<GO> myOwned_vec(numMyElems);
2570 size_t numMyOwnedElems = 0;
2571 for (size_t i = 0; i < numMyElems; ++i) {
2572 const GO GID = myElems[i];
2573 const int owner = owner_procs_vec[i];
2574
2575 if (myRank == owner) {
2576 myOwned_vec[numMyOwnedElems++] = GID;
2577 }
2578 }
2579 myOwned_vec.resize(numMyOwnedElems);
2580
2581 if (verbose) {
2582 std::ostringstream os;
2583 os << *prefix << "Create Map: ";
2584 verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2585 os << endl;
2586 cerr << os.str();
2587 }
2588 auto retMap = rcp(new map_type(GINV, myOwned_vec(),
2589 M->getIndexBase(), M->getComm()));
2590 if (verbose) {
2591 std::ostringstream os;
2592 os << *prefix << "Done" << endl;
2593 cerr << os.str();
2594 }
2595 return retMap;
2596 }
2597}
2598
2599template <class LocalOrdinal, class GlobalOrdinal, class Node>
2600Teuchos::RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
2603 using Details::Behavior;
2604 using Details::verbosePrintArray;
2605 using std::cerr;
2606 using std::endl;
2607 using Teuchos::Array;
2608 using Teuchos::ArrayView;
2609 using Teuchos::RCP;
2610 using Teuchos::rcp;
2611 using Teuchos::toString;
2612 using LO = LocalOrdinal;
2613 using GO = GlobalOrdinal;
2614 using map_type = Tpetra::Map<LO, GO, Node>;
2615
2616 const bool verbose = Behavior::verbose("Map");
2617 std::unique_ptr<std::string> prefix;
2618 if (verbose) {
2619 auto comm = M.is_null() ? Teuchos::null : M->getComm();
2620 prefix = Details::createPrefix(
2621 comm.getRawPtr(), "createOneToOne(Map,TieBreak)");
2622 std::ostringstream os;
2623 os << *prefix << "Start" << endl;
2624 cerr << os.str();
2625 }
2626 const size_t maxNumToPrint = verbose ? Behavior::verbosePrintCountThreshold() : size_t(0);
2627
2628 // FIXME (mfh 20 Feb 2013) We should have a bypass for contiguous
2629 // Maps (which are 1-to-1 by construction).
2630
2632 if (verbose) {
2633 std::ostringstream os;
2634 os << *prefix << "Initialize Directory" << endl;
2635 cerr << os.str();
2636 }
2637 directory.initialize(*M, tie_break);
2638 if (verbose) {
2639 std::ostringstream os;
2640 os << *prefix << "Done initializing Directory" << endl;
2641 cerr << os.str();
2642 }
2643 size_t numMyElems = M->getLocalNumElements();
2644 ArrayView<const GO> myElems = M->getLocalElementList();
2645 Array<int> owner_procs_vec(numMyElems);
2646 if (verbose) {
2647 std::ostringstream os;
2648 os << *prefix << "Call Directory::getDirectoryEntries: ";
2649 verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2650 os << endl;
2651 cerr << os.str();
2652 }
2653 directory.getDirectoryEntries(*M, myElems, owner_procs_vec());
2654 if (verbose) {
2655 std::ostringstream os;
2656 os << *prefix << "getDirectoryEntries result: ";
2657 verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2658 os << endl;
2659 cerr << os.str();
2660 }
2661
2662 const int myRank = M->getComm()->getRank();
2663 Array<GO> myOwned_vec(numMyElems);
2664 size_t numMyOwnedElems = 0;
2665 for (size_t i = 0; i < numMyElems; ++i) {
2666 const GO GID = myElems[i];
2667 const int owner = owner_procs_vec[i];
2668 if (myRank == owner) {
2669 myOwned_vec[numMyOwnedElems++] = GID;
2670 }
2671 }
2672 myOwned_vec.resize(numMyOwnedElems);
2673
2674 // FIXME (mfh 08 May 2014) The above Directory should be perfectly
2675 // valid for the new Map. Why can't we reuse it?
2676 const global_size_t GINV =
2677 Tpetra::Details::OrdinalTraits<global_size_t>::invalid();
2678 if (verbose) {
2679 std::ostringstream os;
2680 os << *prefix << "Create Map: ";
2681 verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2682 os << endl;
2683 cerr << os.str();
2684 }
2685 RCP<const map_type> retMap(new map_type(GINV, myOwned_vec(), M->getIndexBase(),
2686 M->getComm()));
2687 if (verbose) {
2688 std::ostringstream os;
2689 os << *prefix << "Done" << endl;
2690 cerr << os.str();
2691 }
2692 return retMap;
2693}
2694
2695//
2696// Explicit instantiation macro
2697//
2698// Must be expanded from within the Tpetra namespace!
2699//
2700
2702
2703#define TPETRA_MAP_INSTANT(LO, GO, NODE) \
2704 \
2705 template class Map<LO, GO, NODE>; \
2706 \
2707 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2708 createLocalMapWithNode<LO, GO, NODE>(const size_t numElements, \
2709 const Teuchos::RCP<const Teuchos::Comm<int>>& comm); \
2710 \
2711 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2712 createContigMapWithNode<LO, GO, NODE>(const global_size_t numElements, \
2713 const size_t localNumElements, \
2714 const Teuchos::RCP<const Teuchos::Comm<int>>& comm); \
2715 \
2716 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2717 createNonContigMapWithNode(const Teuchos::ArrayView<const GO>& elementList, \
2718 const Teuchos::RCP<const Teuchos::Comm<int>>& comm); \
2719 \
2720 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2721 createUniformContigMapWithNode<LO, GO, NODE>(const global_size_t numElements, \
2722 const Teuchos::RCP<const Teuchos::Comm<int>>& comm); \
2723 \
2724 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2725 createOneToOne(const Teuchos::RCP<const Map<LO, GO, NODE>>& M); \
2726 \
2727 template Teuchos::RCP<const Map<LO, GO, NODE>> \
2728 createOneToOne(const Teuchos::RCP<const Map<LO, GO, NODE>>& M, \
2729 const Tpetra::Details::TieBreak<LO, GO>& tie_break);
2730
2732#define TPETRA_MAP_INSTANT_DEFAULTNODE(LO, GO) \
2733 template Teuchos::RCP<const Map<LO, GO>> \
2734 createLocalMap<LO, GO>(const size_t, const Teuchos::RCP<const Teuchos::Comm<int>>&); \
2735 \
2736 template Teuchos::RCP<const Map<LO, GO>> \
2737 createContigMap<LO, GO>(global_size_t, size_t, \
2738 const Teuchos::RCP<const Teuchos::Comm<int>>&); \
2739 \
2740 template Teuchos::RCP<const Map<LO, GO>> \
2741 createNonContigMap(const Teuchos::ArrayView<const GO>&, \
2742 const Teuchos::RCP<const Teuchos::Comm<int>>&); \
2743 \
2744 template Teuchos::RCP<const Map<LO, GO>> \
2745 createUniformContigMap<LO, GO>(const global_size_t, \
2746 const Teuchos::RCP<const Teuchos::Comm<int>>&);
2747
2748#endif // TPETRA_MAP_DEF_HPP
Functions for initializing and finalizing Tpetra.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration of Tpetra::Details::extractMpiCommFromTeuchos.
Declaration of a function that prints strings from each process.
Declaration of Tpetra::Details::initializeKokkos.
Declaration of Tpetra::Details::printOnce.
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
Description of Tpetra's behavior.
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
"Local" part of Map suitable for Kokkos kernels.
Interface for breaking ties in ownership.
Implement mapping from global ID to process ID and local ID.
A parallel distribution of indices over processes.
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
bool isOneToOne() const
Whether the Map is one to one.
std::string description() const
Implementation of Teuchos::Describable.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
Node node_type
Legacy typedef that will go away at some point.
Map()
Default constructor (that does nothing).
GlobalOrdinal global_ordinal_type
The type of global indices.
LookupStatus getRemoteIndexList(const Teuchos::ArrayView< const global_ordinal_type > &GIDList, const Teuchos::ArrayView< int > &nodeIDList, const Teuchos::ArrayView< local_ordinal_type > &LIDList) const
Return the process ranks and corresponding local indices for the given global indices.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
bool isUniform() const
Whether the range of global indices is uniform.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
typename device_type::execution_space execution_space
The Kokkos execution space.
LO local_ordinal_type
The type of local indices.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > removeEmptyProcesses() const
Advanced methods.
void lazyPushToHost() const
Push the device data to host, if needed.
bool isCompatible(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is compatible with this Map.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
bool isLocallyFitted(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is locally fitted to this Map.
virtual ~Map()
Destructor (virtual for memory safety of derived classes).
global_indices_array_device_type getMyGlobalIndicesDevice() const
Return a view of the global indices owned by this process on the Map's device.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > replaceCommWithSubset(const Teuchos::RCP< const Teuchos::Comm< int > > &newComm) const
Replace this Map's communicator with a subset communicator.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
bool isSameAs(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is identical to this Map.
local_map_type getLocalMap() const
Get the LocalMap for Kokkos-Kernels.
global_indices_array_type getMyGlobalIndices() const
Return a view of the global indices owned by this process.
typename Node::device_type device_type
This class' Kokkos::Device specialization.
Implementation details of Tpetra.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void printOnce(std::ostream &out, const std::string &s, const Teuchos::Comm< int > *comm)
Print on one process of the given communicator, or at least try to do so (if MPI is not initialized).
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
bool congruent(const Teuchos::Comm< int > &comm1, const Teuchos::Comm< int > &comm2)
Whether the two communicators are congruent.
void initializeKokkos()
Initialize Kokkos, using command-line arguments (if any) given to Teuchos::GlobalMPISession.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createLocalMap(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with the default Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createUniformContigMap(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with the default Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createNonContigMap(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a non-contiguous Map using the default Kokkos::Device type.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createUniformContigMapWithNode(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with a user-specified Kokkos Node.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createContigMap(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map using the defaul...
size_t global_size_t
Global size_t object.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified,...
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createLocalMapWithNode(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with a specified Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createNonContigMapWithNode(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a noncontiguous Map with a user-specified, possibly nondefault Kokkos Node ...
LocalGlobal
Enum for local versus global allocation of Map entries.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a (potentially) nonuniformly distributed, contiguous Map for a user-specifi...