Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_DirectoryImpl_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DIRECTORYIMPL_DEF_HPP
11#define TPETRA_DIRECTORYIMPL_DEF_HPP
12
15
16#include "Tpetra_Distributor.hpp"
17#include "Tpetra_Map.hpp"
18#include "Tpetra_TieBreak.hpp"
19#include "Tpetra_Util.hpp"
20#include "Tpetra_Details_FixedHashTable.hpp"
21#include "Teuchos_Comm.hpp"
22#include <memory>
23#include <sstream>
24
25// FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
26#ifdef HAVE_TPETRACORE_MPI
27#include <mpi.h>
28#endif // HAVE_TPETRACORE_MPI
29// FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
30
31namespace Tpetra {
32namespace Details {
33template <class LO, class GO, class NT>
37 const Teuchos::ArrayView<const GO>& globalIDs,
38 const Teuchos::ArrayView<int>& nodeIDs,
39 const Teuchos::ArrayView<LO>& localIDs,
40 const bool computeLIDs) const {
41 // Ensure that globalIDs, nodeIDs, and localIDs (if applicable)
42 // all have the same size, before modifying any output arguments.
44 std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
45 "Output arrays do not have the right sizes. nodeIDs.size() = "
46 << nodeIDs.size() << " != globalIDs.size() = " << globalIDs.size() << ".");
48 computeLIDs && localIDs.size() != globalIDs.size(),
49 std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
50 "Output array do not have the right sizes. localIDs.size() = "
51 << localIDs.size() << " != globalIDs.size() = " << globalIDs.size() << ".");
52
53 // Initially, fill nodeIDs and localIDs (if applicable) with
54 // invalid values. The "invalid" process ID is -1 (this means
55 // the same thing as MPI_ANY_SOURCE to Teuchos, so it's an
56 // "invalid" process ID); the invalid local ID comes from
57 // OrdinalTraits.
58 std::fill(nodeIDs.begin(), nodeIDs.end(), -1);
59 if (computeLIDs) {
60 std::fill(localIDs.begin(), localIDs.end(),
61 Teuchos::OrdinalTraits<LO>::invalid());
62 }
63 // Actually do the work.
64 return this->getEntriesImpl(map, globalIDs, nodeIDs, localIDs, computeLIDs);
65}
66
67template <class LO, class GO, class NT>
70 : numProcs_(map.getComm()->getSize()) {}
71
72template <class LO, class GO, class NT>
74 isOneToOne(const Teuchos::Comm<int>& /* comm */) const {
75 // A locally replicated Map is one-to-one only if there is no
76 // replication, that is, only if the Map's communicator only has
77 // one process.
78 return (numProcs_ == 1);
79}
80
81template <class LO, class GO, class NT>
82std::string
84 std::ostringstream os;
85 os << "ReplicatedDirectory"
86 << "<" << Teuchos::TypeNameTraits<LO>::name()
87 << ", " << Teuchos::TypeNameTraits<GO>::name()
88 << ", " << Teuchos::TypeNameTraits<NT>::name() << ">";
89 return os.str();
90}
91
92template <class LO, class GO, class NT>
94 ContiguousUniformDirectory(const map_type& map) {
95 TEUCHOS_TEST_FOR_EXCEPTION(!map.isContiguous(), std::invalid_argument,
96 Teuchos::typeName(*this) << " constructor: Map is not contiguous.");
97 TEUCHOS_TEST_FOR_EXCEPTION(!map.isUniform(), std::invalid_argument,
98 Teuchos::typeName(*this) << " constructor: Map is not uniform.");
99}
100
101template <class LO, class GO, class NT>
102std::string
104 std::ostringstream os;
105 os << "ContiguousUniformDirectory"
106 << "<" << Teuchos::TypeNameTraits<LO>::name()
107 << ", " << Teuchos::TypeNameTraits<GO>::name()
108 << ", " << Teuchos::TypeNameTraits<NT>::name() << ">";
109 return os.str();
110}
111
112template <class LO, class GO, class NT>
116 const Teuchos::ArrayView<const GO>& globalIDs,
117 const Teuchos::ArrayView<int>& nodeIDs,
118 const Teuchos::ArrayView<LO>& localIDs,
119 const bool computeLIDs) const {
120 using Teuchos::Comm;
121 using Teuchos::RCP;
122 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
123 const LO invalidLid = Teuchos::OrdinalTraits<LO>::invalid();
125
126 RCP<const Comm<int> > comm = map.getComm();
127 const GO g_min = map.getMinAllGlobalIndex();
128
129 // Let N_G be the global number of elements in the Map,
130 // and P be the number of processes in its communicator.
131 // Then, N_G = P * N_L + R = R*(N_L + 1) + (P - R)*N_L.
132 //
133 // The first R processes own N_L+1 elements.
134 // The remaining P-R processes own N_L elements.
135 //
136 // Let g be the current GID, g_min be the global minimum GID,
137 // and g_0 = g - g_min. If g is a valid GID in this Map, then
138 // g_0 is in [0, N_G - 1].
139 //
140 // If g is a valid GID in this Map and g_0 < R*(N_L + 1), then
141 // the rank of the process that owns g is floor(g_0 / (N_L +
142 // 1)), and its corresponding local index on that process is g_0
143 // mod (N_L + 1).
144 //
145 // Let g_R = g_0 - R*(N_L + 1). If g is a valid GID in this Map
146 // and g_0 >= R*(N_L + 1), then the rank of the process that
147 // owns g is then R + floor(g_R / N_L), and its corresponding
148 // local index on that process is g_R mod N_L.
149
150 const size_type N_G =
151 static_cast<size_type>(map.getGlobalNumElements());
152 const size_type P = static_cast<size_type>(comm->getSize());
153 const size_type N_L = N_G / P;
154 const size_type R = N_G - N_L * P; // N_G mod P
155 const size_type N_R = R * (N_L + static_cast<size_type>(1));
156
157#ifdef HAVE_TPETRA_DEBUG
159 N_G != P * N_L + R, std::logic_error,
160 "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
161 "N_G = "
162 << N_G << " != P*N_L + R = " << P << "*" << N_L << " + " << R
163 << " = " << P * N_L + R << ". "
164 "Please report this bug to the Tpetra developers.");
165#endif // HAVE_TPETRA_DEBUG
166
167 const size_type numGids = globalIDs.size(); // for const loop bound
168 // Avoid signed/unsigned comparisons below, in case GO is
169 // unsigned. (Integer literals are generally signed.)
170 const GO ONE = static_cast<GO>(1);
171
172 if (computeLIDs) {
173 for (size_type k = 0; k < numGids; ++k) {
174 const GO g_0 = globalIDs[k] - g_min;
175
176 // The first test is a little strange just in case GO is
177 // unsigned. Compilers raise a warning on tests like "x <
178 // 0" if x is unsigned, but don't usually raise a warning if
179 // the expression is a bit more complicated than that.
180 if (g_0 + ONE < ONE || g_0 >= static_cast<GO>(N_G)) {
181 nodeIDs[k] = -1;
184 } else if (g_0 < static_cast<GO>(N_R)) {
185 // The GID comes from the initial sequence of R processes.
186 nodeIDs[k] = static_cast<int>(g_0 / static_cast<GO>(N_L + 1));
187 localIDs[k] = static_cast<LO>(g_0 % static_cast<GO>(N_L + 1));
188 } else if (g_0 >= static_cast<GO>(N_R)) {
189 // The GID comes from the remaining P-R processes.
190 const GO g_R = g_0 - static_cast<GO>(N_R);
191 nodeIDs[k] = static_cast<int>(R + g_R / N_L);
192 localIDs[k] = static_cast<int>(g_R % N_L);
193 }
194#ifdef HAVE_TPETRA_DEBUG
195 else {
196 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
197 "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
198 "should never get here. "
199 "Please report this bug to the Tpetra developers.");
200 }
201#endif // HAVE_TPETRA_DEBUG
202 }
203 } else { // don't compute local indices
204 for (size_type k = 0; k < numGids; ++k) {
205 const GO g_0 = globalIDs[k] - g_min;
206 // The first test is a little strange just in case GO is
207 // unsigned. Compilers raise a warning on tests like "x <
208 // 0" if x is unsigned, but don't usually raise a warning if
209 // the expression is a bit more complicated than that.
210 if (g_0 + ONE < ONE || g_0 >= static_cast<GO>(N_G)) {
211 nodeIDs[k] = -1;
213 } else if (g_0 < static_cast<GO>(N_R)) {
214 // The GID comes from the initial sequence of R processes.
215 nodeIDs[k] = static_cast<int>(g_0 / static_cast<GO>(N_L + 1));
216 } else if (g_0 >= static_cast<GO>(N_R)) {
217 // The GID comes from the remaining P-R processes.
218 const GO g_R = g_0 - static_cast<GO>(N_R);
219 nodeIDs[k] = static_cast<int>(R + g_R / N_L);
220 }
221#ifdef HAVE_TPETRA_DEBUG
222 else {
223 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
224 "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
225 "should never get here. "
226 "Please report this bug to the Tpetra developers.");
227 }
228#endif // HAVE_TPETRA_DEBUG
229 }
230 }
231 return res;
232}
233
234template <class LO, class GO, class NT>
236 DistributedContiguousDirectory(const map_type& map) {
237 using Teuchos::arcp;
238 using Teuchos::gatherAll;
239 using Teuchos::RCP;
240
241 RCP<const Teuchos::Comm<int> > comm = map.getComm();
242
243 TEUCHOS_TEST_FOR_EXCEPTION(!map.isDistributed(), std::invalid_argument,
244 Teuchos::typeName(*this) << " constructor: Map is not distributed.");
245 TEUCHOS_TEST_FOR_EXCEPTION(!map.isContiguous(), std::invalid_argument,
246 Teuchos::typeName(*this) << " constructor: Map is not contiguous.");
247
248 const int numProcs = comm->getSize();
249
250 // Make room for the min global ID on each process, plus one
251 // entry at the end for the "max cap."
252 allMinGIDs_ = arcp<GO>(numProcs + 1);
253 // Get my process' min global ID.
254 GO minMyGID = map.getMinGlobalIndex();
255 // Gather all of the min global IDs into the first numProcs
256 // entries of allMinGIDs_.
257
258 // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
259 //
260 // The purpose of this giant hack is that gatherAll appears to
261 // interpret the "receive count" argument differently than
262 // MPI_Allgather does. Matt Bettencourt reports Valgrind issues
263 // (memcpy with overlapping data) with MpiComm<int>::gatherAll,
264 // which could relate either to this, or to OpenMPI.
265#ifdef HAVE_TPETRACORE_MPI
267 bool useRawMpi = true;
268 if (typeid(GO) == typeid(int)) {
270 } else if (typeid(GO) == typeid(long)) {
272 } else {
273 useRawMpi = false;
274 }
275 if (useRawMpi) {
276 using Teuchos::MpiComm;
277 using Teuchos::rcp_dynamic_cast;
278 RCP<const MpiComm<int> > mpiComm =
279 rcp_dynamic_cast<const MpiComm<int> >(comm);
280 // It could be a SerialComm instead, even in an MPI build, so
281 // be sure to check.
282 if (!comm.is_null()) {
283 MPI_Comm rawMpiComm = *(mpiComm->getRawMpiComm());
284 const int err =
285 MPI_Allgather(&minMyGID, 1, rawMpiType,
286 allMinGIDs_.getRawPtr(), 1, rawMpiType,
287 rawMpiComm);
288 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
289 "Tpetra::DistributedContiguousDirectory: MPI_Allgather failed");
290 } else {
291 gatherAll<int, GO>(*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr());
292 }
293 } else {
294 gatherAll<int, GO>(*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr());
295 }
296#else // NOT HAVE_TPETRACORE_MPI
297 gatherAll<int, GO>(*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr());
298#endif // HAVE_TPETRACORE_MPI
299 // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
300
301 // gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
302
303 // Put the max cap at the end. Adding one lets us write loops
304 // over the global IDs with the usual strict less-than bound.
305 allMinGIDs_[numProcs] = map.getMaxAllGlobalIndex() + Teuchos::OrdinalTraits<GO>::one();
306}
307
308template <class LO, class GO, class NT>
309std::string
311 std::ostringstream os;
312 os << "DistributedContiguousDirectory"
313 << "<" << Teuchos::TypeNameTraits<LO>::name()
314 << ", " << Teuchos::TypeNameTraits<GO>::name()
315 << ", " << Teuchos::TypeNameTraits<NT>::name() << ">";
316 return os.str();
317}
318
319template <class LO, class GO, class NT>
323 const Teuchos::ArrayView<const GO>& globalIDs,
324 const Teuchos::ArrayView<int>& nodeIDs,
325 const Teuchos::ArrayView<LO>& localIDs,
326 const bool computeLIDs) const {
327 using Teuchos::Array;
328 using Teuchos::ArrayRCP;
329 using Teuchos::ArrayView;
330 using Teuchos::as;
331 using Teuchos::Comm;
332 using Teuchos::RCP;
333
335 RCP<const Teuchos::Comm<int> > comm = map.getComm();
336 const int myRank = comm->getRank();
337
338 // Map is on one process or is locally replicated.
339 typename ArrayView<int>::iterator procIter = nodeIDs.begin();
340 typename ArrayView<LO>::iterator lidIter = localIDs.begin();
342 for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
343 if (map.isNodeGlobalElement(*gidIter)) {
344 *procIter++ = myRank;
345 if (computeLIDs) {
346 *lidIter++ = map.getLocalElement(*gidIter);
347 }
348 } else {
349 // Advance the pointers, leaving these values set to invalid
350 procIter++;
351 if (computeLIDs) {
352 lidIter++;
353 }
355 }
356 }
357 return res;
358}
359
360template <class LO, class GO, class NT>
364 const Teuchos::ArrayView<const GO>& globalIDs,
365 const Teuchos::ArrayView<int>& nodeIDs,
366 const Teuchos::ArrayView<LO>& localIDs,
367 const bool computeLIDs) const {
368 using Teuchos::Array;
369 using Teuchos::ArrayRCP;
370 using Teuchos::ArrayView;
371 using Teuchos::as;
372 using Teuchos::Comm;
373 using Teuchos::RCP;
374
375 RCP<const Teuchos::Comm<int> > comm = map.getComm();
376 const int numProcs = comm->getSize();
377 const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
378 const GO noGIDsOnProc = std::numeric_limits<GO>::max();
380
381 // Find the first initialized GID for search below
385 if (allMinGIDs_[firstProcWithGIDs] != noGIDsOnProc) break;
386 }
387
388 // If Map is empty, return invalid values for all requested lookups
389 // This case should not happen, as an empty Map is not considered
390 // Distributed.
392 // No entries in Map
393 res = (globalIDs.size() > 0) ? IDNotPresent : AllIDsPresent;
394 std::fill(nodeIDs.begin(), nodeIDs.end(), -1);
395 if (computeLIDs)
396 std::fill(localIDs.begin(), localIDs.end(), LINVALID);
397 return res;
398 }
399
400 const GO one = as<GO>(1);
401 const GO nOverP = as<GO>(map.getGlobalNumElements() / as<global_size_t>(numProcs - firstProcWithGIDs));
402
403 // Map is distributed but contiguous.
404 typename ArrayView<int>::iterator procIter = nodeIDs.begin();
405 typename ArrayView<LO>::iterator lidIter = localIDs.begin();
407 for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
408 LO LID = LINVALID; // Assume not found until proven otherwise
409 int image = -1;
410 GO GID = *gidIter;
411 // Guess uniform distribution (TODO: maybe replace by a binary search)
412 // We go through all this trouble to avoid overflow and
413 // signed / unsigned casting mistakes (that were made in
414 // previous versions of this code).
415 int curRank;
416 const GO firstGuess = firstProcWithGIDs + GID / std::max(nOverP, one);
417 curRank = as<int>(std::min(firstGuess, as<GO>(numProcs - 1)));
418
419 // This while loop will stop because
420 // allMinGIDs_[np] == global num elements
421 while (allMinGIDs_[curRank] == noGIDsOnProc) curRank++;
422
423 bool badGID = false;
424 while (curRank >= firstProcWithGIDs && GID < allMinGIDs_[curRank]) {
425 curRank--;
426 while (curRank >= firstProcWithGIDs &&
427 allMinGIDs_[curRank] == noGIDsOnProc) curRank--;
428 }
430 // GID is lower than all GIDs in map
431 badGID = true;
432 } else if (curRank == numProcs) {
433 // GID is higher than all GIDs in map
434 badGID = true;
435 } else {
436 // we have the lower bound; now limit from above
437 int above = curRank + 1;
438 while (allMinGIDs_[above] == noGIDsOnProc) above++;
439
440 while (GID >= allMinGIDs_[above]) {
441 curRank = above;
442 if (curRank == numProcs) {
443 // GID is higher than all GIDs in map
444 badGID = true;
445 break;
446 }
447 above++;
448 while (allMinGIDs_[above] == noGIDsOnProc) above++;
449 }
450 }
451
452 if (!badGID) {
453 image = curRank;
454 LID = as<LO>(GID - allMinGIDs_[image]);
455 } else {
457 }
458 *procIter++ = image;
459 if (computeLIDs) {
460 *lidIter++ = LID;
461 }
462 }
463 return res;
464}
465
466template <class LO, class GO, class NT>
469 : oneToOneResult_(ONE_TO_ONE_NOT_CALLED_YET)
470 , // to be revised below
471 locallyOneToOne_(true)
472 , // to be revised below
473 useHashTables_(false) // to be revised below
474{
475 initialize(map, Teuchos::null);
476}
477
478template <class LO, class GO, class NT>
479DistributedNoncontiguousDirectory<LO, GO, NT>::
480 DistributedNoncontiguousDirectory(const map_type& map,
481 const tie_break_type& tie_break)
482 : oneToOneResult_(ONE_TO_ONE_NOT_CALLED_YET)
483 , // to be revised below
484 locallyOneToOne_(true)
485 , // to be revised below
486 useHashTables_(false) // to be revised below
487{
488 initialize(map, Teuchos::ptrFromRef(tie_break));
489}
490
491template <class LO, class GO, class NT>
492void DistributedNoncontiguousDirectory<LO, GO, NT>::
493 initialize(const map_type& map,
494 Teuchos::Ptr<const tie_break_type> tie_break) {
495 using std::cerr;
496 using std::endl;
497 using Teuchos::arcp;
498 using Teuchos::Array;
499 using Teuchos::ArrayRCP;
500 using Teuchos::ArrayView;
501 using Teuchos::as;
502 using Teuchos::RCP;
503 using Teuchos::rcp;
504 using Teuchos::typeName;
505 using Teuchos::TypeNameTraits;
506 typedef Array<int>::size_type size_type;
507
508 // This class' implementation of getEntriesImpl() currently
509 // encodes the following assumptions:
510 //
511 // 1. global_size_t >= GO
512 // 2. global_size_t >= int
513 // 3. global_size_t >= LO
514 //
515 // We check these assumptions here.
516 TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(GO),
517 std::logic_error, typeName(*this) << ": sizeof(Tpetra::"
518 "global_size_t) = "
519 << sizeof(global_size_t) << " < sizeof(Global"
520 "Ordinal = "
521 << TypeNameTraits<LO>::name() << ") = " << sizeof(GO) << ".");
522 TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(int),
523 std::logic_error, typeName(*this) << ": sizeof(Tpetra::"
524 "global_size_t) = "
525 << sizeof(global_size_t) << " < sizeof(int) = " << sizeof(int) << ".");
526 TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(LO),
527 std::logic_error, typeName(*this) << ": sizeof(Tpetra::"
528 "global_size_t) = "
529 << sizeof(global_size_t) << " < sizeof(Local"
530 "Ordinal = "
531 << TypeNameTraits<LO>::name() << ") = " << sizeof(LO) << ".");
532
533 RCP<const Teuchos::Comm<int> > comm = map.getComm();
534 const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
535 const GO minAllGID = map.getMinAllGlobalIndex();
536 const GO maxAllGID = map.getMaxAllGlobalIndex();
537
538 // The "Directory Map" (see below) will have a range of elements
539 // from the minimum to the maximum GID of the user Map, and a
540 // minimum GID of minAllGID from the user Map. It doesn't
541 // actually have to store all those entries, though do beware of
542 // calling getLocalElementList on it (see Bug 5822).
543 const global_size_t numGlobalEntries = maxAllGID - minAllGID + 1;
544
545 // We can't afford to replicate the whole directory on each
546 // process, so create the "Directory Map", a uniform contiguous
547 // Map that describes how we will distribute the directory over
548 // processes.
549 //
550 // FIXME (mfh 08 May 2012) Here we're setting minAllGID to be
551 // the index base. The index base should be separate from the
552 // minimum GID.
553 directoryMap_ = rcp(new map_type(numGlobalEntries, minAllGID, comm,
554 GloballyDistributed));
555 // The number of Directory elements that my process owns.
556 const size_t dir_numMyEntries = directoryMap_->getLocalNumElements();
557
558 // Fix for Bug 5822: If the input Map is "sparse," that is if
559 // the difference between the global min and global max GID is
560 // much larger than the global number of elements in the input
561 // Map, then it's possible that the Directory Map might have
562 // many more entries than the input Map on this process. This
563 // can cause memory scalability issues. In that case, we switch
564 // from the array-based implementation of Directory storage to
565 // the hash table - based implementation. We don't use hash
566 // tables all the time, because they are slower in the common
567 // case of a nonsparse Map.
568 //
569 // NOTE: This is a per-process decision. Some processes may use
570 // array-based storage, whereas others may use hash table -
571 // based storage.
572
573 // A hash table takes a constant factor more space, more or
574 // less, than an array. Thus, it's not worthwhile, even in
575 // terms of memory usage, always to use a hash table.
576 // Furthermore, array lookups are faster than hash table
577 // lookups, so it may be worthwhile to use an array even if it
578 // takes more space. The "sparsity threshold" governs when to
579 // switch to a hash table - based implementation.
580 const size_t inverseSparsityThreshold = 10;
581 useHashTables_ =
582 (dir_numMyEntries >= inverseSparsityThreshold * map.getLocalNumElements());
583
584 // Get list of process IDs that own the directory entries for the
585 // Map GIDs. These will be the targets of the sends that the
586 // Distributor will do.
587 const int myRank = comm->getRank();
588 const size_t numMyEntries = map.getLocalNumElements();
589 Array<int> sendImageIDs(numMyEntries);
590 ArrayView<const GO> myGlobalEntries = map.getLocalElementList();
591 // An ID not present in this lookup indicates that it lies outside
592 // of the range [minAllGID,maxAllGID] (from map_). this means
593 // something is wrong with map_, our fault.
594 const LookupStatus lookupStatus =
595 directoryMap_->getRemoteIndexList(myGlobalEntries, sendImageIDs);
596 TEUCHOS_TEST_FOR_EXCEPTION(
597 lookupStatus == IDNotPresent, std::logic_error, Teuchos::typeName(*this) << " constructor: the Directory Map could not find out where one or "
598 "more of my Map's indices should go. The input to getRemoteIndexList "
599 "is "
600 << Teuchos::toString(myGlobalEntries) << ", and the output is " << Teuchos::toString(sendImageIDs()) << ". The input Map itself has "
601 "the following entries on the calling process "
602 << map.getComm()->getRank() << ": " << Teuchos::toString(map.getLocalElementList()) << ", and has " << map.getGlobalNumElements() << " total global indices in [" << map.getMinAllGlobalIndex() << "," << map.getMaxAllGlobalIndex() << "]. The Directory Map has " << directoryMap_->getGlobalNumElements() << " total global indices in "
603 "["
604 << directoryMap_->getMinAllGlobalIndex() << "," << directoryMap_->getMaxAllGlobalIndex() << "], and the calling process "
605 "has GIDs ["
606 << directoryMap_->getMinGlobalIndex() << "," << directoryMap_->getMaxGlobalIndex() << "]. "
607 "This probably means there is a bug in Map or Directory. "
608 "Please report this bug to the Tpetra developers.");
609
610 // Initialize the distributor using the list of process IDs to
611 // which to send. We'll use the distributor to send out triples
612 // of (GID, process ID, LID). We're sending the entries to the
613 // processes that the Directory Map says should own them, which is
614 // why we called directoryMap_->getRemoteIndexList() above.
615 Distributor distor(comm);
616 const size_t numReceives = distor.createFromSends(sendImageIDs);
617
618 // NOTE (mfh 21 Mar 2012) The following code assumes that
619 // sizeof(GO) >= sizeof(int) and sizeof(GO) >= sizeof(LO).
620 //
621 // Create and fill buffer of (GID, PID, LID) triples to send
622 // out. We pack the (GID, PID, LID) triples into a single Array
623 // of GO, casting the PID from int to GO and the LID from LO to
624 // GO as we do so.
625 //
626 // FIXME (mfh 23 Mar 2014) This assumes that sizeof(LO) <=
627 // sizeof(GO) and sizeof(int) <= sizeof(GO). The former is
628 // required, and the latter is generally the case, but we should
629 // still check for this.
630 const int packetSize = 3; // We're sending triples, so packet size is 3.
631 Kokkos::View<GO*, Kokkos::HostSpace> exportEntries("exportEntries", packetSize * numMyEntries);
632 {
633 size_t exportIndex = 0;
634 for (size_t i = 0; i < numMyEntries; ++i) {
635 exportEntries[exportIndex++] = myGlobalEntries[i];
636 exportEntries[exportIndex++] = as<GO>(myRank);
637 exportEntries[exportIndex++] = as<GO>(i);
638 }
639 }
640 // Buffer of data to receive. The Distributor figured out for
641 // us how many packets we're receiving, when we called its
642 // createFromSends() method to set up the distribution plan.
643 Kokkos::View<GO*, Kokkos::HostSpace> importElements("importElements", packetSize * distor.getTotalReceiveLength());
644
645 // Distribute the triples of (GID, process ID, LID).
646 distor.doPostsAndWaits(exportEntries, packetSize, importElements);
647
648 // Unpack the redistributed data. Both implementations of
649 // Directory storage map from an LID in the Directory Map (which
650 // is the LID of the GID to store) to either a PID or an LID in
651 // the input Map. Each "packet" (contiguous chunk of
652 // importElements) contains a triple: (GID, PID, LID).
653 if (useHashTables_) {
654 // Create the hash tables. We know exactly how many elements
655 // to expect in each hash table. FixedHashTable's constructor
656 // currently requires all the keys and values at once, so we
657 // have to extract them in temporary arrays. It may be
658 // possible to rewrite FixedHashTable to use a "start fill" /
659 // "end fill" approach that avoids the temporary arrays, but
660 // we won't try that for now.
661
662 // The constructors of Array and ArrayRCP that take a number
663 // of elements all initialize the arrays. Instead, allocate
664 // raw arrays, then hand them off to ArrayRCP, to avoid the
665 // initial unnecessary initialization without losing the
666 // benefit of exception safety (and bounds checking, in a
667 // debug build).
668 LO* tableKeysRaw = NULL;
669 LO* tableLidsRaw = NULL;
670 int* tablePidsRaw = NULL;
671 try {
672 tableKeysRaw = new LO[numReceives];
673 tableLidsRaw = new LO[numReceives];
674 tablePidsRaw = new int[numReceives];
675 } catch (...) {
676 if (tableKeysRaw != NULL) {
677 delete[] tableKeysRaw;
678 }
679 if (tableLidsRaw != NULL) {
680 delete[] tableLidsRaw;
681 }
682 if (tablePidsRaw != NULL) {
683 delete[] tablePidsRaw;
684 }
685 throw;
686 }
687 ArrayRCP<LO> tableKeys(tableKeysRaw, 0, numReceives, true);
688 ArrayRCP<LO> tableLids(tableLidsRaw, 0, numReceives, true);
689 ArrayRCP<int> tablePids(tablePidsRaw, 0, numReceives, true);
690
691 if (tie_break.is_null()) {
692 // Fill the temporary arrays of keys and values.
693 size_type importIndex = 0;
694 for (size_type i = 0; i < static_cast<size_type>(numReceives); ++i) {
695 const GO curGID = importElements[importIndex++];
696 const LO curLID = directoryMap_->getLocalElement(curGID);
697 TEUCHOS_TEST_FOR_EXCEPTION(
698 curLID == LINVALID, std::logic_error,
699 Teuchos::typeName(*this) << " constructor: Incoming global index "
700 << curGID << " does not have a corresponding local index in the "
701 "Directory Map. Please report this bug to the Tpetra developers.");
702 tableKeys[i] = curLID;
703 tablePids[i] = importElements[importIndex++];
704 tableLids[i] = importElements[importIndex++];
705 }
706 // Set up the hash tables. The hash tables' constructor
707 // detects whether there are duplicates, so that we can set
708 // locallyOneToOne_.
709 lidToPidTable_ =
710 rcp(new lidToPidTable_type(tableKeys(), tablePids()));
711 locallyOneToOne_ = !(lidToPidTable_->hasDuplicateKeys());
712 lidToLidTable_ =
713 rcp(new lidToLidTable_type(tableKeys(), tableLids()));
714 } else { // tie_break is NOT null
715
716 // For each directory Map LID received, collect all the
717 // corresponding (PID,LID) pairs. If the input Map is not
718 // one-to-one, corresponding directory Map LIDs will have
719 // more than one pair. In that case, we will use the
720 // TieBreak object to pick exactly one pair.
721 typedef std::map<LO, std::vector<std::pair<int, LO> > > pair_table_type;
722 pair_table_type ownedPidLidPairs;
723
724 // For each directory Map LID received, collect the zero or
725 // more input Map (PID,LID) pairs into ownedPidLidPairs.
726 size_type importIndex = 0;
727 for (size_type i = 0; i < static_cast<size_type>(numReceives); ++i) {
728 const GO curGID = importElements[importIndex++];
729 const LO dirMapLid = directoryMap_->getLocalElement(curGID);
730 TEUCHOS_TEST_FOR_EXCEPTION(
731 dirMapLid == LINVALID, std::logic_error,
732 Teuchos::typeName(*this) << " constructor: Incoming global index "
733 << curGID << " does not have a corresponding local index in the "
734 "Directory Map. Please report this bug to the Tpetra developers.");
735 tableKeys[i] = dirMapLid;
736 const int PID = importElements[importIndex++];
737 const int LID = importElements[importIndex++];
738
739 // These may change below. We fill them in just to ensure
740 // that they won't have invalid values.
741 tablePids[i] = PID;
742 tableLids[i] = LID;
743
744 // For every directory Map LID, we have to remember all
745 // (PID, LID) pairs. The TieBreak object will arbitrate
746 // between them in the loop below.
747 ownedPidLidPairs[dirMapLid].push_back(std::make_pair(PID, LID));
748 }
749
750 // Use TieBreak to arbitrate between (PID,LID) pairs
751 // corresponding to each directory Map LID.
752 //
753 // FIXME (mfh 23 Mar 2014) How do I know that i is the same
754 // as the directory Map LID?
755 // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
756 // contiguous, but the directory map is. Need to set tablePids[i]
757 // and tableLids[i], so need to loop over numReceives (as that is
758 // how those arrays are allocated). FIXED
759
760 for (size_type i = 0; i < static_cast<size_type>(numReceives); ++i) {
761 const LO dirMapLid = tableKeys[i];
762 const std::vector<std::pair<int, LO> >& pidLidList =
763 ownedPidLidPairs[dirMapLid];
764 const size_t listLen = pidLidList.size();
765 if (listLen == 0) continue; // KDD This will never happen
766 const GO dirMapGid = directoryMap_->getGlobalElement(dirMapLid);
767 if (listLen > 1) {
768 locallyOneToOne_ = false;
769 }
770 // If there is some (PID,LID) pair for the current input
771 // Map LID, then it makes sense to invoke the TieBreak
772 // object to arbitrate between the options. Even if
773 // there is only one (PID,LID) pair, we still want to
774 // give the TieBreak object a chance to do whatever it
775 // likes to do, in terms of side effects (e.g., track
776 // (PID,LID) pairs).
777 const size_type index =
778 static_cast<size_type>(tie_break->selectedIndex(dirMapGid,
779 pidLidList));
780 tablePids[i] = pidLidList[index].first;
781 tableLids[i] = pidLidList[index].second;
782 }
783
784 // Set up the hash tables.
785 lidToPidTable_ =
786 rcp(new lidToPidTable_type(tableKeys(), tablePids()));
787 lidToLidTable_ =
788 rcp(new lidToLidTable_type(tableKeys(), tableLids()));
789 }
790 } else {
791 if (tie_break.is_null()) {
792 // Use array-based implementation of Directory storage.
793 // Allocate these arrays and fill them with invalid values,
794 // in case the input Map's GID list is sparse (i.e., does
795 // not populate all GIDs from minAllGID to maxAllGID).
796 PIDs_ = arcp<int>(dir_numMyEntries);
797 std::fill(PIDs_.begin(), PIDs_.end(), -1);
798 LIDs_ = arcp<LO>(dir_numMyEntries);
799 std::fill(LIDs_.begin(), LIDs_.end(), LINVALID);
800 // Fill in the arrays with PIDs resp. LIDs.
801 size_type importIndex = 0;
802 for (size_type i = 0; i < static_cast<size_type>(numReceives); ++i) {
803 const GO curGID = importElements[importIndex++];
804 const LO curLID = directoryMap_->getLocalElement(curGID);
805 TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error,
806 Teuchos::typeName(*this) << " constructor: Incoming global index "
807 << curGID << " does not have a corresponding local index in the "
808 "Directory Map. Please report this bug to the Tpetra developers.");
809
810 // If PIDs_[curLID] is not -1, then curGID is a duplicate
811 // on the calling process, so the Directory is not locally
812 // one-to-one.
813 if (PIDs_[curLID] != -1) {
814 locallyOneToOne_ = false;
815 }
816 PIDs_[curLID] = importElements[importIndex++];
817 LIDs_[curLID] = importElements[importIndex++];
818 }
819 } else {
820 PIDs_ = arcp<int>(dir_numMyEntries);
821 LIDs_ = arcp<LO>(dir_numMyEntries);
822 std::fill(PIDs_.begin(), PIDs_.end(), -1);
823
824 // All received (PID, LID) pairs go into ownedPidLidPairs.
825 // This is a map from the directory Map's LID to the (PID,
826 // LID) pair (where the latter LID comes from the input Map,
827 // not the directory Map). If the input Map is not
828 // one-to-one, corresponding LIDs will have
829 // ownedPidLidPairs[curLID].size() > 1. In that case, we
830 // will use the TieBreak object to pick exactly one pair.
831 Array<std::vector<std::pair<int, LO> > > ownedPidLidPairs(dir_numMyEntries);
832 size_t importIndex = 0;
833 for (size_t i = 0; i < numReceives; ++i) {
834 const GO GID = importElements[importIndex++];
835 const int PID = importElements[importIndex++];
836 const LO LID = importElements[importIndex++];
837
838 const LO dirMapLid = directoryMap_->getLocalElement(GID);
839 TEUCHOS_TEST_FOR_EXCEPTION(
840 dirMapLid == LINVALID, std::logic_error,
841 Teuchos::typeName(*this) << " constructor: Incoming global index "
842 << GID << " does not have a corresponding local index in the "
843 "Directory Map. Please report this bug to the Tpetra developers.");
844 ownedPidLidPairs[dirMapLid].push_back(std::make_pair(PID, LID));
845 }
846
847 // Use TieBreak to arbitrate between (PID,LID) pairs
848 // corresponding to each directory Map LID.
849 //
850 // FIXME (mfh 23 Mar 2014) How do I know that i is the same
851 // as the directory Map LID?
852 // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
853 // contiguous. Loop over all ownedPidLidPairs; skip those that have
854 // empty lists. FIXED
855
856 for (size_t i = 0; i < dir_numMyEntries; ++i) {
857 const std::vector<std::pair<int, LO> >& pidLidList =
858 ownedPidLidPairs[i];
859 const size_t listLen = pidLidList.size();
860 if (listLen == 0) continue; // KDD will happen for GIDs not in
861 // KDD the user's source map
862 const LO dirMapLid = static_cast<LO>(i);
863 const GO dirMapGid = directoryMap_->getGlobalElement(dirMapLid);
864 if (listLen > 1) {
865 locallyOneToOne_ = false;
866 }
867 // If there is some (PID,LID) pair for the current input
868 // Map LID, then it makes sense to invoke the TieBreak
869 // object to arbitrate between the options. Even if
870 // there is only one (PID,LID) pair, we still want to
871 // give the TieBreak object a chance to do whatever it
872 // likes to do, in terms of side effects (e.g., track
873 // (PID,LID) pairs).
874 const size_type index =
875 static_cast<size_type>(tie_break->selectedIndex(dirMapGid,
876 pidLidList));
877 PIDs_[i] = pidLidList[index].first;
878 LIDs_[i] = pidLidList[index].second;
879 }
880 }
881 }
882}
883
884template <class LO, class GO, class NT>
885std::string
887 std::ostringstream os;
888 os << "DistributedNoncontiguousDirectory"
889 << "<" << Teuchos::TypeNameTraits<LO>::name()
890 << ", " << Teuchos::TypeNameTraits<GO>::name()
891 << ", " << Teuchos::TypeNameTraits<NT>::name() << ">";
892 return os.str();
893}
894
895template <class LO, class GO, class NT>
898 getEntriesImpl(const map_type& map,
899 const Teuchos::ArrayView<const GO>& globalIDs,
900 const Teuchos::ArrayView<int>& nodeIDs,
901 const Teuchos::ArrayView<LO>& localIDs,
902 const bool computeLIDs) const {
903 using Details::Behavior;
905 using std::cerr;
906 using std::endl;
907 using Teuchos::Array;
908 using Teuchos::ArrayRCP;
909 using Teuchos::ArrayView;
910 using Teuchos::as;
911 using Teuchos::RCP;
912 using Teuchos::toString;
913 using size_type = typename Array<GO>::size_type;
914 const char funcPrefix[] =
915 "Tpetra::"
916 "DistributedNoncontiguousDirectory::getEntriesImpl: ";
917 const char errSuffix[] =
918 " Please report this bug to the Tpetra developers.";
919
920 RCP<const Teuchos::Comm<int> > comm = map.getComm();
921 const bool verbose = Behavior::verbose("Directory") ||
922 Behavior::verbose("Tpetra::Directory");
923 const size_t maxNumToPrint = verbose ? Behavior::verbosePrintCountThreshold() : size_t(0);
924
925 std::unique_ptr<std::string> procPrefix;
926 if (verbose) {
927 std::ostringstream os;
928 os << "Proc ";
929 if (map.getComm().is_null()) {
930 os << "?";
931 } else {
932 os << map.getComm()->getRank();
933 }
934 os << ": ";
935 procPrefix = std::unique_ptr<std::string>(
936 new std::string(os.str()));
937 os << funcPrefix << "{";
939 os << ", ";
941 os << ", ";
943 os << ", computeLIDs: "
944 << (computeLIDs ? "true" : "false") << "}" << endl;
945 cerr << os.str();
946 }
947
948 const size_t numEntries = globalIDs.size();
949 const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
951
952 //
953 // Set up directory structure.
954 //
955
956 // If we're computing LIDs, we also have to include them in each
957 // packet, along with the GID and process ID.
958 const int packetSize = computeLIDs ? 3 : 2;
959
960 // For data distribution, we use: Surprise! A Distributor!
961 Distributor distor(comm);
962
963 // Get directory locations for the requested list of entries.
964 Array<int> dirImages(numEntries);
965 if (verbose) {
966 std::ostringstream os;
967 os << *procPrefix << "Call directoryMap_->getRemoteIndexList"
968 << endl;
969 cerr << os.str();
970 }
971 res = directoryMap_->getRemoteIndexList(globalIDs, dirImages());
972 if (verbose) {
973 std::ostringstream os;
974 os << *procPrefix << "Director Map getRemoteIndexList out ";
976 os << endl;
977 cerr << os.str();
978 }
979
980 // Check for unfound globalIDs and set corresponding nodeIDs to -1
981 size_t numMissing = 0;
982 if (res == IDNotPresent) {
983 for (size_t i = 0; i < numEntries; ++i) {
984 if (dirImages[i] == -1) {
985 nodeIDs[i] = -1;
986 if (computeLIDs) {
988 }
989 numMissing++;
990 }
991 }
992 }
993
996 if (verbose) {
997 std::ostringstream os;
998 os << *procPrefix << "Call Distributor::createFromRecvs"
999 << endl;
1000 cerr << os.str();
1001 }
1002 distor.createFromRecvs(globalIDs, dirImages(), sendGIDs, sendImages);
1003 if (verbose) {
1004 std::ostringstream os;
1005 os << *procPrefix << "Distributor::createFromRecvs result: "
1006 << "{";
1008 os << ", ";
1010 os << "}" << endl;
1011 cerr << os.str();
1012 }
1013 const size_type numSends = sendGIDs.size();
1014
1015 //
1016 // mfh 13 Nov 2012:
1017 //
1018 // The code below temporarily stores LO, GO, and int values in
1019 // an array of global_size_t. If one of the signed types (LO
1020 // and GO should both be signed) happened to be -1 (or some
1021 // negative number, but -1 is the one that came up today), then
1022 // conversion to global_size_t will result in a huge
1023 // global_size_t value, and thus conversion back may overflow.
1024 // (Teuchos::as doesn't know that we meant it to be an LO or GO
1025 // all along.)
1026 //
1027 // The overflow normally would not be a problem, since it would
1028 // just go back to -1 again. However, Teuchos::as does range
1029 // checking on conversions in a debug build, so it throws an
1030 // exception (std::range_error) in this case. Range checking is
1031 // generally useful in debug mode, so we don't want to disable
1032 // this behavior globally.
1033 //
1034 // We solve this problem by forgoing use of Teuchos::as for the
1035 // conversions below from LO, GO, or int to global_size_t, and
1036 // the later conversions back from global_size_t to LO, GO, or
1037 // int.
1038 //
1039 // I've recorded this discussion as Bug 5760.
1040 //
1041
1042 // global_size_t >= GO
1043 // global_size_t >= size_t >= int
1044 // global_size_t >= size_t >= LO
1045 // Therefore, we can safely store all of these in a global_size_t
1046 Kokkos::View<global_size_t*, Kokkos::HostSpace> exports("exports", packetSize * numSends);
1047 {
1048 // Packet format:
1049 // - If computing LIDs: (GID, PID, LID)
1050 // - Otherwise: (GID, PID)
1051 //
1052 // "PID" means "process ID" (a.k.a. "node ID," a.k.a. "rank").
1053
1054 // Current position to which to write in exports array. If
1055 // sending pairs, we pack the (GID, PID) pair for gid =
1056 // sendGIDs[k] in exports[2*k], exports[2*k+1]. If sending
1057 // triples, we pack the (GID, PID, LID) pair for gid =
1058 // sendGIDs[k] in exports[3*k, 3*k+1, 3*k+2].
1059 size_t exportsIndex = 0;
1060
1061 if (useHashTables_) {
1062 if (verbose) {
1063 std::ostringstream os;
1064 os << *procPrefix << "Pack exports (useHashTables_ true)"
1065 << endl;
1066 cerr << os.str();
1067 }
1068 for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1069 const GO curGID = sendGIDs[gidIndex];
1070 // Don't use as() here (see above note).
1071 exports[exportsIndex++] = static_cast<global_size_t>(curGID);
1072 const LO curLID = directoryMap_->getLocalElement(curGID);
1073 TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error, funcPrefix << "Directory Map's global index " << curGID << " lacks "
1074 "a corresponding local index."
1075 << errSuffix);
1076 // Don't use as() here (see above note).
1077 exports[exportsIndex++] =
1078 static_cast<global_size_t>(lidToPidTable_->get(curLID));
1079 if (computeLIDs) {
1080 // Don't use as() here (see above note).
1081 exports[exportsIndex++] =
1082 static_cast<global_size_t>(lidToLidTable_->get(curLID));
1083 }
1084 }
1085 } else {
1086 if (verbose) {
1087 std::ostringstream os;
1088 os << *procPrefix << "Pack exports (useHashTables_ false)"
1089 << endl;
1090 cerr << os.str();
1091 }
1092 for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1093 const GO curGID = sendGIDs[gidIndex];
1094 // Don't use as() here (see above note).
1095 exports[exportsIndex++] = static_cast<global_size_t>(curGID);
1096 const LO curLID = directoryMap_->getLocalElement(curGID);
1097 TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error, funcPrefix << "Directory Map's global index " << curGID << " lacks "
1098 "a corresponding local index."
1099 << errSuffix);
1100 // Don't use as() here (see above note).
1101 exports[exportsIndex++] =
1102 static_cast<global_size_t>(PIDs_[curLID]);
1103 if (computeLIDs) {
1104 // Don't use as() here (see above note).
1105 exports[exportsIndex++] =
1106 static_cast<global_size_t>(LIDs_[curLID]);
1107 }
1108 }
1109 }
1110
1111 TEUCHOS_TEST_FOR_EXCEPTION(exportsIndex > exports.size(), std::logic_error,
1112 funcPrefix << "On Process " << comm->getRank() << ", "
1113 "exportsIndex = "
1114 << exportsIndex << " > exports.size() = "
1115 << exports.size() << "." << errSuffix);
1116 }
1117
1118 TEUCHOS_TEST_FOR_EXCEPTION(numEntries < numMissing, std::logic_error, funcPrefix << "On Process " << comm->getRank() << ", numEntries = " << numEntries << " < numMissing = " << numMissing << "." << errSuffix);
1119
1120 //
1121 // mfh 13 Nov 2012: See note above on conversions between
1122 // global_size_t and LO, GO, or int.
1123 //
1124 const size_t numRecv = numEntries - numMissing;
1125
1126 {
1127 const size_t importLen = packetSize * distor.getTotalReceiveLength();
1128 const size_t requiredImportLen = numRecv * packetSize;
1129 const int myRank = comm->getRank();
1131 importLen < requiredImportLen, std::logic_error,
1132 "Tpetra::Details::DistributedNoncontiguousDirectory::getEntriesImpl: "
1133 "On Process "
1134 << myRank << ": The 'imports' array must have length "
1135 "at least "
1136 << requiredImportLen << ", but its actual length is " << importLen << ". numRecv: " << numRecv << ", packetSize: " << packetSize << ", numEntries (# GIDs): " << numEntries << ", numMissing: " << numMissing << ": distor.getTotalReceiveLength(): "
1137 << distor.getTotalReceiveLength() << ". " << std::endl
1138 << "Distributor description: " << distor.description() << ". "
1139 << std::endl
1140 << "Please report this bug to the Tpetra developers.");
1141 }
1142
1143 Kokkos::View<global_size_t*, Kokkos::HostSpace> imports("imports", packetSize * distor.getTotalReceiveLength());
1144 // FIXME (mfh 20 Mar 2014) One could overlap the sort2() below
1145 // with communication, by splitting this call into doPosts and
1146 // doWaits. The code is still correct in this form, however.
1147 if (verbose) {
1148 std::ostringstream os;
1149 os << *procPrefix << "Call doPostsAndWaits: {"
1150 << "packetSize: " << packetSize << ", ";
1151 verbosePrintArray(os, exports, "exports", maxNumToPrint);
1152 os << "}" << endl;
1153 cerr << os.str();
1154 }
1155 distor.doPostsAndWaits(exports, packetSize, imports);
1156 if (verbose) {
1157 std::ostringstream os;
1158 os << *procPrefix << "doPostsAndWaits result: ";
1159 verbosePrintArray(os, imports, "imports", maxNumToPrint);
1160 os << endl;
1161 cerr << os.str();
1162 }
1163
1164 Array<GO> sortedIDs(globalIDs); // deep copy (for later sorting)
1165 Array<GO> offset(numEntries); // permutation array (sort2 output)
1166 for (GO ii = 0; ii < static_cast<GO>(numEntries); ++ii) {
1167 offset[ii] = ii;
1168 }
1169 sort2(sortedIDs.begin(), sortedIDs.begin() + numEntries, offset.begin());
1170 if (verbose) {
1171 std::ostringstream os;
1172 os << *procPrefix;
1174 os << ", ";
1176 os << endl;
1177 cerr << os.str();
1178 }
1179
1180 size_t importsIndex = 0;
1181
1182 // we know these conversions are in range, because we loaded this data
1183 //
1184 // Don't use as() for conversions here; we know they are in range.
1185 for (size_t i = 0; i < numRecv; ++i) {
1186 const GO curGID = static_cast<GO>(imports[importsIndex++]);
1187 auto p1 = std::equal_range(sortedIDs.begin(),
1188 sortedIDs.end(), curGID);
1189 if (p1.first != p1.second) {
1190 const size_t j = p1.first - sortedIDs.begin();
1191 nodeIDs[offset[j]] =
1192 static_cast<int>(imports[importsIndex++]);
1193 if (computeLIDs) {
1194 localIDs[offset[j]] =
1195 static_cast<LO>(imports[importsIndex++]);
1196 }
1197 if (nodeIDs[offset[j]] == -1) {
1198 res = IDNotPresent;
1199 }
1200 }
1201 }
1202
1203 TEUCHOS_TEST_FOR_EXCEPTION(size_t(importsIndex) > size_t(imports.size()),
1204 std::logic_error, funcPrefix << "On Process " << comm->getRank() << ": importsIndex = " << importsIndex << " > imports.size() = " << imports.size() << ". "
1205 "numRecv: "
1206 << numRecv << ", packetSize: " << packetSize << ", "
1207 "numEntries (# GIDs): "
1208 << numEntries << ", numMissing: " << numMissing << ": distor.getTotalReceiveLength(): " << distor.getTotalReceiveLength() << "." << errSuffix);
1209 if (verbose) {
1210 std::ostringstream os;
1211 os << *procPrefix << funcPrefix << "Done!" << endl;
1212 cerr << os.str();
1213 }
1214 return res;
1215}
1216
1217template <class LO, class GO, class NT>
1219 isOneToOne(const Teuchos::Comm<int>& comm) const {
1220 if (oneToOneResult_ == ONE_TO_ONE_NOT_CALLED_YET) {
1221 const int lcl121 = isLocallyOneToOne() ? 1 : 0;
1222 int gbl121 = 0;
1223 Teuchos::reduceAll<int, int>(comm, Teuchos::REDUCE_MIN, lcl121,
1224 Teuchos::outArg(gbl121));
1225 oneToOneResult_ = (gbl121 == 1) ? ONE_TO_ONE_TRUE : ONE_TO_ONE_FALSE;
1226 }
1227 return (oneToOneResult_ == ONE_TO_ONE_TRUE);
1228}
1229} // namespace Details
1230} // namespace Tpetra
1231
1232//
1233// Explicit instantiation macro
1234//
1235// Must be expanded from within the Tpetra namespace!
1236//
1237#define TPETRA_DIRECTORYIMPL_INSTANT(LO, GO, NODE) \
1238 namespace Details { \
1239 template class Directory<LO, GO, NODE>; \
1240 template class ReplicatedDirectory<LO, GO, NODE>; \
1241 template class ContiguousUniformDirectory<LO, GO, NODE>; \
1242 template class DistributedContiguousDirectory<LO, GO, NODE>; \
1243 template class DistributedNoncontiguousDirectory<LO, GO, NODE>; \
1244 }
1245
1246#endif // TPETRA_DIRECTORYIMPL_DEF_HPP
Interface for breaking ties in ownership.
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
Description of Tpetra's behavior.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Implementation of Directory for a contiguous, uniformly distributed Map.
std::string description() const override
A one-line human-readable description of this object.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
LookupStatus getEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Implementation of Directory for a distributed contiguous Map.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
std::string description() const override
A one-line human-readable description of this object.
Implementation of Directory for a distributed noncontiguous Map.
std::string description() const override
A one-line human-readable description of this object.
bool isOneToOne(const Teuchos::Comm< int > &comm) const override
Whether the Directory's input Map is (globally) one to one.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
bool isOneToOne(const Teuchos::Comm< int > &comm) const override
Whether the Directory's input Map is (globally) one to one.
std::string description() const override
A one-line human-readable description of this object.
ReplicatedDirectory()=default
Constructor (that takes no arguments).
Sets up and executes a communication plan for a Tpetra DistObject.
A parallel distribution of indices over processes.
Implementation details of Tpetra.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void initialize(int *argc, char ***argv)
Initialize Tpetra.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2, const bool stableSort=false)
Sort the first array, and apply the resulting permutation to the second array.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
size_t global_size_t
Global size_t object.